1. text (탭(\t)으로 구분, /data/text 폴더에 저장)

-- Plain-text Hive table: one record per line, fields separated by a tab,
-- with the data files kept under /data/text.
-- Fix: the column list is now closed with ")" before ROW FORMAT; without it
-- the CREATE TABLE statement does not parse.
CREATE TABLE tb_text (
    ymd STRING,
    tag STRING,
    cnt INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/data/text'
;


테이블을 생성한 뒤 Hive에서 아래와 같이 설정하면 INSERT할 때 출력 파일이 압축된다. (codec 설정은 둘 중 하나만 실행한다.)


-- Enable compression of the output files Hive writes.
SET hive.exec.compress.output=true;


-- The two lines below assign the same property, so they are alternatives:
-- run only the codec you want. If both are run, the later one (Snappy) wins.
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;      -- gzip
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;             -- snappy



2. parquet

-- Parquet-backed Hive table stored under /data/text.
-- The SerDe and input/output format classes are named explicitly; these are
-- the class names shipped with the standalone parquet-hive bundle.
-- NOTE(review): Hive 0.13 and later also accept the shorthand
-- "STORED AS PARQUET" -- confirm against the target Hive version.
-- Fix: the column list is now closed with ")" before ROW FORMAT SERDE;
-- without it the CREATE TABLE statement does not parse.
CREATE TABLE tb_text (
    ymd STRING,
    tag STRING,
    cnt INT
)
ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'
STORED AS
    INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat'
    OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat'
LOCATION '/data/text'
;


압축은 Hive 인터프리터에서 아래 셋 중 하나를 실행하면 설정된다.

-- Three alternative values for the same property: run exactly one of them
-- to choose the Parquet compression (none, GZIP, or Snappy).
set parquet.compression=UNCOMPRESSED;

set parquet.compression=GZIP;

set parquet.compression=SNAPPY;

3. rcfile

-- RCFile-backed Hive table stored under /data/text.
-- Fix: the column list is now closed with ")" before STORED AS; without it
-- the CREATE TABLE statement does not parse.
CREATE TABLE tb_text (
    ymd STRING,
    tag STRING,
    cnt INT
)
STORED AS RCFILE
LOCATION '/data/text'
;


압축은 아래와 같이 설정한다. (codec 설정은 둘 중 하나만 실행한다.)


-- Enable compression of the output files Hive writes.
SET hive.exec.compress.output=true;


-- The two lines below assign the same property, so they are alternatives:
-- run only the codec you want. If both are run, the later one (Snappy) wins.
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;      -- gzip
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;             -- snappy




4. sequence

-- SequenceFile-backed Hive table stored under /data/text.
-- Fix: the column list is now closed with ")" before STORED AS; without it
-- the CREATE TABLE statement does not parse.
CREATE TABLE tb_text (
    ymd STRING,
    tag STRING,
    cnt INT
)
STORED AS SEQUENCEFILE
LOCATION '/data/text'
;


압축은 아래와 같이 설정한다. (codec 설정은 둘 중 하나만 실행한다.)


-- Enable compression of the output files Hive writes.
SET hive.exec.compress.output=true;


-- The two lines below assign the same property, so they are alternatives:
-- run only the codec you want. If both are run, the later one (Snappy) wins.
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;      -- gzip
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;             -- snappy




5. orc

-- ORC-backed Hive table stored under /data/text.
-- Compression is chosen per table through the "orc.compress" property;
-- "NONE" here means the ORC files are written uncompressed.
-- Fix: the column list is now closed with ")" before STORED AS; without it
-- the CREATE TABLE statement does not parse.
CREATE TABLE tb_text (
    ymd STRING,
    tag STRING,
    cnt INT
)
STORED AS ORC
LOCATION '/data/text'
TBLPROPERTIES ("orc.compress"="NONE")
;


압축하려면 위 CREATE TABLE 문의 tblproperties 절을 아래 값 중 하나로 대체하면 된다.

tblproperties ("orc.compress"="ZLIB") 


tblproperties ("orc.compress"="SNAPPY")


+ Recent posts