1. text (\t 으로 구분, /data/text 폴더에 저장)
-- TEXTFILE table: tab-delimited rows stored under /data/text.
-- ymd = date string, tag = label, cnt = count (per the column names; no
-- further semantics are documented in the source notes).
CREATE TABLE IF NOT EXISTS tb_text (
    ymd STRING,
    tag STRING,
    cnt INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/data/text'
;
테이블을 생성한 뒤 hive에서 아래와 같이 실행하면 insert할 때 압축된다.
-- Enable compression of query output written by INSERT.
SET hive.exec.compress.output=true;
-- Run exactly ONE of the two codec lines below: each SET replaces the
-- previous value, so executing both leaves Snappy in effect.
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; -- gzip
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec; -- snappy
2. parquet
-- Parquet table.
-- NOTE(review): the original reused the name tb_text and LOCATION
-- '/data/text' from section 1 (copy-paste), which collides with the
-- TEXTFILE table and would mix files of both formats in one directory;
-- renamed to tb_parquet with its own storage path.
-- The parquet.hive.* SerDe/InputFormat/OutputFormat classes are the
-- legacy pre-Hive-0.13 bindings; on Hive 0.13+ the same table can be
-- declared with just STORED AS PARQUET.
CREATE TABLE IF NOT EXISTS tb_parquet (
    ymd STRING,
    tag STRING,
    cnt INT
)
ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'
STORED AS
    INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat'
    OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat'
LOCATION '/data/parquet'
;
압축은 hive 인터프리터에서 셋 중 하나를 실행하면 설정된다.
-- Parquet compression codec: run exactly ONE of these. Each SET replaces
-- the previous value, so running all three leaves SNAPPY in effect.
set parquet.compression=UNCOMPRESSED;
set parquet.compression=GZIP;
set parquet.compression=SNAPPY;
3. rcfile
-- RCFile table.
-- NOTE(review): renamed from the duplicated tb_text / '/data/text'
-- (copy-paste from section 1) so each storage format has its own table
-- name and directory and the CREATE statements don't collide.
CREATE TABLE IF NOT EXISTS tb_rcfile (
    ymd STRING,
    tag STRING,
    cnt INT
)
STORED AS RCFILE
LOCATION '/data/rcfile'
;
압축은 이렇게
-- Enable compression of query output written by INSERT.
SET hive.exec.compress.output=true;
-- Run exactly ONE of the two codec lines below: the second SET overrides
-- the first, so executing both leaves Snappy in effect.
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; -- gzip
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec; -- snappy
4. sequence
-- SequenceFile table.
-- NOTE(review): renamed from the duplicated tb_text / '/data/text'
-- (copy-paste from section 1) so each storage format has its own table
-- name and directory and the CREATE statements don't collide.
CREATE TABLE IF NOT EXISTS tb_sequence (
    ymd STRING,
    tag STRING,
    cnt INT
)
STORED AS SEQUENCEFILE
LOCATION '/data/sequence'
;
압축은 이렇게
-- Enable compression of query output written by INSERT.
SET hive.exec.compress.output=true;
-- Run exactly ONE of the two codec lines below: the second SET overrides
-- the first, so executing both leaves Snappy in effect.
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; -- gzip
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec; -- snappy
5. orc
-- ORC table, declared uncompressed via "orc.compress"="NONE"
-- (swap in ZLIB or SNAPPY as shown below to enable compression).
-- NOTE(review): renamed from the duplicated tb_text / '/data/text'
-- (copy-paste from section 1) so each storage format has its own table
-- name and directory and the CREATE statements don't collide.
CREATE TABLE IF NOT EXISTS tb_orc (
    ymd STRING,
    tag STRING,
    cnt INT
)
STORED AS ORC
LOCATION '/data/orc'
TBLPROPERTIES ("orc.compress"="NONE")
;
압축은 위의 tblproperties 값을 아래 값 중 하나로 대체하면 된다.
tblproperties ("orc.compress"="ZLIB")
tblproperties ("orc.compress"="SNAPPY")