一、行转列的使用
1、问题
hive如何将
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
变为:
a b 1,2,3
c d 4,5,6
2、数据
test.txt
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
3、答案
1.建表
drop table tmp_bsf_test;
create table tmp_bsf_test
(
col1 string,
col2 string,
col3 string
)
row format delimited fields terminated by ' '
stored as textfile;
load data local inpath '/home/jiangzl/shell/test.txt' into table tmp_bsf_test;
2.处理
select col1,col2,concat_ws(',',collect_set(col3))
from tmp_bsf_test
group by col1,col2;
二、列转行
1、问题
hive如何将
a b 1,2,3
c d 4,5,6
变为:
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
2、答案
1.建表
drop table tmp_bsf_test;
create table tmp_bsf_test
(
col1 string,
col2 string,
col3 string
)
row format delimited fields terminated by ' '
stored as textfile;
处理:
select col1, col2, col5
from tmp_bsf_test a
lateral view explode(split(col3,',')) b AS col5
应用实例:哪些券更容易组合在一起使用
drop table if exists tmp.user_idnumber;
create table tmp.user_idnumber
select
a.uid,
a.city_name,
a.order_number,
a.id_number,
b.name,
b.money,
b.pay_min,
b.valid_days,
b.description,
b.tag_name,
c.rn
from
(select uid,order_number,id_number,city_name from dwd.trd_sensors_order_product
where pdate='2021-07-27'
and status=2
and id_number is not null
group by uid,order_number,id_number,city_name
)a
left join
(
select id_number,name,money,pay_min,valid_days,description,tag_name from dim.coupon_ticket
where snapshot='2021-07-27'
group by id_number,name,money,pay_min,valid_days,description,tag_name
)b on a.id_number=b.id_number
left join
tmp.user_order_paixu c on a.order_number=c.order_number
group by a.uid,
a.city_name,
a.order_number,
a.id_number,
b.name,
b.money,
b.pay_min,
b.valid_days,
b.description,
b.tag_name,
c.rn;
select city_name,
uid,
order_number,
rn,
concat_ws(',',collect_set(tag_name)) tag_lianhe
from
tmp.user_idnumber
group by city_name,uid,order_number,rn
select city_name,
rn,
tag_lianhe,
count(distinct uid) uid_num
from
(
select city_name,
uid,
order_number,
rn,
concat_ws(',',collect_set(tag_name)) tag_lianhe
from
tmp.user_idnumber
group by city_name,uid,order_number,rn
)tt
group by city_name,
rn,
tag_lianhe
limit 1000000;