表数据:https://pan.baidu.com/s/1giOajxNUaU_Lc4jzTL2UnA 提取码:cba4
hive练习题
建表:1、找出顾客最常用的信用卡2、找出客户资料中排名前五的的职位名称3、在美国女性最常用的信用卡4、按性别和国家进行客户统计/国家/工作/邮箱/语言/信用卡5、计算每月总收入6、计算每个季度的总收入7、按年计算总收入8、按工作日计算总收入9、按时间段计算总收入10、按时间段计算平均消费11、按工作日计算平均消费12、计算年、月、日的交易总数13、找出交易量最大的10个客户14、找出消费最多的前10位顾客15、统计该期间交易数量最少的用户16、计算每个季度的独立客户总数17、计算每周的独立客户总数18、计算整个活动客户平均花费的最大值19、统计每月花费最多的客户20、统计每月访问次数最多的客户21、按总价找出最受欢迎的5种产品22、根据购买频率找出最畅销的5种产品23、根据客户数量找出最受欢迎的5种产品24、按客流量找出最受欢迎的商店25、根据顾客消费价格找出最受欢迎的商店26、根据顾客交易情况找出最受欢迎的商店27、根据商店和唯一的顾客id获取最受欢迎的产品28、获取每个商店的员工与顾客比29、按年和月计算每家店的收入30、按店铺制作总收益饼图31、找出每个商店最繁忙的时间段32、找出每家店的忠实顾客33、根据每位员工的最高收入找出明星商店34、在ext_store_review中找出存在冲突的交易映射关系35、了解客户评价的覆盖率36、根据评分了解客户的分布情况37、根据交易了解客户的分布情况38、客户给出的最佳评价是否总是同一家门店
建表:
store_details表
create external
table store_details
(
store_id
INT,
store_name STRING
,
employee_number
INT
)
row format delimited
fields terminated by ','
tblproperties
("skip.header.line.count"="1");
load data local inpath
'/root/kb08/hive/store/store_details.csv' into table store_details
;
store_review表
create external
table store_review
(
transaction_id
BIGINT,
store_id
INT,
review_score
INT
)
row format delimited
fields terminated by ','
tblproperties
("skip.header.line.count"="1");
load data local inpath
'/root/kb08/hive/store/store_review.csv' into table store_review
;
transaction_details表
create external
table transaction_details
(
transaction_id
BIGINT,
customer_id
BIGINT,
store_id
INT,
price
DOUBLE,
product STRING
,
date STRING
,
time STRING
)
row format delimited
fields terminated by ','
tblproperties
("skip.header.line.count"="1");
load data local inpath
'/root/kb08/hive/store/transaction_details.csv' into table transaction_details
;
customer_datails表
create table if not exists customer_datails
(
customer_id
INT,
first_name STRING
,
last_name STRING
,
email STRING
,
gender STRING
,
address STRING
,
country string
,
language STRING
,
job STRING
,
credit_type STRING
,
credit_type STRING
,
credit_no STRING
)
row format delimited
fields terminated by ','
lines terminated by '\n'
tblproperties
("skip.header.line.count"="1");
load data local inpath
'/root/kb08/hive/store/customer_datails.csv' into table customer_datails
;
练习题:
1、找出顾客最常用的信用卡
select credit_type
from (
select credit_type
,dense_rank
() over(order by num
DESC) rnk
from (
select credit_type
,count(1) num
from customer_details
group by credit_type
order by num
DESC
)T
)T
where rnk
=1
2、找出客户资料中排名前五的的职位名称
select job
from(
select job
,dense_rank
() over(order by num
DESC) rnk
from(
select job
,count(1) num
from customer_datails
group by job
order by num
DESC
)T
)T
where rnk
<=5
3、在美国女性最常用的信用卡
select credit_type
from(
select credit_type
,dense_rank
() over(order by num
DESC) rnk
from (
select credit_type
,count(1) num
from customer_datails
where country
='United States' and gender
='Female'
group by credit_type
order by num
DESC
)T
)T
where rnk
=1
4、按性别和国家进行客户统计/国家/工作/邮箱/语言/信用卡
select country
,gender
,concat_ws
(',',collect_set
(dn
)) hdns
from(
select country
,gender
,dn
from(
select country
,gender
,dn
,enum,rank
() over(partition by country
,gender
) rnk
from(
select country
,gender
,dn
,count(1) enum from(
select country
,gender
,regexp_extract
(email
,'(\\w+)@(.+)',2) dn
from customer_datails
)T
group by country
,gender
,dn
)T
)T
where rnk
=1
)T
group by country
,gender
5、计算每月总收入
select month(to_date
(regexp_replace
(date,'/','-'))) month,sum(price
) from transaction_details
group by month(to_date
(regexp_replace
(date,'/','-'))) order by month
6、计算每个季度的总收入
select quarter
,sum(price
) from
(select ceil
(month(to_date
(regexp_replace
(date,'/','-')))/3) quarter
,price
from transaction_details
)r1
group by quarter
7、按年计算总收入
select year(to_date
(regexp_replace
(date,'/','-'))) year,sum(price
) from transaction_details
group by year(to_date
(regexp_replace
(date,'/','-'))) order by year
8、按工作日计算总收入
周日:
1
周一:
2
周二:
3
周三:
4
周四:
5
周五:
6
周六:
7
select dayofweek
,sum
from
(select dayofweek
(to_date
(regexp_replace
(date,'/','-'))) dayofweek
,sum(price
) sum
from transaction_details
group by dayofweek
(to_date
(regexp_replace
(date,'/','-'))))r1
where dayofweek
>=2 and dayofweek
<=6
9、按时间段计算总收入
select cast
(substr
(time,0,instr
(time,':')-1) as INT) hour,sum(price
) sum_prive
from transaction_details
group by substr
(time,0,instr
(time,':')-1) order by hour
10、按时间段计算平均消费
select cast
(substr
(time,0,instr
(time,':')-1) as INT) hour,avg(price
) avg_price
from transaction_details
group by substr
(time,0,instr
(time,':')-1) order by hour
11、按工作日计算平均消费
select dayofweek
,sum
from
(select dayofweek
(to_date
(regexp_replace
(date,'/','-'))) dayofweek
,avg(price
) avg_price
from transaction_details
group by dayofweek
(to_date
(regexp_replace
(date,'/','-'))))r1
where dayofweek
>=2 and dayofweek
<=6
12、计算年、月、日的交易总数
with
r1
as (select year(to_date
(regexp_replace
(date,'/','-'))) time,count(transaction_id
) from transaction_details
group by year(to_date
(regexp_replace
(date,'/','-'))) order by time DESC),
r2
as (select month(to_date
(regexp_replace
(date,'/','-'))) time,count(transaction_id
) from transaction_details
group by month(to_date
(regexp_replace
(date,'/','-'))) order by time DESC),
r3
as (select day(to_date
(regexp_replace
(date,'/','-'))) time,count(transaction_id
) from transaction_details
group by day(to_date
(regexp_replace
(date,'/','-'))) order by time DESC)
select * from r1
union all select * from r2
union all select * from r3
13、找出交易量最大的10个客户
select customer_id
,num
,rnk
from
(select customer_id
,dense_rank
() over(order by num
DESC) rnk
from
(select customer_id
,count(1) num
from transaction_details
group by customer_id
)r1
)r2
where rnk
<=10
14、找出消费最多的前10位顾客
select customer_id
,,sum_pricernk
from
(select customer_id
,dense_rank
() over(order by sum_price
DESC) rnk
from
(select customer_id
,sum(price
) sum_price
from transaction_details
group by customer_id
)r1
)r2
where rnk
<=10
15、统计该期间交易数量最少的用户
select customer_id
,rnk
from
(select customer_id
,dense_rank
() over(order by num
) rnk
from
(select customer_id
,count(1) num
from transaction_details
group by customer_id
)r1
)r2
where rnk
=1
16、计算每个季度的独立客户总数
select quarter
,count(customer_id
) num
from(
select case when month<=3 then 1 when month<=6 then 2 when month<=9 then 3 else 4 end quarter
,customer_id
from
(
select month(to_date
(regexp_replace
(date,'/','-'))) month,customer_id
from transaction_details
)r1
group by month,customer_id
)r2
group by quarter
17、计算每周的独立客户总数
select weekofyear
,count(customer_id
) num
from
(select weekofyear
,customer_id
from
(select weekofyear
(to_date
(regexp_replace
(date,'/','-'))) weekofyear
,customer_id
from transaction_details
)r1
group by weekofyear
,customer_id
)r2
group by weekofyear
18、计算整个活动客户平均花费的最大值
select customer_id
,avg_price
,rnk
from
(select customer_id
,avg_price
,dense_rank
() over(order by avg_price
DESC) rnk
from
(select customer_id
,avg(price
) avg_price
from transaction_details
group by customer_id
)r1
)r2
where rnk
=1
19、统计每月花费最多的客户
select month,customer_id
,sum_price
,rnk
from
(
select month,customer_id
,sum_price
,dense_rank
() over(partition by month order by sum_price
DESC) rnk
from
(
select month,customer_id
,sum(price
) sum_price
from
(select month(to_date
(regexp_replace
(date,'/','-'))) month,customer_id
,price
from transaction_details
)r1
group by month,customer_id
)r2
)r3
where rnk
=1
20、统计每月访问次数最多的客户
select month,customer_id
,rnk
from
(
select month,customer_id
,dense_rank
() over(partition by month order by num
DESC) rnk
from
(
select month,customer_id
,count(transaction_id
) num
from
(
select
month(to_date
(regexp_replace
(date,'/','-'))) month,
customer_id
,transaction_id
from
transaction_details
)r1
group by month,customer_id
)r2
)r3
where rnk
=1
21、按总价找出最受欢迎的5种产品
select product
,sum_price
,rnk
from
(
select product
,sum_price
,dense_rank
() over(order by sum_price
DESC) rnk
from
(
select product
,sum(price
) sum_price
from transaction_details
group by product
)r1
)r2
where rnk
<=5
22、根据购买频率找出最畅销的5种产品
select product
,num
,rnk
from
(
select product
,num
,dense_rank
() over(order by num
DESC) rnk
from
(
select product
,count(transaction_id
) num
from transaction_details
group by product
)r1
)r2
where rnk
<=5
23、根据客户数量找出最受欢迎的5种产品
select product
,num
,rnk
from
(
select product
,num
,dense_rank
() over(order by num
DESC) rnk
from
(
select product
,count(customer_id
) num
from transaction_details
group by product
)r1
)r2
where rnk
<=5
24、按客流量找出最受欢迎的商店
select store_id
,num
,rnk
from
(select store_id
,num
,dense_rank
() over(order by num
DESC) rnk
from(
select store_id
,count(transaction_id
) num
from transaction_details
group by store_id
)r1
)r2
where rnk
=1
25、根据顾客消费价格找出最受欢迎的商店
select store_id
,sum(price
) sum_price
from transaction_details
group by store_id
order by sum_price
DESC limit 1
select store_id
,sum_price
,rnk
from
(
select store_id
,sum_price
,dense_rank
() over(order by sum_price
) rnk
from
(
select store_id
,sum(price
) sum_price
from transaction_details
group by store_id
)r1
)r2
where rnk
=1
26、根据顾客交易情况找出最受欢迎的商店
select store_id
,rnk
from
(select store_id
,dense_rank
() over(order by num
) rnk
from
(select store_id
,count(transaction_id
) num
from transaction_details
group by store_id
)r1
)r2
where rnk
=1
27、根据商店和唯一的顾客id获取最受欢迎的产品
select store_id
,product
,rnk
from
(select store_id
,product
,dense_rank
() over(partition by store_id
order by num
DESC) rnk
from
(select store_id
,product
,count(distinct customer_id
) num
from transaction_details
group by store_id
,product
)r1
)r2
where rnk
= 1
28、获取每个商店的员工与顾客比
select S
.store_id
,customer_num
,employee_number
,customer_num
/employee_number
as `%` from
(
select store_id
,count(customer_id
) customer_num
from transaction_details
group by store_id
)r1
inner join store_details S
on S
.store_id
=r1
.store_id
29、按年和月计算每家店的收入
select store_id
,year,month,sum(price
) sum_price
from
(
select store_id
,
year(to_date
(regexp_replace
(date,'/','-'))) year,
month(to_date
(regexp_replace
(date,'/','-'))) month,price
from transaction_details
)r1
group by store_id
,year,month
30、按店铺制作总收益饼图
select store_id
,sum(price
) sum_price
from transaction_details
group by store_id
31、找出每个商店最繁忙的时间段
select store_id
,hours
,sum
,rnk
from
(select store_id
,hours
,sum
,dense_rank
() over(partition by store_id
order by sum
DESC) rnk
from
(select
store_id
,
case when hour<=6 then 1 when hour<=12 then 2 when hour<=18 then 3 else 4 end hours
,
sum(num
) sum
from
(select
store_id
,
cast
(substr
(time,0,instr
(time,':')-1)as INT) hour,
count(transaction_id
) num
from
transaction_details
group by store_id
,cast
(substr
(time,0,instr
(time,':')-1)as INT)
)r1
group by
store_id
,case when hour<=6 then 1 when hour<=12 then 2 when hour<=18 then 3 else 4 end)r2
)r3
where rnk
=1
32、找出每家店的忠实顾客
select store_id
,customer_id
,rnk
from
(
select store_id
,customer_id
,dense_rank
() over(partition by store_id
order by num
DESC) rnk
from
(
select store_id
,customer_id
,count(*) num
from transaction_details
group by store_id
,customer_id
)r1
)r2
where rnk
=1
33、根据每位员工的最高收入找出明星商店
select r3
.store_id
,sum_price
,employee_number
,wage
,rnk
from
(select r2
.store_id
,sum_price
,employee_number
,wage
,dense_rank
() over(order by wage
DESC) rnk
from
(select S
.store_id
,sum_price
,employee_number
,(sum_price
/employee_number
)as wage
from
(select store_id
,sum(price
) sum_price
from transaction_details
group by store_id
)r1
inner join store_details S
on r1
.store_id
=S
.store_id
)r2
)r3
where rnk
=1
34、在ext_store_review中找出存在冲突的交易映射关系
select transaction_id
from transaction_details
group by transaction_id
having count(1)>1
35、了解客户评价的覆盖率
select store_id
,count(1) from v_store_review
group by store_id
36、根据评分了解客户的分布情况
select store_id
,review_score
,count(1) from v_store_review
group by store_id
,review_score
37、根据交易了解客户的分布情况
select store_id
,customer_id
,count(1) from transaction_details
group by store_id
,customer_id
38、客户给出的最佳评价是否总是同一家门店
select store_id
,count(1) from
(select store_id
,review_score
from v_store_review
where review_score
='5')r1
group by store_id