点击关注公众号,干货第一时间送达

关于 "比例" 的求解,
前面的文章:
SQL脚本案例【10】查询首次下单后第二天连续下单的用户比率
有关类似演示。
不难,今天再来复习一遍。
【温馨提示:建表语句及数据导入脚本,已经放置在文章末尾】
一、需求场景
题目:
查询订单配送表delivery_info中,即时订单的比例。
即时订单:如果下单日期和配送日期一样,则为即时订单。
筛选每个用户最早的订单即可。
订单配送表delivery_info:

显而易见,要计算的就是 order_date=custom_date 的比例。
正确结果:

二、解决方案
演示工具:Hive-3.1.3 + DataGrip2022
这题只需要操作一张订单配送表delivery_info即可,
而筛选出每个用户的最早订单,用 "开窗排序" 就行
【友情提示:关于"开窗排序"的函数 rank() ,可以翻看之前的文章:
(1)给每个用户的订单进行 order_date 排序
select
delivery_id,
user_id,
order_date,
custom_date,
rank() over (partition by user_id order by order_date) as rk
from delivery_info
结果集:

(2)选择每个用户最早的下单即可,筛选rk=1
select *
from (
select
delivery_id,
user_id,
order_date,
custom_date,
rank() over (partition by user_id order by order_date) as rk
from delivery_info
)t1
where rk=1
结果集:

(3)利用if() 函数,筛选 order_date=custom_date 的数据,并计算比例
select sum(`if`(order_date=custom_date,1,0)) * 100 / count(*)
from (
select
delivery_id,
user_id,
order_date,
custom_date,
rank() over (partition by user_id order by order_date) as rk
from delivery_info
)t1
where rk=1
结果集:

(4)结果值修饰一番,round()保留小数位,concat() 拼接 '%'
select concat(round(sum(`if`(order_date=custom_date,1,0)) * 100/count(*), 2), '%')
from (
select
delivery_id,
user_id,
order_date,
custom_date,
rank() over (partition by user_id order by order_date) as rk
from delivery_info
)t1
where rk=1
结果集:

三、源数据
订单配送表delivery_detail:
create table delivery_info
(
delivery_id string,
order_id string,
user_id string,
order_date string,
custom_date string
)
row format serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
with serdeproperties ('field.delim' = '\t') stored as
inputformat 'org.apache.hadoop.mapred.TextInputFormat'
outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location 'hdfs://mycluster/user/hive/warehouse/hql_test2.db/delivery_info'
tblproperties ('bucketing_version' = '2');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('1', '1', '101', '2021-09-27', '2021-09-29');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('2', '2', '101', '2021-09-28', '2021-09-28');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('3', '3', '101', '2021-09-29', '2021-09-30');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('4', '4', '101', '2021-09-30', '2021-10-01');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('5', '5', '102', '2021-10-01', '2021-10-01');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('6', '6', '102', '2021-10-01', '2021-10-01');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('7', '7', '102', '2021-10-01', '2021-10-03');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('8', '8', '102', '2021-10-02', '2021-10-02');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('9', '9', '103', '2021-10-02', '2021-10-03');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('10', '10', '103', '2021-10-02', '2021-10-04');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('11', '11', '103', '2021-10-02', '2021-10-02');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('12', '12', '103', '2021-10-03', '2021-10-03');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('13', '13', '104', '2021-10-03', '2021-10-04');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('14', '14', '104', '2021-10-03', '2021-10-04');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('15', '15', '104', '2021-10-03', '2021-10-03');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('16', '16', '104', '2021-10-03', '2021-10-03');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('17', '17', '105', '2021-10-04', '2021-10-04');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('18', '18', '105', '2021-10-04', '2021-10-06');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('19', '19', '105', '2021-10-04', '2021-10-06');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('20', '20', '105', '2021-10-04', '2021-10-04');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('21', '21', '106', '2021-10-04', '2021-10-04');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('22', '22', '106', '2021-10-05', '2021-10-05');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('23', '23', '106', '2021-10-05', '2021-10-05');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('24', '24', '106', '2021-10-05', '2021-10-07');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('25', '25', '107', '2021-10-05', '2021-10-05');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('26', '26', '107', '2021-10-05', '2021-10-06');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('27', '27', '107', '2021-10-06', '2021-10-06');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('28', '28', '107', '2021-10-06', '2021-10-07');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('29', '29', '108', '2021-10-06', '2021-10-06');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('30', '30', '108', '2021-10-06', '2021-10-06');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('31', '31', '108', '2021-10-07', '2021-10-09');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('32', '32', '108', '2021-10-07', '2021-10-09');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('33', '33', '109', '2021-10-07', '2021-10-08');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('34', '34', '109', '2021-10-07', '2021-10-08');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('35', '35', '109', '2021-10-08', '2021-10-10');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('36', '36', '109', '2021-10-08', '2021-10-09');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('37', '37', '1010', '2021-10-08', '2021-10-10');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('38', '38', '1010', '2021-10-08', '2021-10-10');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('39', '39', '1010', '2021-10-08', '2021-10-09');
INSERT INTO hql_test2.delivery_info (delivery_id, order_id, user_id, order_date, custom_date) VALUES ('40', '40', '1010', '2021-10-08', '2021-10-09');

文章转载自皮皮克克,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




