
R-Recency(最近一次购买时间)
F-Frequency(消费频率)
M-Money(消费金额)
import pandas as pd
data=pd.read_csv('UserBehavior.csv')
print(data.head())


create table user (
id int not null,
item int not null,
behavior varchar(10) not null,
category int not null,
times int not null,
constraint id_behave primary key(id,item,times));
将时间戳转换为日期类型
set sql_safe_updates=0;
alter table user add column datetime timestamp(0) null;
update user set datetime = from_unixtime(times);

将其按date和time拆分成两列
alter table user add column date char(10) null;--增加date一列
update user set date = substring(datetime from 1 for 10);--取出年-月-天
alter table user add column time char(10) null;--增加time一列
update user set time = substring(datetime from 12 for 2);--取出小时

删去11-25至12-03日以外的数据
delete from user where datetime<'2017-11-25 00:00:00' or datetime>'2017-12-04 00:00:00';
将每个用户的登录时间排名
select *, row_number() over (partition by id order by date) as user_rank from user

统计每日新增用户数DNU
select date,count(id) as DNU from
(select *, row_number() over (partition by id order by date) as user_rank from user) as A
where A.user_rank=1 group by date order by date asc;


依次算出每日留存人数
select date,count(distinct id) as 留存人数,
count(distinct id)/(select count(distinct id) from user where date='2017-11-25') as 留存率 from user
where id in (select distinct id from user where date='2017-11-25')
group by date order by date asc;

创建用户行为视图(按天)
create view user_behavior as
SELECT date,
SUM(CASE WHEN behavior='pv' THEN 1 ELSE 0 END)AS '浏览数',
SUM(CASE WHEN behavior='fav' THEN 1 ELSE 0 END)AS '收藏数',
SUM(CASE WHEN behavior='cart' THEN 1 ELSE 0 END)AS '购物车',
SUM(CASE WHEN behavior='buy' THEN 1 ELSE 0 END)AS '付费数'
FROM user
GROUP BY date
ORDER BY date


创建用户每小时的行为视图(按小时)
create view time_behavior as SELECT time,
SUM(CASE WHEN behavior='pv' THEN 1 ELSE 0 END)AS '浏览数',
SUM(CASE WHEN behavior='fav' THEN 1 ELSE 0 END)AS '收藏数',
SUM(CASE WHEN behavior='cart' THEN 1 ELSE 0 END)AS '购物车',
SUM(CASE WHEN behavior='buy' THEN 1 ELSE 0 END)AS '付费数'
FROM user
GROUP BY time
ORDER BY time

计算用户总数
select count(distinct id) as 用户总数 from user;

计算跳失率:跳失率=只点击一次浏览的用户数量/总用户访问量
select count(*) as 只浏览一次就离开的人数 from
(select id from user group by id having count(behavior)=1) as A

用户行为数漏斗计算
select behavior,count(*) as 行为次数 from user group by behavior;


假设一:用户浏览商品时未使用收藏加购功能,导致产生购买意愿时增加了搜寻成本,从而使购买意愿下降。 假设二:商品热搜产品与热销产品不匹配造成的转化率低。

建立购买流程视图
create view 购买流程as
select id,item,category,
sum(case when behavior='pv' then 1 else 0 end) as pv,
sum(case when behavior'fav' then 1 else 0 end) as fav,
sum(case when behavior='cart' then 1 else 0 end) as cart,
sum(case when behavior='buy' then 1 else 0 end) as buy
from user
group by id,item,category;--分别按照id,item,category分组

不同购买流程分析
点击后直接购买:pv-buy
select count(1) as 浏览人数 from 购买流程 where pv>0;

select count(1) as 浏览后直接购买人数 from 购买流程
where pv>0 and fav=0 and cart=0 and buy>0 ;

点击、收藏后购买:pv-fav-buy
select count(1) as 浏览后收藏人数 from 购买流程 where pv>0 and fav>0;

select count(1) as 浏览后收藏再购买人数 from 购买流程 where pv>0 and fav>0 and cart=0 and buy>0;

点击、加购后购买:pv-cart-buy
select count(1) as 浏览后加购再购买人数 from 购买流程 where pv>0 and cart>0 ;

select count(1) as 浏览后加购再购买人数 from 购买流程 where pv>0 and fav=0 and cart>0 and buy>0;

点击、收藏并加购后购买:pv-fav、cart-buy
select count(1) as 浏览收藏加购人数 from 购买流程 where pv>0 and cart>0 and fav>0 ;

select count(1) as 浏览收藏加购再购买人数 from 购买流程 where pv>0 and fav>0 and cart>0 and buy>0;





提取排名前50的热销商品
select * from (select item,count(1) as 热销商品,
row_number()over(order by count(1) desc )as 热销排名 from user
where behavior='buy' group by item)as A where A.热销排名<51;

提取排名前50的热销商品
select * from (SELECT item, COUNT(1) AS 热搜商品 ,
row_number() over(order by COUNT(1) desc) as 热搜排名 FROM user
WHERE behavior='pv' or behavior='fav' or behavior='cart' GROUP BY item) as A where A.热搜排名<51;

热销商品与热搜商品匹配度分析
select A.item,A.热销商品,A.热销排名,B.热搜商品,B.热搜排名 from
(select * from (select item,count(1) as 热销商品,row_number()over(order by count(1) desc )as 热销排名 from user where behavior='buy' group by item)as A where A.热销排名<51) as A
inner join
(select * from (SELECT item, COUNT(1) AS 热搜商品 ,row_number() over(order by COUNT(1) desc) as 热搜排名 FROM user WHERE behavior='pv' or behavior='fav' or behavior='cart' GROUP BY item) as A where A.热搜排名<51) as B
on A.item=B.item

按被购买次数划分商品
SELECT A.`被购买次数`,COUNT(item) as '商品数' from
(SELECT item, COUNT(id) AS '被购买次数' FROM user WHERE behavior='buy' GROUP BY item) as A
GROUP BY A.`被购买次数`
order by A.`被购买次数` asc;

不用时间尺度下的转化率 每日付费转化率
select date,浏览数,付费数,付费数/浏览数 as '转化率' from user_behavior


每小时付费转化率
select time,浏览数,付费数,付费数/浏览数 as 转化率 from time_behavior;


不同类别商品的付费转化率
SELECT category,
SUM(CASE WHEN behavior='pv' THEN 1 ELSE 0 END)AS '浏览数',
SUM(CASE WHEN behavior='fav' THEN 1 ELSE 0 END)AS '收藏数',
SUM(CASE WHEN behavior='cart' THEN 1 ELSE 0 END)AS '购物车',
SUM(CASE WHEN behavior='buy' THEN 1 ELSE 0 END)AS '付费数'
FROM user
GROUP BY category
order by 浏览数 desc


创建用户购买时间到12-04日的距离视图
CREATE VIEW pay_B AS
SELECT id, DATEDIFF('2017-12-04',MAX(date)) AS B FROM user WHERE behavior='buy' GROUP BY id;

创建R得分视图
create view RR as
SELECT id,
(CASE WHEN B BETWEEN 7 AND 8 THEN 1
WHEN B BETWEEN 5 AND 6 THEN 2
WHEN B BETWEEN 3 AND 4 THEN 3
WHEN B BETWEEN 1 AND 2 THEN 4
WHEN B BETWEEN 0 AND 1 THEN 5
ELSE null END) AS R
FROM pay_B ORDER BY R DESC

查看用户购买次数
SELECT id,count(id)as '次数' from user where behavior='buy' group by id order by 次数 desc;

创建用户购买次数视图
CREATE VIEW pay_F AS SELECT id, COUNT(*) AS A FROM user WHERE behavior='buy' GROUP BY id;
创建F得分视图
create view FF as
SELECT id, (CASE WHEN A BETWEEN 1 AND 15 THEN 1
WHEN A BETWEEN 16 AND 30 THEN 2
WHEN A BETWEEN 31 AND 45 THEN 3
WHEN A BETWEEN 46 AND 57 THEN 4
WHEN A BETWEEN 58 AND 72 THEN 5 ELSE 0 END) as F
FROM pay_F ORDER BY F DESC;

创建RFM得分视图
create view RR_FF as SELECT RR.id,RR.R,FF.F from RR left join FF on RR.id=FF.id

计算R、F平均值
select avg(R) as R平均值,avg(F) as F平均值 from RR_FF;

按得分与平均分的关系进行用户分类
select 用户分类,count(用户ID) as 人数
from (select 用户ID,
(case when R >3.5544 and F >1.3249 then "重要价值客户"
when R >3.5544 and F <1.3249 then "重要发展客户"
when R <3.5544 and F >1.3249 then "重要保持客户"
when R <3.5544 and F <1.3249 then "重要挽留客户"
else 0 end) as 用户分类
from RR_FF) as a group by 用户分类;

文章转载自大数据学习与分享,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




