需求说明
复杂的继承路径,非常类似中国古代的姓氏制度。
有些子孙被封到某地后,便以此地为姓,并成为此姓的始祖。
存在2支或多支同姓的,需要合并,统一归到最早的始祖名下。
- 路径中,当气泡不为sr或repeat(空白未标的都视为repeat)时,或背景颜色不同时,需在此处切断,该节点成为新姓的始祖
- 同姓合并:图中第2刀处的501应与第1刀处的027归并到一起,以027作为改姓始祖
实现逻辑
- 父子关系数据构建(同时关联取po_type和tiny_class)
-- Step 1: build the parent/child base table ods.sku_path.
-- Each purchase-order style is joined to its repeat hierarchy row and to its
-- SKU tiny_class, keeping one hierarchy row per style.
drop table if exists ods.sku_path;
create table ods.sku_path as
with sku_class as (
    -- One row per distinct (product_code, tiny_class) pair.
    select
        product_code,
        tiny_class
    from ods.dim_sku
    group by
        product_code,
        tiny_class
),
po_lineage as (
    select
        po_head.style_id,
        po_head.po_type,
        hier.productyearseason_id,
        hier.repeat_style_id,
        hier.root_style_id,
        hier.style_path,
        -- Rank the rows of each hierarchy style_id, highest repeat_level_style first.
        -- repeat_level_style is intentionally left unqualified, as in the original.
        -- NOTE(review): ties on repeat_level_style are broken arbitrarily.
        row_number() over (
            partition by hier.style_id
            order by repeat_level_style desc
        ) as rn
    from ods.f_po_head po_head
    inner join ods.dim_style_repeat_hierarchy hier
        on po_head.style_id = hier.style_id
    -- Only paths that start with the literal prefix '120150515' are processed.
    where hier.style_path like '120150515%'
      and hier.style_path is not null
)
select
    lineage.style_id,
    lineage.po_type,
    lineage.productyearseason_id,
    lineage.repeat_style_id,
    lineage.root_style_id,
    lineage.style_path,
    lineage.rn,
    sku_class.product_code,
    sku_class.tiny_class
from po_lineage lineage
inner join sku_class
    -- style_id is matched against the SKU product_code.
    -- NOTE(review): a product_code with several tiny_class values yields several rows per style.
    on lineage.style_id = sku_class.product_code
where lineage.rn = 1
;
- 找新的“宗主”
-- Step 2: flag the styles that start a new lineage ("clan head").
-- my_root is the style's own id when it starts a new lineage, otherwise NULL.
drop table if exists ods.sku_path_root;
create table ods.sku_path_root as
select
    child_sku.style_id,
    child_sku.repeat_style_id,
    child_sku.root_style_id,
    child_sku.style_path,
    child_sku.po_type,
    child_sku.tiny_class,
    parent_sku.po_type    as parent_po_type,
    parent_sku.tiny_class as parent_tiny_class,
    case
        -- No parent row found: this style is the first of its line.
        when parent_sku.style_id is null
            then child_sku.style_id
        -- PO type is neither of the two repeat types: starts a new line.
        -- A NULL po_type does not satisfy NOT IN, so it is not flagged here.
        when child_sku.po_type not in ('Fast-track Repeat', 'Standard Repeat')
            then child_sku.style_id
        -- tiny_class differs from the parent's: starts a new line.
        when child_sku.tiny_class <> parent_sku.tiny_class
            then child_sku.style_id
    end as my_root
from ods.sku_path child_sku
left join ods.sku_path parent_sku
    -- Original author's note: matching on root_style_id as well keeps the parent unique.
    on child_sku.repeat_style_id = parent_sku.style_id
   and child_sku.root_style_id = parent_sku.root_style_id
;
- 补空白(找同宗)
-- Step 3: fill the blanks. Give every style the lineage head (my_root) it belongs to.
-- Output: one row per (style_id, style_path, repeat_style_id, po_type, tiny_class) with
--   lvl     - depth of the style in its path: number of '/' separators + 1
--   my_root - the largest my_root among the lineage heads found in the style's own path
-- "if exists" keeps the script re-runnable on a fresh schema, like the other steps.
drop table if exists ods.sku_path_all_root;
create table ods.sku_path_all_root as
select
    a.style_id,
    a.style_path,
    a.repeat_style_id,
    a.po_type,
    a.tiny_class,
    length(a.style_path) - length(replace(a.style_path, '/', '')) + 1 as lvl,
    -- NOTE(review): max() picks the head with the largest id, not the deepest one in the
    -- path; this presumably relies on ids growing along a lineage. Confirm.
    max(b.my_root) as my_root
from ods.sku_path_root a
inner join ods.sku_path_root b
    -- Match my_root only as a whole '/'-delimited path segment. A bare instr() would also
    -- match an id that is merely a substring of another id in the path.
    -- Rows with a NULL my_root (non-heads) never match, as before.
    on instr(concat('/', a.style_path, '/'), concat('/', b.my_root, '/')) > 0
   and a.root_style_id = b.root_style_id
group by
    a.style_id,
    a.style_path,
    a.repeat_style_id,
    a.po_type,
    a.tiny_class
;
- 合并同宗
-- Step 4: merge lineages that share the same origin and write the final result.
-- new_root is the lineage head finally assigned to each style:
--   * repeat-type styles deeper than level 2 take the smallest my_root among all rows
--     whose lineage head has a parent in the same lineage (partition by c.my_root);
--   * every other style keeps the my_root computed in the previous step.
drop table if exists ods.sku_path_result;
create table ods.sku_path_result as
select
    a.style_id,
    a.style_path,
    a.po_type,
    a.tiny_class,
    case
        when a.po_type in ('Fast-track Repeat', 'Standard Repeat') and a.lvl > 2
            then (min(b.my_root) over (partition by c.my_root))
        else a.my_root
    end as new_root
from ods.sku_path_all_root a
-- b: the row of a's own lineage head (the "minor clan").
inner join ods.sku_path_all_root b
    on a.my_root = b.style_id
-- c: the parent of that lineage head. With no parent row, c.my_root is NULL and all
-- such rows fall into one shared window partition.
left join ods.sku_path_all_root c
    on b.repeat_style_id = c.style_id
where a.style_path like '120150515%'
-- NOTE(review): row order in a table created by CTAS is generally not guaranteed to readers.
order by a.style_path
;
# Shell command (not SQL): exports HADOOP_USER_NAME=hive into the current shell environment.
# NOTE(review): presumably so that later Hadoop/Hive client commands run as user "hive"; confirm.
export HADOOP_USER_NAME=hive;