需求是要计算商品的连带情况,即商品组合。比如一个订单同时买了ABC3个商品,则有A、B、C、AB、AC、BC、ABC7种组合情况,单个不算为4种。
一条数据记录里的商品组合就需要循环计算,当数据量大时性能是问题。
尝试了很多方法都无法适应大数据量,目前想到1个还算“将就”的解法。
- 一张组合表
几个数的组合情况,当个数已定,其组合情况即确定。
当真实数据代入后,取本表的idx作为数组下标。
create table dim_combination(id int,idx int[],counts int)
;
insert into dim_combination values(1,array[1],1)
;
-- 依次插入从1开始直到10个数的组合
insert into dim_combination
with t as(
select 10 as id
)select t.id,b.idx||t.id,t.counts+1
from t left join tenant_1800000143_rst.dim_combination b on t.id=b.id+1
union all
select t.id,b.idx,t.counts
from t left join tenant_1800000143_rst.dim_combination b on t.id=b.id+1
union all
select t.id,array[t.id],1 from t
数据预览
id | idx | counts |
---|---|---|
1 | {1} | 1 |
2 | {1} | 1 |
2 | {2} | 1 |
2 | {1,2} | 2 |
3 | {3} | 1 |
3 | {1,2,3} | 3 |
3 | {2,3} | 2 |
3 | {1,3} | 2 |
3 | {1} | 1 |
3 | {2} | 1 |
3 | {1,2} | 2 |
- 一个转换函数
传入数组和组合下标,返回该数组的所有组合。
(这步不佳,用到了循环。本打算用动态sql,将下标1,2,3替换成arr[1],arr[2],arr[3]进行动态sql查询返回,一直出错,后续再优化。)
CREATE OR REPLACE FUNCTION f_get_combination(arr text[],arr_id int[])
RETURNS text[] as
-- select f_get_combination(array['a','b','c'],array[1,2])
$BODY$
declare sql text;
rst text[];
i int;
begin
for i in 1..array_length(arr_id,1) loop
rst=rst||arr[i];
end loop
;
return rst;
end
$BODY$
LANGUAGE plpgsql
- 查询示范regexp_split_to_array(skc_codes,',')
select a.* ,b.idx,f_get_combination(skc_codes,idx)
from bi_fct_sales_skcs a
inner join dim_combination b on a.counts=b.id and b.counts>1
如下面3个商品,得到4种组合情况。
skc_codes | counts | idx | f_get_combination |
---|---|---|---|
KC19324590046,KM19419997039,KN19416516051 | 3 | {1,2} | {KC19324590046,KM19419997039} |
KC19324590046,KM19419997039,KN19416516051 | 3 | {1,3} | {KC19324590046,KM19419997039} |
KC19324590046,KM19419997039,KN19416516051 | 3 | {2,3} | {KC19324590046,KM19419997039} |
KC19324590046,KM19419997039,KN19416516051 | 3 | {1,2,3} | {KC19324590046,KM19419997039,KN19416516051} |
运用
-- 构造各个订单的skc组合(去重+排序)(不含1个及10个以上的)
create table tenant_1800000143_rst.fct_sales_skcs as
select doc_id,array_agg(distinct skc_code order by skc_code) skc_codes,count(distinct skc_code) counts
--select * -- select count(*)--732123
from tenant_1800000143_rst.bi_fct_skc_sales_detail
group by doc_id
having count(distinct skc_code)>1 and count(distinct skc_code)<10
;
-- 生成skc的任意个数组合
create table tenant_1800000143_rst.fct_sales_skcs_combination as
select a.* ,b.idx,f_get_combination(skc_codes,idx)
from tenant_1800000143_rst.fct_sales_skcs a
inner join dim_combination b on a.counts=b.id and b.counts>1
动态sql生成组合(函数改写)
CREATE OR REPLACE FUNCTION public.f_get_combination2(arr text[], arr_id text)
RETURNS text[]
-- select public.f_get_combination2(array['a','b','c'],'1,3')
LANGUAGE plpgsql
AS $function$
declare sql text;
rst text;--[];
i int;
begin
sql='select array[$1['||replace(arr_id,',','],$1[')||']]';
raise notice 'sql: %',sql;
execute sql using $1 into rst;
--execute 'select count(*) from dictionarys where parentid=$1' using $1 into v_count;
return rst;
end
$function$
;