import pandas as pd
import sqlalchemy
import datetime
import numpy as np
def lastDayofMonth(x: datetime.date) -> datetime.date:
    """Return the last day of the month that contains *x*.

    Works for any object offering ``replace(day=...)`` and ``timedelta``
    arithmetic (``datetime.date``, ``datetime.datetime``); the result has the
    same type as the input and, for datetimes, keeps the time of day.
    """
    # Day 28 exists in every month, and 28 + 4 days always lands inside the
    # following month (day 1-4), whatever the month length.
    next_month = x.replace(day=28) + datetime.timedelta(days=4)
    # Stepping back by that day-of-month lands on the last day of x's month.
    return next_month - datetime.timedelta(days=next_month.day)
# NOTE(review): this connection URL embeds a database user name and password
# in the source file; consider loading it from an environment variable or a
# secrets store instead.
con='mysql+pymysql://kykviewer:$KykForView@keyikedb.mysql.rds.aliyuncs.com/wechat_finance_db'

# One row per order: insurance orders are UNIONed with loan / credit-card
# orders. The query spans several lines, so it has to be a triple-quoted
# string literal.
order = pd.read_sql(""" SELECT o1.agent_id,
'保险' AS product_type,
o1.busi_time as order_time,
case when o1.status in('TO_TRIAL','TO_RETRIAL','TRIAL_FAIL','RETRIAL_FAIL') then'进件失效' else'进件成功' end as status_type,
LAST_DAY(o1.busi_time) as lastDayofMonth
FROM chsell_insuran_order o1
UNION ALL
SELECT o2.agent_id,
CASE WHEN pp.type IN ( 'loan', 'other' ) THEN'贷款' WHEN pp.type = 'credit_card' THEN'信用卡' END AS product_type,
o2.busi_time as order_time,
case when o2.status in('TO_TRIAL','TO_RETRIAL','TRIAL_FAIL','RETRIAL_FAIL') then'进件失效' else'进件成功' end as status_type,
LAST_DAY(o2.busi_time) as lastDayofMonth
FROM chsell_order o2
LEFT JOIN chsell_product pp ON o2.product_id = pp.id """,
                    con=con)

# Reduce the order timestamp to a plain calendar date.
# NOTE(review): presumably busi_time is never NULL; a missing value would not
# survive this conversion as a missing value - confirm against the schema.
order['order_time'] = pd.to_datetime(order['order_time'], dayfirst=True)
order['order_time'] = order['order_time'].apply(datetime.datetime.date)

# Overwrite the SQL-computed month end with one derived from the converted
# order date.
order['lastDayofMonth'] = order['order_time'].apply(lastDayofMonth)

# Sorted, de-duplicated month ends across all orders; getPass and getOrder
# read this module-level name as their list of cut-off dates.
dates = order['lastDayofMonth'].sort_values().unique()
def getPass(df, month_ends=None):
    """Summarise one agent's successful orders as of each month end.

    Args:
        df: Orders of a single agent, with columns ``agent_id``,
            ``order_time`` and ``status_type``.
        month_ends: Cut-off dates to evaluate. Defaults to the module-level
            ``dates`` sequence, so the function can still be passed directly
            to ``groupby(...).apply``.

    Returns:
        A DataFrame with one row per cut-off on or before which the agent had
        at least two successful ('进件成功') orders, with columns
        ``agent_id``, ``lastDayofMonth``, ``avg_interval`` (mean gap in days
        between consecutive successful orders), ``pass_cnt`` and
        ``days_since_last_order_until_endofMonth``. The index is a fresh
        integer index. Returns ``None`` when no cut-off qualifies.
    """
    if month_ends is None:
        month_ends = dates

    # Filter and sort once: every cut-off below selects a subset of this
    # frame, and boolean masking keeps the rows in sorted order.
    passed = df[df['status_type'] == '进件成功'].sort_values(by='order_time')

    records = []
    for dt in month_ends:
        upto = passed[passed['order_time'] <= dt]
        cnt = len(upto)
        if cnt < 2:
            # An interval needs at least two orders.
            continue
        times = upto['order_time'].tolist()
        # cnt >= 2 guarantees at least one gap, so no NaN fallback is needed.
        gaps = [(later - earlier).days
                for earlier, later in zip(times, times[1:])]
        records.append({
            'agent_id': upto['agent_id'].values[0],
            'lastDayofMonth': dt,
            'avg_interval': np.mean(gaps),
            'pass_cnt': cnt,
            # times is sorted ascending, so its last element is the latest order.
            'days_since_last_order_until_endofMonth': (dt - times[-1]).days,
        })

    if not records:
        return None
    return pd.DataFrame(records).drop_duplicates()
def getOrder(df, month_ends=None):
    """Summarise all of one agent's orders as of each month end.

    Args:
        df: Orders of a single agent, with columns ``agent_id``,
            ``order_time`` and ``status_type``.
        month_ends: Cut-off dates to evaluate. Defaults to the module-level
            ``dates`` sequence, so the function can still be passed directly
            to ``groupby(...).apply``.

    Returns:
        A DataFrame with one row per cut-off on or before which the agent had
        at least two orders of any status, with columns ``agent_id``,
        ``lastDayofMonth``, ``avg_interval`` (mean gap in days between
        consecutive orders), ``order_cnt``, ``pass_cnt`` (orders whose
        status_type is '进件成功') and
        ``days_since_last_order_until_endofMonth``. The index is a fresh
        integer index. Returns ``None`` when no cut-off qualifies.
    """
    if month_ends is None:
        month_ends = dates

    # Sort once: every cut-off below selects a subset of this frame, and
    # boolean masking keeps the rows in sorted order.
    ordered = df.sort_values(by='order_time')

    records = []
    for dt in month_ends:
        upto = ordered[ordered['order_time'] <= dt]
        cnt = len(upto)
        if cnt < 2:
            # An interval needs at least two orders.
            continue
        times = upto['order_time'].tolist()
        # cnt >= 2 guarantees at least one gap, so no NaN fallback is needed.
        gaps = [(later - earlier).days
                for earlier, later in zip(times, times[1:])]
        records.append({
            'agent_id': upto['agent_id'].values[0],
            'lastDayofMonth': dt,
            'avg_interval': np.mean(gaps),
            'order_cnt': cnt,
            'pass_cnt': int((upto['status_type'] == '进件成功').sum()),
            # times is sorted ascending, so its last element is the latest order.
            'days_since_last_order_until_endofMonth': (dt - times[-1]).days,
        })

    if not records:
        return None
    return pd.DataFrame(records).drop_duplicates()
# Discard rows with any missing value once, then derive both per-agent
# summaries from the same grouping.
complete_orders = order.dropna()
orders_by_agent = complete_orders.groupby('agent_id', as_index=False)

# t1: statistics over all orders; t2: statistics over successful orders only.
t1 = orders_by_agent.apply(getOrder).reset_index(drop=True)
t2 = orders_by_agent.apply(getPass).reset_index(drop=True)

# Write each summary to its own workbook, without the index column.
t1.to_excel('d:/dataset/进件.xlsx', index=False)
t2.to_excel('d:/dataset/成交.xlsx', index=False)