2018-06-03 PYTHON code

# python 2.7 ##

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# import datetime
# import lightgbm as lgb
# from math import radians, cos, sin, asin, sqrt
import os
cwd = os.getcwd()



###
# Input / output locations.
path_train = cwd + "/data/dm/train.csv"  # training file path
path_test = "/data/dm/test.csv"  # test file path
path_result_out = "model/pro_result.csv"  # prediction result file path

# Load the raw training table; ori_data keeps the rows in file order.
ori_data = pd.read_csv(path_train)

# Working copy ordered by terminal, then by time within each terminal.
data = ori_data.sort_values(by=['TERMINALNO', 'TIME'])

# TIME is divided by 60 and truncated to an integer
# (unix seconds -> whole minutes, per the original author's note).
data['TIME'] = (data['TIME'] / 60).astype('int')

# NEW_TRIPID starts as a copy of TRIP_ID (the original author notes that some
# of the original ids are wrong); it is overwritten later by comb2trips.
data['NEW_TRIPID'] = data['TRIP_ID']

# Fix the column order: later code addresses columns by position.
columnsTitles = [
    'TERMINALNO', 'TIME', 'NEW_TRIPID', 'LONGITUDE', 'LATITUDE',
    'DIRECTION', 'HEIGHT', 'SPEED', 'CALLSTATE', 'Y', 'TRIP_ID',
]
data = data.reindex(columns=columnsTitles)

# Turn matplotlib interactive mode off.
plt.ioff()


# def plt_trip(X, Y1, Y2, Y3, Y4, Y5, Y6, Term):
#     N_trips = len(Y4)
#     f, axs = plt.subplots(6, N_trips, figsize=(N_trips / 6 * 6, 6))
#     f.subplots_adjust(hspace=.5, wspace=.5)
#     axs = axs.ravel()
#
#     for j in range(0, N_trips):  # iterate on trips
#         axs[(1 - 1) * N_trips + j].plot(X[j], Y1[j])
#         axs[(2 - 1) * N_trips + j].plot(X[j], Y2[j])
#         axs[(3 - 1) * N_trips + j].plot(X[j], Y3[j])
#         axs[(4 - 1) * N_trips + j].plot(X[j], Y4[j])
#         axs[(5 - 1) * N_trips + j].plot(X[j], Y5[j])
#         axs[(6 - 1) * N_trips + j].plot(X[j], Y6[j])
#     f.savefig(str(Term) + "test.png")


# plot
def plt_trips(x, y1, y2, y3, y4, y5, y6, Tmp_Term):
    """Plot six signals against ``x`` in stacked subplots and save them as a PNG.

    The six series are labelled, in order, Longitude, Latitude, Direction,
    Height, Speed and Call_state.  The figure is written to
    ``str(Tmp_Term) + "test.png"`` in the current working directory and is
    closed afterwards.

    :param x: values for the horizontal axis (labelled 'time').
    :param y1: series plotted as Longitude.
    :param y2: series plotted as Latitude.
    :param y3: series plotted as Direction.
    :param y4: series plotted as Height.
    :param y5: series plotted as Speed.
    :param y6: series plotted as Call_state.
    :param Tmp_Term: value used as the prefix of the output file name.
    :return: None
    """
    y_label = ['Longitude', 'Latitude', 'Direction', 'Height', 'Speed', 'Call_state']
    series = [y1, y2, y3, y4, y5, y6]

    # The width grows with the number of samples (0.05 inch each).  It is
    # clamped so that an empty or very short input does not ask for a
    # zero-width / unreadably narrow figure.
    width = max(0.05 * len(x), 1.0)
    fig, axs = plt.subplots(6, 1, figsize=(width, 8))
    try:
        fig.subplots_adjust(hspace=.5, wspace=.5)
        for ax, values, label in zip(axs.ravel(), series, y_label):
            ax.plot(x, values)
            ax.set_xlabel('time')
            ax.set_ylabel(label)
            ax.set_title('Time and ' + label)
        fig.savefig(str(Tmp_Term) + "test.png")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, one figure per call would accumulate in memory.
        plt.close(fig)


# def div2Trip(data):
#     Curr_Term = data.iloc[0, 0]
#     Curr_Time = data.iloc[0, 1]
#     Curr_Trip = data.iloc[0, 2]
#     # Create 1d list to store a single trip
#     x, y1, y2, y3, y4, y5, y6 = ([] for i in range(7))
#     # Create a variable-sized 2d list for a terminal
#     X, Y1, Y2, Y3, Y4, Y5, Y6 = ([] for i in range(7))
#
#     for i in range(0, 100):
#         Tmp_Term = data.iloc[i, 0]
#         Tmp_Time = data.iloc[i, 1]
#         Tmp_Trip = data.iloc[i, 2]
#         if Tmp_Term == Curr_Term:
#             if Tmp_Trip == Curr_Trip:
#                 # print('aaa')
#                 x.append(data.iloc[i, 1]-Curr_Time)
#                 y1.append(data.iloc[i, 3])
#                 y2.append(data.iloc[i, 4])
#                 y3.append(data.iloc[i, 5])
#                 y4.append(data.iloc[i, 6])
#                 y5.append(data.iloc[i, 7])
#                 y6.append(data.iloc[i, 8])
#             else:
#                 Curr_Time = data.iloc[i, 1]
#                 Curr_Trip = data.iloc[i, 2]
#                 X += [x]
#                 Y1 += [y1]
#                 Y2 += [y2]
#                 Y3 += [y3]
#                 Y4 += [y4]
#                 Y5 += [y5]
#                 Y6 += [y6]
#                 print('bbb')
#                 x = []
#                 y1, y2, y3, y4, y5, y6 = ([] for i in range(6))
#                 x.append(data.iloc[i, 1]-Curr_Time)
#                 y1.append(data.iloc[i, 3])
#                 y2.append(data.iloc[i, 4])
#                 y3.append(data.iloc[i, 5])
#                 y4.append(data.iloc[i, 6])
#                 y5.append(data.iloc[i, 7])
#                 y6.append(data.iloc[i, 8])
#         else:
#             print ('ddd')
#             Curr_Term = data.iloc[i, 0]
#             plt_trip(X, Y1, Y2, Y3, Y4, Y5, Y6, Curr_Term - 1)
#             # continue


def comb2trips(data, max_gap=5):
    """Re-number the trips of every terminal in place, splitting on time gaps.

    Columns are addressed by position: column 0 holds the terminal id,
    column 1 the timestamp and column 2 receives the new trip id.  Rows are
    processed in their current order, so they are expected to be grouped by
    terminal and ordered by time within each terminal.

    The first row of each terminal gets trip id 1.  For every following row
    of the same terminal the trip id is increased by one whenever the
    timestamp exceeds the previous row's timestamp by more than ``max_gap``.

    :param data: DataFrame to update; column 2 is overwritten.
    :param max_gap: largest time difference between two consecutive rows that
        still counts as the same trip, in the unit of column 1 (the default 5
        matches the original hard-coded threshold).
    :return: None (``data`` is modified in place).
    """
    n_rows = len(data)
    if n_rows == 0:
        # Nothing to label; indexing row 0 would raise IndexError.
        return

    # Read both input columns once instead of doing a scalar lookup per row.
    terms = data.iloc[:, 0].values
    times = data.iloc[:, 1].values
    trip_ids = np.empty(n_rows, dtype='int64')

    curr_term = terms[0]
    curr_trip = 1
    trip_ids[0] = 1

    for i in range(1, n_rows):
        if terms[i] == curr_term:
            # Same terminal: a large enough gap to the previous row starts a new trip.
            if (times[i] - times[i - 1]) > max_gap:
                curr_trip += 1
                print('Find a new trip ' + str(i))
        else:
            # New terminal: restart the trip numbering.
            curr_term = terms[i]
            curr_trip = 1
            print ('Curr_Term ' + str(curr_term))
        trip_ids[i] = curr_trip

    # Write the whole trip-id column back in a single assignment.
    data.iloc[:, 2] = trip_ids


# Re-number the trips of every terminal; comb2trips updates data in place.
comb2trips(data)

# Persist the re-labelled table and load it back as df for the steps below.
data.to_pickle('rearranged_data')
df = pd.read_pickle('rearranged_data')

# Drop the references to the intermediate frames; only df is used from here on.
del data, ori_data

# Summary statistics of the trip ids and the target column.  The results are
# not stored or printed, so these lines only show output in an interactive session.
df['NEW_TRIPID'].describe()
df['TRIP_ID'].describe()
df['Y'].describe()


def features_append(features, series):
    """Append ``series`` to the ``features`` list and return that list.

    The list is modified in place, so callers that ignore the return value
    still see the new element.

    :param features: list collecting the feature values.
    :param series: value to add at the end of ``features``.
    :return: the same ``features`` object that was passed in.
    """
    features.append(series)
    return features


def define_test(data):
    """Create an unfilled result table with the columns 'Id' and 'Pred'.

    The table gets one row per integer in ``range(m)``, where ``m`` is the
    largest value found in the first column of ``data``.  For an empty
    ``data`` a table with zero rows is returned.

    :param data: DataFrame whose first column holds the ids that size the result.
    :return: DataFrame indexed 0..m-1 with empty 'Id' and 'Pred' columns.
    """
    if len(data) == 0:
        # max() of an empty column is undefined; an empty table is the natural result.
        length = 0
    else:
        # Series.max() is computed by pandas instead of a Python-level loop;
        # int() makes the value usable as a range() bound.
        length = int(data.iloc[:, 0].max())
    return pd.DataFrame(index=range(length), columns=['Id', 'Pred'])


def pred_ratio(features):
    """Return the prediction value for one feature list (placeholder).

    The ``features`` argument is currently ignored and the constant 0 is
    returned for every input.

    :param features: collected feature values (unused).
    :return: always 0.
    """
    return 0


def _store_prediction(result, term, features):
    # Write the (Id, Pred) pair of one terminal into row ``term - 1`` of result.
    result.iloc[term - 1, 0] = term
    result.iloc[term - 1, 1] = pred_ratio(features)


def pred_gen(df, result):
    """Fill ``result`` with one prediction per run of equal terminal ids in ``df``.

    Columns are addressed by position: column 0 of ``df`` is the terminal id
    and column 4 supplies the value collected as a feature.  Rows are
    processed in order; every time the terminal id changes, the features
    collected so far are turned into a prediction with ``pred_ratio`` and
    stored in row ``terminal - 1`` of ``result`` (column 0 = id,
    column 1 = prediction).  The last terminal is stored after the final row.

    The scan starts with terminal id 1, so if the first row belongs to another
    terminal an entry for terminal 1 is written with an empty feature list.

    :param df: DataFrame with the terminal id in column 0 and the feature
        source in column 4.
    :param result: DataFrame to fill in place; needs at least two columns and
        a row for every terminal id encountered.
    :return: None
    """
    if len(df) == 0:
        # No rows: nothing is written.
        return

    # Read both columns once instead of doing a scalar lookup per row.
    terms = df.iloc[:, 0].values
    values = df.iloc[:, 4].values

    curr_term = 1
    features = []
    for tmp_term, value in zip(terms, values):
        if tmp_term != curr_term:
            # Terminal changed: flush the finished terminal, start a new one.
            print(tmp_term)
            _store_prediction(result, curr_term, features)
            curr_term = tmp_term
            features = []
        # Collect the row for the current terminal, including the first row
        # after a change, which must not be skipped.
        features_append(features, value)

    print('end of final term: ')
    print(curr_term)
    _store_prediction(result, curr_term, features)


# Allocate the result table (sized from the first column of df), then let
# pred_gen fill it in place, one row per terminal.
# NOTE(review): result is not written to path_result_out anywhere in the
# visible code - confirm whether saving the predictions is still missing.
result = define_test(df)
pred_gen(df, result)

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 202,723评论 5 476
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 85,080评论 2 379
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 149,604评论 0 335
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 54,440评论 1 273
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 63,431评论 5 364
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 48,499评论 1 281
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 37,893评论 3 395
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 36,541评论 0 256
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 40,751评论 1 296
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 35,547评论 2 319
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 37,619评论 1 329
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 33,320评论 4 318
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 38,890评论 3 307
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 29,896评论 0 19
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 31,137评论 1 259
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 42,796评论 2 349
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 42,335评论 2 342

推荐阅读更多精彩内容