# -*- coding: utf-8 -*-
"""
Created on Tue Jun 26 16:52:22 2018
@author: hao.wang
"""
import os
import pandas as pd
import sys
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import scipy.signal as signal
from sklearn.preprocessing import scale
cwd = os.getcwd()
# Original author's local path, kept for reference:
# data_dir = 'D:\百分点工作资料\PHM技术资料\_871PHM_贝晓强\871PHM\871PHM\data_dictionary\data1.csv'
data_dir = 'data1.csv'
strPath = data_dir

# Load the raw export and order it by machine code, then by time.
ori_data = pd.read_csv(strPath)
ori_data = ori_data.sort_values(by=['code', 'time'])

# One frame per machine code.
machine1 = ori_data[ori_data['code'].isin([663201])]
machine2 = ori_data[ori_data['code'].isin([663202])]
machine3 = ori_data[ori_data['code'].isin([663203])]

# Round-trip through CSV.  The files are read again further down in this
# file; reading them back also turns the saved row index into an
# 'Unnamed: 0' column and lets pandas re-infer the dtypes.
machine1.to_csv('machine1.csv')
machine2.to_csv('machine2.csv')
machine3.to_csv('machine3.csv')
machine1 = pd.read_csv('machine1.csv')
machine2 = pd.read_csv('machine2.csv')
machine3 = pd.read_csv('machine3.csv')

# Delete the non-numerical columns.  The decision is taken from machine1's
# dtypes and applied to all three frames.  Every column is inspected: the
# previous index loop stopped one short and relied on positional `tmp[i]`
# access on a label-indexed Series.
tmp = machine1.dtypes == 'object'  # True where the column is not numerical
non_numeric = [column for column, is_object in tmp.items() if is_object]
machine1 = machine1.drop(columns=non_numeric)
machine2 = machine2.drop(columns=non_numeric)
machine3 = machine3.drop(columns=non_numeric)
# def separate(df, n):
# to_cluster_data = df.loc[:, ('frequency', 'GNOM', 'MP02', 'MP03', 'MP05', 'MP10', 'MP11')]
# scaled = scale(to_cluster_data, axis=0)
# scaled[:, 0] = scaled[:, 0]*6
# estimator = KMeans(n_clusters=n)
# estimator.fit(scaled)
# label_pred = estimator.labels_
# # centroids = estimator.cluster_centers_
# # inertia = estimator.inertia_
# return label_pred
# Frequency distribution of each machine and of the whole data set.  The
# results are not stored; they are only visible in an interactive session.
for _frame in (machine1, machine2, machine3, ori_data):
    _frame['frequency'].value_counts()
# Distribution of the preset power on machine 1.
machine1['preset_power'].value_counts()
def choose_frequency(machine, min_share=0.02):
    """Return the frequencies that account for more than ``min_share`` of the rows.

    Args:
        machine: DataFrame with a 'frequency' column.
        min_share: Fraction of ``len(machine)`` that a frequency's row count
            must strictly exceed to be selected.  The default of 0.02 is the
            threshold this function used before it became a parameter.

    Returns:
        A pandas Index holding the selected frequency values, in the order
        produced by ``value_counts()``.
    """
    counts = machine['frequency'].value_counts()
    return counts.index[counts > len(machine) * min_share]
# Evaluate the dominant frequencies of every machine (results are discarded
# unless run interactively).
for _machine in (machine1, machine2, machine3):
    choose_frequency(_machine)
# use line plots to show every column as a series
def save_seriesplot(df, name):
    """Save one line plot per column of ``df`` to ``<name>_seriesplot.png``.

    Each column is plotted against a fresh 0..n-1 index and titled with its
    column name.  The last column of ``df`` is not plotted; this keeps the
    column selection this helper has always used.
    NOTE(review): confirm whether leaving out the last column is intended.

    Args:
        df: DataFrame whose columns are plotted.
        name: Prefix of the output file name.
    """
    # df.shape works on an empty frame, unlike df.iloc[0, :].
    n_plots = max(df.shape[1] - 1, 0)
    # Keep the usual 27x2 grid, but grow it when there are more columns
    # than axes instead of failing with an IndexError.
    n_rows = max(27, -(-n_plots // 2))
    # switch off on-screen display of the figure
    plt.ioff()
    try:
        f, axs = plt.subplots(n_rows, 2, figsize=(10 * 2, n_rows * 2))
        try:
            axs = axs.ravel()  # flatten the grid of axes
            for i in range(n_plots):
                axs[i].plot(df.iloc[:, i].reset_index(drop=True))
                axs[i].set_title(df.columns[i])
            f.savefig(str(name) + '_seriesplot.png')
        finally:
            # release the figure so repeated calls do not accumulate memory
            plt.close(f)
    finally:
        # switch on-screen display back on, even if plotting failed
        plt.ion()
# export and plot the rows of one machine, one dominant frequency at a time
def choose_and_plot(machine, name):
    """Write a CSV and a series plot for each frequency picked by choose_frequency.

    For every selected frequency the rows with that frequency and with
    ``op_st == 112`` are saved to ``freq_<frequency>_<name>.csv`` and plotted
    via ``save_seriesplot`` under the prefix ``freq_<frequency>_<name>``.
    The frequency is printed before it is processed.

    Args:
        machine: DataFrame with 'frequency' and 'op_st' columns.
        name: Label appended to the output file names.
    """
    for frequency in choose_frequency(machine):
        print(frequency)
        prefix = 'freq_' + str(frequency) + '_' + name
        at_frequency = machine['frequency'] == frequency
        in_state_112 = machine['op_st'] == 112
        subset = machine[at_frequency & in_state_112]
        subset.to_csv(prefix + '.csv')
        save_seriesplot(subset, prefix)
# Export and plot the dominant frequencies of every machine.
choose_and_plot(machine1, 'machine1')
choose_and_plot(machine2, 'machine2')
choose_and_plot(machine3, 'machine3')
len(machine1)
# NOTE: a stray `machine_freq['']` statement used to follow here.
# `machine_freq` only exists inside choose_and_plot, so the statement raised
# NameError at module level and stopped the script; it has been disabled.
# machine_freq['']
# define a function to find the average stable length of a series
# def mean_normal(series):
# tmp_length = 1 # store the temp normal series length
# shift_times = 0 # store the total shift times
# length = 0
# search_range = len(series)
# for i in range(1, search_range):
# if series[i]==series[i-1]:
# tmp_length += 1
# else:
# shift_times += 1
# length = length/shift_times*(shift_times-1) + tmp_length/shift_times*1
# # print(tmp_length)
# # print(length)
# tmp_length = 1
# # do again for the last patch since there is not shift
# shift_times += 1
# return length/shift_times*(shift_times-1) + tmp_length/shift_times*1
#
#
# mean_normal(label_pred)
#
#
# # define my function to smooth abnormal shift
# def smooth_abnormal(series):
# length = mean_normal(series)
# radius = int(length/2)
# series_new = series
# # then we try to smooth the abnormal shifts:
# for i in range(0, len(series)):
# if series[i] != series[i-radius] and series[i] != series[i+radius]:
# series_new[i] = series[i-radius]
# return series_new
#
#
# # label_smooth = smooth_abnormal(label_pred)
# label_smooth = signal.medfilt(label_pred, 999)
#
# f, axs = plt.subplots(4, 1, figsize=(10, 10))
# axs = axs.ravel() # use ravel to make the tuple flat
# axs[0].plot(label_pred)
# axs[0].set_title('label_pred')
# axs[1].plot(label_smooth)
# axs[1].set_title('label_smooth')
# axs[2].plot(machine1['frequency'])
# axs[2].set_title('frequency')
#
#
# machine1['label'] = label_smooth
# machine1['label'] = label_pred
# Check that the three machine frames together hold as many rows as the
# original data (the comparison result is not stored).
len(ori_data) == sum(len(_part) for _part in (machine1, machine2, machine3))
# The raw frame is no longer needed.
del ori_data
machine1.columns
# separate the machine data into different frequency
# machine1_frequency_1 = machine1[machine1['label'].isin(['0'])]
# machine1_frequency_2 = machine1[machine1['label'].isin(['1'])]
# machine1_frequency_3 = machine1[machine1['label'].isin(['2'])]
#
# machine1_frequency_1 = machine1_frequency_1[machine1_frequency_1['op_st'].isin([112])]
# machine1_frequency_2 = machine1_frequency_2[machine1_frequency_2['op_st'].isin([112])]
# machine1_frequency_3 = machine1_frequency_3[machine1_frequency_3['op_st'].isin([112])]
# summarize every column of a machine's data in a log file
def save_quant(df, name):
    """Write ``describe()`` of every column of ``df`` to ``<name>_message.log``.

    The summaries are printed straight into the log file, so ``sys.stdout``
    is never redirected, and the file is closed even when a summary fails.

    Args:
        df: DataFrame to summarize column by column.
        name: Prefix of the log file name.
    """
    with open(str(name) + "_message.log", "w") as log_file:
        for item in df.columns:
            print(df[item].describe(), file=log_file)
# Write the per-column summary log of every machine.
for _label, _frame in (('machine1', machine1),
                       ('machine2', machine2),
                       ('machine3', machine3)):
    save_quant(_frame, _label)
# use boxplot to show the data
def save_boxplot(df, name):
    """Save one box plot per column of ``df`` to ``<name>_boxplot.png``.

    Each subplot is titled with its column name.  The last column of ``df``
    is not plotted; this keeps the column selection this helper has always
    used.
    NOTE(review): confirm whether leaving out the last column is intended.

    Args:
        df: DataFrame whose columns are plotted.
        name: Prefix of the output file name.
    """
    # df.shape works on an empty frame, unlike df.iloc[0, :].
    n_plots = max(df.shape[1] - 1, 0)
    n_cols = 9
    # Keep the usual 6x9 grid, but grow it when there are more columns than
    # axes instead of failing with an IndexError.
    n_rows = max(6, -(-n_plots // n_cols))
    # switch off on-screen display of the figure
    plt.ioff()
    try:
        # 36x54 inches for the default six rows
        f, axs = plt.subplots(n_rows, n_cols, figsize=(6 * 6, 9 * n_rows))
        try:
            axs = axs.ravel()  # flatten the grid of axes
            for i in range(n_plots):
                axs[i].boxplot(df.iloc[:, i])
                axs[i].set_title(df.columns[i])
            f.savefig(str(name) + '_boxplot.png')
        finally:
            # release the figure so repeated calls do not accumulate memory
            plt.close(f)
    finally:
        # switch on-screen display back on, even if plotting failed
        plt.ion()
# Box plots of every machine.
save_boxplot(machine1, 'machine1')
save_boxplot(machine2, 'machine2')
save_boxplot(machine3, 'machine3')
# NOTE: machine1_frequency_1..3 are only created by code that is commented
# out in this file, so these calls raised NameError.  They stay disabled
# until those frames are defined again.
# save_seriesplot(machine1_frequency_1, 'machine1_frequency_1')
# save_seriesplot(machine1_frequency_2, 'machine1_frequency_2')
# save_seriesplot(machine1_frequency_3, 'machine1_frequency_3')
# Series plots of the remaining machines.
save_seriesplot(machine2, 'machine2')
save_seriesplot(machine3, 'machine3')
import os
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
wd = os.getcwd()
data_dir = '/data_20180627/my_871uptransmitterminutedata2018-06-27.csv'
# Read the tab-separated export; it has no header row.
ori_data = pd.read_csv(wd + data_dir, sep='\t', header=None)
# Column 8 packs the measurements into one ':'-separated string.
packed_measurements = ori_data.iloc[:, 8]
# Split it into one column per field and drop the first column, which is empty.
split_data = packed_measurements.str.split(':', expand=True).drop(columns=0)
# Name the measurement columns.
split_data.columns = [
    'mode', 'frequency', 'preset_power', 'mdlt_md', 'op_st', 'laV1', 'lg1V2',
    'lg2V2', 'laV2', 'VaV2', 'APD', 'VSWR', 'Pfwd', 'Vfil1', 'VaV1', 'Vfil2',
    'Vg1V2', 'Vg2V2', 'water', 'MP01', 'MP13', 'MP02', 'MP03', 'MP04', 'MP05',
    'MP06', 'MP07', 'MP08', 'MP09', 'MP10', 'MP11', 'MP12', 'MS22', 'MS23',
    'MS24', 'MS25', 'MS27', 'CPFD', 'CPRV', 'CPH1', 'CPH2', 'RFAT', 'GNOM',
    'PREEQ', 'TAF', 'TOL', 'TAO', 'TNOM', 'TP', 'TTCS', 'TIS', 'TSF', 'TFS',
    'TFH', 'TBHH', 'switch_comb',
]
# Number of measurement columns (only visible interactively).
len(split_data.columns)
# Columns holding number strings such as '57': convert them with
# pd.to_numeric; entries that cannot be parsed become 0.
numeric_columns = [
    'mode', 'frequency', 'preset_power', 'mdlt_md', 'op_st', 'laV1', 'lg1V2',
    'lg2V2', 'laV2', 'VaV2', 'APD', 'VSWR', 'Pfwd', 'Vfil1', 'VaV1', 'Vfil2',
    'Vg1V2', 'Vg2V2', 'water', 'MP01', 'MP13', 'MP02', 'MP03', 'MP04', 'MP05',
    'MP06', 'MP07', 'MP08', 'MP09', 'MP10', 'MP11', 'MP12', 'CPFD', 'CPRV',
    'CPH1', 'CPH2', 'RFAT', 'GNOM', 'TAF', 'TOL', 'TAO', 'TNOM', 'TP', 'TTCS',
    'TIS', 'TSF', 'TFS', 'TFH', 'TBHH', 'switch_comb',
]
for column in numeric_columns:
    parsed = pd.to_numeric(split_data[column], errors='coerce')
    split_data[column] = parsed.fillna(0)

# Columns holding ON/OFF flags: 'ON' becomes 1, everything else 0.
on_off_columns = ['MS22', 'MS23', 'MS24', 'MS25', 'MS27', 'PREEQ']
for column in on_off_columns:
    is_on = split_data[column] == 'ON'
    split_data[column] = is_on.astype(int)
# Give both frames an explicit 'index' column to merge on.
split_data['index'] = split_data.index
# Columns 1 and 2 of the raw export hold the machine code and the time.
# `.iloc` replaces `.ix`, which no longer exists in current pandas.
other_data = pd.DataFrame({'code': ori_data.iloc[:, 1], 'time': ori_data.iloc[:, 2]})
other_data['index'] = other_data.index
# merge to obtain a new dataframe
new_data = pd.merge(other_data, split_data, on='index')
new_data.columns
new_data.dtypes
new_data = new_data.sort_values(by='time')
# Round-trip through CSV; new_data.csv is read again further down in this
# file, and reading it back adds the saved row index as 'Unnamed: 0'.
new_data.to_csv('new_data.csv')
new_data = pd.read_csv('new_data.csv')
# change the time string to 'datetime' format
new_data['time'] = pd.to_datetime(new_data['time'])
# Index by time but keep 'time' as a regular column as well.
new_data = new_data.set_index('time', drop=False)
# Cut new_data into three date windows using its time index.
new_20170725 = new_data.loc['2017-07-24':'2017-07-26']
new_20170805 = new_data.loc['2017-08-04':'2017-08-06']
new_20170815 = new_data.loc['2017-08-14':'2017-08-17']
# Compare the combined length of the windows with the full length
# (values are only visible interactively).
len(new_20170725) + len(new_20170805) + len(new_20170815)
len(new_data)
# Find the time points where the high voltage was pending, using TTCS.
for _window in (new_20170725, new_20170805, new_20170815):
    _window['TTCS'].value_counts()
new_20170815[new_20170815['TTCS'] == 1]
# Find the coarse-tuning time points, using TFS.
# 1:normal 2:coarse 3:fine 4: VaRising
for _window in (new_20170725, new_20170805, new_20170815):
    _window['TFS'].value_counts()
tmp = new_20170725[new_20170725['TFS'] == 2.0]
new_20170725[new_20170725['TFS'] == 3.0]
new_20170725[new_20170725['TFS'] == 4.0]
# Plotting
# use line plots to show the measurement columns as series
def save_seriesplot(df, name):
    """Save 56 stacked line plots of ``df`` to ``<name>_seriesplot.png``.

    The columns at positions 3 to 58 of ``df`` are plotted against the
    index of ``df``, one subplot each, titled with the column name.

    Args:
        df: DataFrame with at least 59 columns.
        name: Prefix of the output file name.
    """
    # switch off on-screen display of the figure
    plt.ioff()
    try:
        f, axs = plt.subplots(56, 1, figsize=(80, 80))
        try:
            axs = axs.ravel()  # flatten the array of axes
            for i in range(0, 56):
                axs[i].plot(df.iloc[:, i + 3], linewidth=0.5)
                axs[i].set_title(df.columns[i + 3])
            # adjust this figure explicitly rather than the "current" one
            f.subplots_adjust(hspace=1)
            f.savefig(str(name) + '_seriesplot.png')
        finally:
            # the figure is 80x80 inches; release it once it is saved
            plt.close(f)
    finally:
        # switch on-screen display back on, even if plotting failed
        plt.ion()
# use line plots to show the measurement columns, optionally without the index
def save_seriesplot_noindex(df, name, drop=True):
    """Save 56 stacked line plots of ``df`` to ``<name>_seriesplot.png``.

    The columns at positions 3 to 58 of ``df`` are plotted, one subplot
    each, titled with the column name.

    Args:
        df: DataFrame with at least 59 columns.
        name: Prefix of the output file name.
        drop: When true (the default) every series is plotted against a
            fresh 0..n-1 index; when false it is plotted against the index
            of ``df``.  This used to be read from an undefined global name,
            which made the function fail with NameError.
    """
    # switch off on-screen display of the figure
    plt.ioff()
    try:
        f, axs = plt.subplots(56, 1, figsize=(80, 80))
        try:
            axs = axs.ravel()  # flatten the array of axes
            for i in range(0, 56):
                series = df.iloc[:, i + 3]
                if drop:
                    series = series.reset_index(drop=True)
                axs[i].plot(series, linewidth=0.5)
                axs[i].set_title(df.columns[i + 3])
            # adjust this figure explicitly rather than the "current" one
            f.subplots_adjust(hspace=1)
            f.savefig(str(name) + '_seriesplot.png')
        finally:
            # the figure is 80x80 inches; release it once it is saved
            plt.close(f)
    finally:
        # switch on-screen display back on, even if plotting failed
        plt.ion()
# Quick look at the first plotted column of the first window.
new_20170725.iloc[:, 0+3].reset_index(drop=False)
f = plt
f.plot(new_20170725.iloc[:, 0+3].reset_index(drop=False))
# Series plots of the three date windows.
save_seriesplot(new_20170725, 'new_20170725')
save_seriesplot(new_20170805, 'new_20170805')
save_seriesplot(new_20170815, 'new_20170815')
# remove all zero data as possible
# get the on working data
new_20170725_nozero = new_20170725[(new_20170725['laV1'] > 0.5) &
                                   (new_20170725['op_st'] == 112) &
                                   (new_20170725['laV2'] > 0.5)]
new_20170805_nozero = new_20170805[(new_20170805['laV1'] > 0.5) &
                                   (new_20170805['op_st'] == 112) &
                                   (new_20170805['laV2'] > 0.5)]
# The third condition used to reference the misspelled `new_20178025`,
# which raised NameError.
new_20170815_nozero = new_20170815[(new_20170815['laV1'] > 0.5) &
                                   (new_20170815['op_st'] == 112) &
                                   (new_20170815['laV2'] > 0.5)]
# save_seriesplot takes two arguments; the former third argument `True`
# raised TypeError.  Resetting the row index first plots the filtered rows
# against 0..n-1 instead of the time index.
save_seriesplot(new_20170725_nozero.reset_index(drop=True), 'new_20170725_nozero')
save_seriesplot(new_20170805_nozero.reset_index(drop=True), 'new_20170805_nozero')
save_seriesplot(new_20170815_nozero.reset_index(drop=True), 'new_20170815_nozero')
# all data sum up for frequency
import pandas as pd
import matplotlib.pyplot as plt
# read the data
new_data = pd.read_csv('new_data.csv')
machine1 = pd.read_csv('machine1.csv')
machine2 = pd.read_csv('machine2.csv')
machine3 = pd.read_csv('machine3.csv')
# remove some columns
del new_data['Unnamed: 0']
del new_data['index']
del machine1['Unnamed: 0'], machine1['TAH'], machine1['index']
del machine2['Unnamed: 0'], machine2['TAH'], machine2['index']
del machine3['Unnamed: 0'], machine3['TAH'], machine3['index']
# check the number of columns of each frame (only visible interactively);
# `.ix`, used here before, no longer exists in current pandas
len(new_data.columns)
len(machine1.columns)
len(machine2.columns)
len(machine3.columns)
# merge, 'axis=0' means join by rows
all_data = pd.concat([new_data, machine1, machine2, machine3], axis=0, join='outer')
# check that no rows were lost
len(all_data) == len(machine1) + len(machine2) + len(machine3) + len(new_data)
# see the counts of different frequency
tmp_counts = all_data['frequency'].value_counts()
tmp_counts
# divide by frequency
# based on THAMES Transmitter Technique Specifications
interval = [[5900, 6295], [7100, 7600], [9400, 9900],
            [11500, 12175], [13570, 13870], [15030, 15800],
            [17480, 17900], [18900, 19020], [21450, 21850],
            [25670, 26100]]
# one frame per frequency interval, bounds inclusive
freq = [all_data[(all_data['frequency'] >= low) &
                 (all_data['frequency'] <= high)]
        for low, high in interval]
# see the length of each frequency band; the running total is kept in
# `total` so the builtin `sum` is no longer shadowed
total = 0
for band in freq:
    print(len(band))
    total += len(band)
# number of rows that fall into none of the intervals
len(all_data) - total
tmp = all_data[(all_data['frequency'] == 9370)]
len(tmp)
del f
2018-06-26 code
最后编辑于 :
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- Cange note: “Reading TMR1H will latch the contents of TMR...