测试2:影评态度
SVM在康奈尔影评数据集上的表现:
代码:
# -*- coding: utf-8 -*-
# Compare four SVM kernels (linear, poly, rbf, sigmoid) on TF-IDF features of
# a movie-review corpus: fit each on an 80% training split, then print the
# classifier followed by its accuracy on the held-out 20%.
from sklearn import svm
import numpy as np
import scipy as sp
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy module.
    from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt
from sklearn.datasets import load_files
from sklearn.feature_extraction.text import TfidfVectorizer

# NOTE(review): the value returned here is not used anywhere in the visible
# script; presumably it was the source of the .npy files read below -- confirm
# before removing the call.
movie_reviews = load_files(u'E:/ML/DATA/电影分类数据/tokens')

# Read the documents and their labels from .npy files.
# np.load replaces the deprecated scipy alias sp.load.
movie_data = np.load('movie_data.npy')
movie_target = np.load('movie_target.npy')
x = movie_data
y = movie_target

count_vec = TfidfVectorizer(binary=False, decode_error='ignore', stop_words='english')

# No random_state is passed, so the split (and therefore the printed
# accuracies) changes from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Fit the vocabulary and IDF weights on the training documents only, then
# reuse them for the test documents. The result is a sparse matrix mapping
# (row, column) coordinates to TF-IDF weights.
x_train = count_vec.fit_transform(x_train)
x_test = count_vec.transform(x_test)

# gamma='auto' is spelled out for the non-linear kernels because newer
# scikit-learn releases default to gamma='scale', which would silently change
# the experiment. The linear kernel does not use gamma.
clf_linear = svm.SVC(kernel='linear').fit(x_train, y_train)
clf_poly = svm.SVC(kernel='poly', degree=3, gamma='auto').fit(x_train, y_train)
clf_rbf = svm.SVC(gamma='auto').fit(x_train, y_train)
clf_sigmoid = svm.SVC(kernel='sigmoid', gamma='auto').fit(x_train, y_train)

for clf in (clf_linear, clf_poly, clf_rbf, clf_sigmoid):
    print(clf)
    answer = clf.predict(x_test)
    # Uncomment to inspect the predictions next to the true labels:
    # print(answer)
    # print(y_test)
    # Accuracy: fraction of test documents whose predicted label matches.
    print(np.mean(answer == y_test))
==================================================
D:\Anaconda2\python.exe D:/PyCharm/start/ML/SVM/SVM2_MOVIE.py
clf_linear:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
[1 1 0 1 1 0 0 0 1 1 0 0 0 0 1 1 0 0 1 1 1 1 0 0 1 1 0 1 1 0 0 0 0 0 0 1 0
1 1 0 0 0 1 0 1 0 1 1 1 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 0 1 1 0 1
1 0 0 1 1 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 0 0 0
1 0 0 1 1 0 1 0 1 1 1 1 0 0 0 0 1 1 0 0 0 1 1 1 0 1 1 1 0 1 0 1 0 1 1 0 1
1 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 0 0 1 0 1
0 0 0 1 0 0 0 1 0 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 1 0 0 1 1 0 1
1 0 1 0 1 0 1 0 1 1 0 0 1 0 1 1 0 0 1 1 1 0 1 1 1 0 1 0 1 0 0 0 1 0 1 0 1
1 0 1 0 0 1 1 0 0 0 0 1 0 0 1 1 0 0 0 1 1]
[1 1 0 1 1 1 0 0 1 1 0 0 0 0 1 1 0 0 1 1 0 1 0 0 1 1 1 1 0 0 0 0 0 1 1 1 0
1 1 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 1 0 1
1 0 0 1 1 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 1 0 0 0
1 0 0 1 1 1 1 0 1 0 1 1 0 1 0 1 1 1 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0
1 1 1 1 1 1 0 1 0 0 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 0 0 0 1 1 1 0 1 1 0 1
0 0 0 1 0 0 0 1 0 1 1 1 1 1 0 0 0 0 0 1 1 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0 0
0 0 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 0 0 1 0 0 0 0
1 0 1 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1]
0.832142857143
clf_poly:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape=None, degree=3, gamma='auto', kernel='poly',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
[1 1 0 1 1 1 0 0 1 1 0 0 0 0 1 1 0 0 1 1 0 1 0 0 1 1 1 1 0 0 0 0 0 1 1 1 0
1 1 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 1 0 1
1 0 0 1 1 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 1 0 0 0
1 0 0 1 1 1 1 0 1 0 1 1 0 1 0 1 1 1 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0
1 1 1 1 1 1 0 1 0 0 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 0 0 0 1 1 1 0 1 1 0 1
0 0 0 1 0 0 0 1 0 1 1 1 1 1 0 0 0 0 0 1 1 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0 0
0 0 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 0 0 1 0 0 0 0
1 0 1 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1]
0.460714285714
clf_rbf:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
[1 1 0 1 1 1 0 0 1 1 0 0 0 0 1 1 0 0 1 1 0 1 0 0 1 1 1 1 0 0 0 0 0 1 1 1 0
1 1 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 1 0 1
1 0 0 1 1 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 1 0 0 0
1 0 0 1 1 1 1 0 1 0 1 1 0 1 0 1 1 1 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0
1 1 1 1 1 1 0 1 0 0 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 0 0 0 1 1 1 0 1 1 0 1
0 0 0 1 0 0 0 1 0 1 1 1 1 1 0 0 0 0 0 1 1 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0 0
0 0 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 0 0 1 0 0 0 0
1 0 1 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1]
0.460714285714
clf_sigmoid:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape=None, degree=3, gamma='auto', kernel='sigmoid',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
[1 1 0 1 1 1 0 0 1 1 0 0 0 0 1 1 0 0 1 1 0 1 0 0 1 1 1 1 0 0 0 0 0 1 1 1 0
1 1 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 1 0 1
1 0 0 1 1 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 1 0 0 0
1 0 0 1 1 1 1 0 1 0 1 1 0 1 0 1 1 1 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0
1 1 1 1 1 1 0 1 0 0 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 0 0 0 1 1 1 0 1 1 0 1
0 0 0 1 0 0 0 1 0 1 1 1 1 1 0 0 0 0 0 1 1 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0 0
0 0 1 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 0 0 1 0 0 0 0
1 0 1 0 0 1 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1]
0.460714285714
Process finished with exit code 0