神经网络
本节选用 sklearn 的神经网络模型(MLPClassifier)进行预测。神经网络是 CNN 的基础,也是机器学习最重要的组成部分,想要更上一层楼的同学需要把神经网络搞明白,具体可参考周志华的《机器学习》,还有花书(《深度学习》)。如果不想翻书的话,关于神经网络入门的博客一抓一大把,我就放一个链接吧,主要靠大家自己~
https://blog.csdn.net/a819825294/article/details/53393837
代码
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import pandas as pd
import numpy as np
# Load the training and test CSV files from the local digit-recognizer folder.
train_data = pd.read_csv(r"C:\Users\312\Desktop\digit-recognizer\train.csv")
test_data = pd.read_csv(r"C:\Users\312\Desktop\digit-recognizer\test.csv")

# The targets are the 'label' column of the training file.
label = train_data['label']

# Stack the training rows on top of the test rows with a fresh 0..n-1 index,
# then remove the 'label' column so only feature columns remain.
data = pd.concat(
    [train_data, test_data], axis=0, ignore_index=True
).drop(columns=['label'])

# Project the features onto 100 principal components. The PCA is fitted on
# the training and test rows together, so both share the same projection.
pca = PCA(n_components=100, random_state=34)
data_pca = pca.fit_transform(data)
# Hold out 10% of the training rows for validation.
# train_test_split returns (X_train, X_test, y_train, y_test) in that order;
# the names below follow that order so features and labels are not confused.
# The first len(train_data) rows of data_pca are the training rows.
X_train, X_val, y_train, y_val = train_test_split(
    data_pca[0:len(train_data)], label, test_size=0.1, random_state=34
)

# Single hidden layer of 100 ReLU units; random_state fixed for repeatability.
clf = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    alpha=0.0001,
    learning_rate='constant',
    learning_rate_init=0.001,
    max_iter=200,
    shuffle=True,
    random_state=34,
)
clf.fit(X_train, y_train)

# Predict the held-out rows and report the fraction classified correctly.
y_predict = clf.predict(X_val)

# Compare positionally (np.asarray drops the pandas index) in one vectorized
# pass instead of rebuilding a Python list on every loop iteration.
right_count = int((np.asarray(y_val) == y_predict).sum())
print('the right rate is:', float(right_count) / len(y_val))
# Predict a digit for every test row (the rows of data_pca that follow the
# training rows).
result = clf.predict(data_pca[len(train_data):])

# Write the submission file: a header line, then one "ImageId,Label" row per
# data line of sample_submission.csv, taking the id from its first column and
# the label from the prediction at the same position.
row_template = '{},{}\n'
with open("C:\\Users\\312\\Desktop\\digit-recognizer\\result.csv", 'w') as fw, \
        open('C:\\Users\\312\\Desktop\\digit-recognizer\\sample_submission.csv') as pred_file:
    fw.write(row_template.format('ImageId', 'Label'))
    # Skip the header line of the sample file and keep only the id field.
    image_ids = [
        line.strip().split(',')[0] for line in pred_file.readlines()[1:]
    ]
    for i, image_id in enumerate(image_ids):
        fw.write(row_template.format(image_id, result[i]))
结果:
the right rate is: 0.9533333333333334