1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
| import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from sklearn.neighbors import KNeighborsClassifier import pickle
# Load the training table from the HDF5 file and report its dimensions and
# column labels for a quick sanity check.
data = pd.read_hdf("data/hy_round1_train_20200102.h5")
print(data.shape)
print(data.columns)

# Switch from a DataFrame to a plain NumPy array so the following steps can
# use positional column slicing.
# NOTE(review): the last column is later compared against strings, so this is
# presumably an object-dtype array -- confirm against the file's schema.
data = np.array(data)
# Replace the three class names in the last column with integer codes,
# writing the codes back into the array in place.
data[data[:, -1] == "刺网", -1] = 0
data[data[:, -1] == "围网", -1] = 1
data[data[:, -1] == "拖网", -1] = 2

# Features are every column except the last two; the target is the last
# column. Hold out 25% of the rows for testing, with a fixed seed so the
# split is reproducible.
# NOTE(review): the second-to-last column is excluded from the features as
# well -- presumably deliberate (e.g. a non-numeric field); confirm.
x_train, x_test, y_train, y_test = train_test_split(
    data[:, :-2],
    data[:, -1],
    test_size=0.25,
    random_state=42,
)
# Standardize the features. The scaler's statistics are learned from the
# training split only and then reused for the test split: refitting on the
# test split would scale the two sets with different statistics and let
# test-set information leak into the evaluation.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

# Cast the label arrays to integers before handing them to the classifier.
y_train = y_train.astype("int")
y_test = y_test.astype("int")
print(y_train)
# Build a 5-nearest-neighbours classifier and train it on the training
# split; fit() returns the estimator itself, so the two steps are chained.
estimator = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)
# Persist the trained classifier with pickle. The context manager guarantees
# the file is flushed and closed even if pickling raises; previously the
# handle was never closed. The "model" directory must already exist.
with open("model/hy_knn_model.pickle", "wb") as f:
    pickle.dump(estimator, f)
# Predict labels for the held-out split, then show the raw predictions and
# an element-wise comparison against the true labels.
y_predict = estimator.predict(x_test)
print("模型预测结果:\n", y_predict)
print("真实值对比结果:\n", y_test == y_predict)

# Report the classifier's score (mean accuracy) on the held-out split.
score = estimator.score(x_test, y_test)
print("模型准确率:", score)
|