"""Grow a KNN training set from jump points in a sorted CSV of 2-value rows.

The script reads ``Trans150000.csv``, sorts it by the column named ``'0'``,
and scans the rows in consecutive windows.  The first place in each window
where column 1 changes by more than a threshold between two neighbouring rows
is treated as a jump: both rows are labelled by the current classifier,
appended to the training set, and the classifier is refitted.  The resulting
training set is plotted per label and written to a CSV file.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.neighbors as ne

# Rows are scanned in consecutive, non-overlapping windows of this many rows.
WINDOW_SIZE = 20
# Absolute change in column 1 between neighbouring rows that counts as a jump.
JUMP_THRESHOLD = 40

knn = ne.KNeighborsClassifier(n_neighbors=5)

data = pd.read_csv("Trans150000.csv")
dataSort = data.sort_values(by='0')
# Alternative input kept from the original script:
# data = pd.read_csv("Sample.csv")
# dataSort = data.sort_values(by='X')
testArray = np.array(dataSort)

# Create the training set: seed points carrying labels 1 and 2.
trainArray = np.array([
    [-565, 3757],
    [-547, 3755],
    [-541, 3754],
    [39356, 365],
    [39491, 359],
    [40434, 359],
    [40499, 359],
    [-654, 3237],
    [-613, 3159],
    [-553, 3112],
])
trainLabels = np.array([1, 1, 1, 1, 1, 1, 1, 2, 2, 2])
knn.fit(trainArray, trainLabels)

# Open the figure that the scatter calls below draw into.
plt.figure()

# A trailing partial window is skipped, and the pair that straddles two
# windows (last row of one, first row of the next) is never compared.
for index in range(0, testArray.shape[0] - WINDOW_SIZE, WINDOW_SIZE):
    for i in range(index, index + WINDOW_SIZE - 1):
        offset = np.abs(testArray[i][1] - testArray[i + 1][1])
        if offset > JUMP_THRESHOLD:
            # Label both rows of the jump with the classifier as it stands
            # *before* either row is added; the reshape also enforces that
            # every row holds exactly two values.
            pair = testArray[i:i + 2].reshape(2, 2)
            predict = knn.predict(pair)
            trainArray = np.r_[trainArray, pair]
            trainLabels = np.r_[trainLabels, predict]
            knn.fit(trainArray, trainLabels)
            # Only the first jump in each window is used.
            break

# Built once after the scan: columns 0 and 1 are the point, column 2 the label.
trainData = np.c_[trainArray, trainLabels]
trainDataFrame = pd.DataFrame(trainData)

# NOTE(review): the seed labels contain only 1 and 2, so the label-3 series
# is presumably always empty in this script - confirm before removing it.
for label, colour in ((1, 'r'), (2, 'b'), (3, 'g')):
    points = trainDataFrame[trainDataFrame[2] == label]
    plt.scatter(points[0], points[1], c=colour)
plt.show()

trainDataFrame.to_csv("D:\\Train150000.csv")

# Disabled follow-up stage, kept as an unexecuted string literal.  It loads a
# saved training set, refits the classifier, predicts on columns 0 and 2 of
# testArray, swaps labels 2/3 for neighbouring rows inside two ranges of
# column 0, writes one CSV per label and plots the three groups.
"""
dataTrain = pd.read_csv("D:\\Train90000.csv")
trainArray = np.array(dataTrain)
knn.fit(trainArray[:,1:3],trainArray[:,3])
predict = knn.predict(testArray[:,0:3:2])
result = np.c_[testArray,predict]
resultDataFrame = pd.DataFrame(result)
dataReAnalyze = resultDataFrame[((resultDataFrame[3]==2) | (resultDataFrame[3]==3)) & (((resultDataFrame[0] > 6500) & (resultDataFrame[0] < 8500 )) |
((resultDataFrame[0] > 58500) & (resultDataFrame[0] < 60500 ))) ]
for index in range(0,dataReAnalyze.shape[0]-2,1):
    deltaX = dataReAnalyze.iloc[index+1][0]-dataReAnalyze.iloc[index][0]
    deltaY = np.abs(dataReAnalyze.iloc[index+1][2]-dataReAnalyze.iloc[index][2])
    if(deltaX < 1 and deltaY > 10):
        if(dataReAnalyze.iloc[index + 1][2] > dataReAnalyze.iloc[index][2]):
            resultDataFrame.loc[dataReAnalyze.index.tolist()[index],3] = 3
            resultDataFrame.loc[dataReAnalyze.index.tolist()[index + 1],3] = 2
        else:
            resultDataFrame.loc[dataReAnalyze.index.tolist()[index],3] = 2
            resultDataFrame.loc[dataReAnalyze.index.tolist()[index + 1],3] = 3
level1 = resultDataFrame[resultDataFrame[4]==1]
level2 = resultDataFrame[resultDataFrame[4]==2]
level3 = resultDataFrame[resultDataFrame[4]==3]
level1.to_csv("D:\\Level1Plane.csv",index=False)
level2.to_csv("D:\\Level2Plane.csv",index=False)
level3.to_csv("D:\\Level3Plane.csv",index=False)
# 展示分层
plt.scatter(level1[0],level1[2],c='r')
plt.scatter(level2[0],level2[2],c='b')
plt.scatter(level3[0],level3[2],c='g')
plt.grid()
plt.show()
"""