import pandas as pd
|
import numpy as np
|
import matplotlib.pyplot as plt
|
import sklearn.neighbors as ne
|
|
# Self-training pass over the sorted input.
#
# A 5-nearest-neighbour classifier is fitted on ten hand-labelled seed points.
# The sorted data is then walked in fixed-size windows; in each window the
# first adjacent pair of rows whose column-1 values differ by more than
# JUMP_THRESHOLD is labelled by the current classifier, appended to the
# training set, and the classifier is refitted before moving to the next
# window.  The grown training set is plotted and written to CSV.

WINDOW_SIZE = 20     # rows per window of the sorted input
JUMP_THRESHOLD = 40  # minimum |difference| in column 1 between adjacent rows
LABEL_COLOURS = ((1, 'r'), (2, 'b'), (3, 'g'))  # scatter colour per label

knn = ne.KNeighborsClassifier(n_neighbors=5)

# Load the input and order it by the column named '0'.
data = pd.read_csv("Trans150000.csv")
dataSort = data.sort_values(by='0')
# Alternative input, kept for reference:
#data = pd.read_csv("Sample.csv")
#dataSort = data.sort_values(by='X')
testArray = np.array(dataSort)

# Build the training set: ten hand-picked two-column points, labelled 1 or 2.
trainArray = np.array([
    [-565, 3757],
    [-547, 3755],
    [-541, 3754],
    [39356, 365],
    [39491, 359],
    [40434, 359],
    [40499, 359],
    [-654, 3237],
    [-613, 3159],
    [-553, 3112],
])
trainLabels = np.array([1, 1, 1, 1, 1, 1, 1, 2, 2, 2])
trainData = np.c_[trainArray, trainLabels]
knn.fit(trainArray, trainLabels)

# NOTE: plt.figure() returns a Figure, not an Axes; the name is kept only so
# that existing references to `ax` keep working.
ax = plt.figure()
trainDataFrame = pd.DataFrame(trainData)

for index in range(0, testArray.shape[0] - WINDOW_SIZE, WINDOW_SIZE):
    # Compare each row of the window with its successor; the last row of the
    # window is only ever used as a successor.
    for i in range(index, index + WINDOW_SIZE - 1):
        offset = np.abs(testArray[i][1] - testArray[i + 1][1])
        if offset > JUMP_THRESHOLD:
            # Label both sides of the jump with the classifier as it stands
            # now (it is refitted only after both rows have been added).
            for row in (i, i + 1):
                # reshape(1, 2) requires each input row to hold two values.
                sample = testArray[row].reshape(1, 2)
                predict = knn.predict(sample)
                trainArray = np.r_[trainArray, sample]
                trainLabels = np.r_[trainLabels, predict]

            knn.fit(trainArray, trainLabels)
            trainData = np.c_[trainArray, trainLabels]
            trainDataFrame = pd.DataFrame(trainData)
            # Only the first jump in each window is used.
            break

# Column 2 of the frame holds the label; draw one scatter series per label.
# The seed labels above are only 1 and 2, so the label-3 series may be empty.
for label, colour in LABEL_COLOURS:
    members = trainDataFrame[trainDataFrame[2] == label]
    plt.scatter(members[0], members[1], c=colour)

plt.show()

# Persist the grown training set (points plus label column).
pd.DataFrame(trainData).to_csv("D:\\Train150000.csv")
|
|
|
# Disabled second stage.  Everything between the triple quotes is a bare
# string literal, so none of it executes.  As written, the text would:
#   1. read a previously saved training table and refit `knn` on its columns
#      1-2 (features) and 3 (label);
#   2. predict a label for every row of `testArray` using its columns 0 and 2,
#      and append that prediction as an extra column of `result`;
#   3. among rows labelled 2 or 3 whose column 0 lies in (6500, 8500) or
#      (58500, 60500), re-label adjacent rows that are less than 1 apart in
#      column 0 but more than 10 apart in column 2, giving 2 to the row with
#      the larger column-2 value and 3 to the other;
#   4. split the rows by label, write one CSV per label and plot them.
# NOTE(review): the text has lost its indentation and contains stray `|`
#   separator lines, so it must be re-indented before it can be re-enabled.
# NOTE(review): the re-labelling step treats column 3 as the label, but the
#   level split filters on column 4 - looks like an index mismatch; confirm.
# NOTE(review): range(0, shape[0]-2) stops before the final adjacent pair -
#   presumably shape[0]-1 was intended; confirm.
# NOTE(review): this reads D:\Train90000.csv whereas the active stage writes
#   D:\Train150000.csv - verify which training file is wanted.
"""
|
dataTrain = pd.read_csv("D:\\Train90000.csv")
|
trainArray = np.array(dataTrain)
|
knn.fit(trainArray[:,1:3],trainArray[:,3])
|
predict = knn.predict(testArray[:,0:3:2])
|
|
result = np.c_[testArray,predict]
|
resultDataFrame = pd.DataFrame(result)
|
|
|
dataReAnalyze = resultDataFrame[((resultDataFrame[3]==2) | (resultDataFrame[3]==3)) & (((resultDataFrame[0] > 6500) & (resultDataFrame[0] < 8500 )) | ((resultDataFrame[0] > 58500) & (resultDataFrame[0] < 60500 ))) ]
|
|
for index in range(0,dataReAnalyze.shape[0]-2,1):
|
deltaX = dataReAnalyze.iloc[index+1][0]-dataReAnalyze.iloc[index][0]
|
deltaY = np.abs(dataReAnalyze.iloc[index+1][2]-dataReAnalyze.iloc[index][2])
|
if(deltaX < 1 and deltaY > 10):
|
if(dataReAnalyze.iloc[index + 1][2] > dataReAnalyze.iloc[index][2]):
|
resultDataFrame.loc[dataReAnalyze.index.tolist()[index],3] = 3
|
resultDataFrame.loc[dataReAnalyze.index.tolist()[index + 1],3] = 2
|
else:
|
resultDataFrame.loc[dataReAnalyze.index.tolist()[index],3] = 2
|
resultDataFrame.loc[dataReAnalyze.index.tolist()[index + 1],3] = 3
|
|
|
level1 = resultDataFrame[resultDataFrame[4]==1]
|
level2 = resultDataFrame[resultDataFrame[4]==2]
|
level3 = resultDataFrame[resultDataFrame[4]==3]
|
|
level1.to_csv("D:\\Level1Plane.csv",index=False)
|
level2.to_csv("D:\\Level2Plane.csv",index=False)
|
level3.to_csv("D:\\Level3Plane.csv",index=False)
|
|
# 展示分层
|
plt.scatter(level1[0],level1[2],c='r')
|
plt.scatter(level2[0],level2[2],c='b')
|
plt.scatter(level3[0],level3[2],c='g')
|
plt.grid()
|
plt.show()
|
"""
|