"""Two-cluster k-means on 2-D points, clustered in polar coordinates."""

import pandas as pd
|
|
|
|
import numpy as np
|
|
|
|
import matplotlib
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
def get_data(file):
    """Read a header-less CSV and return its contents as a NumPy array.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).

    Returns:
        The parsed table as a 2-D array, one row per CSV line.
    """
    return pd.read_csv(file, header=None).values
|
|
|
|
|
|
|
|
def get_Polar(data):
    """Convert a Cartesian point ``(x, y)`` to ``[radius, angle]``.

    The angle is ``arctan(y / x)`` and therefore lies in ``[-pi/2, pi/2]``:
    points that are mirror images through the origin share the same angle.
    That folding is kept on purpose so existing results do not change.

    Unlike a plain ``y / x``, a point on the y axis (``x == 0``) no longer
    raises ``ZeroDivisionError``; it gets the limiting angle ``+pi/2`` or
    ``-pi/2`` according to the sign of ``y``, and the origin gets angle 0.

    Args:
        data: Indexable whose first two items are the x and y coordinates
            (scalars, or equally shaped arrays for a batch of points).

    Returns:
        A two-element list ``[dist, angle]``.
    """
    x = np.asarray(data[0], dtype=float)
    y = np.asarray(data[1], dtype=float)

    dist = np.hypot(x, y)

    # Division by zero yields +/-inf (arctan -> +/-pi/2) or NaN for 0/0;
    # both are expected here, so silence the floating point warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        angle = np.arctan(y / x)

    # 0/0 produced NaN above: give the origin a well-defined angle of 0.
    # The trailing [()] turns a 0-d result back into a plain scalar and is
    # a no-op for real arrays.
    angle = np.where((x == 0) & (y == 0), 0.0, angle)[()]

    return [dist, angle]
|
|
|
|
|
|
|
|
def get_distance(data, origin):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each argument are used. Any indexable
    pair works (list, tuple or NumPy array); identical points give 0.

    Args:
        data: The point to measure.
        origin: The reference point (e.g. a cluster centre).

    Returns:
        The distance as a float.
    """
    # No special case for identical points is needed: the formula already
    # yields 0, and comparing with ``==`` would break for plain lists.
    return np.hypot(data[0] - origin[0], data[1] - origin[1])
|
|
|
|
|
|
|
|
def kmeans(file="dataset_circles.csv", max_iter=300):
    """Split 2-D points into two clusters with k-means run in polar space.

    The points are loaded with ``get_data``, converted to ``(radius, angle)``
    pairs with ``get_Polar`` and clustered with k = 2 using ``get_distance``.
    The result is drawn in the original Cartesian coordinates (cluster 1 in
    blue, cluster 2 in red) and displayed with ``plt.show()``.

    Args:
        file: CSV source handed to ``get_data``. Defaults to the previously
            hard-coded ``"dataset_circles.csv"``.
        max_iter: Upper bound on assignment/update rounds. It is a safety
            net so that degenerate input (e.g. NaN values, whose centres
            never compare equal) cannot loop forever.

    Returns:
        ``(cluster1_index, cluster2_index)``: integer arrays with the row
        indices of the points assigned to each cluster.

    Raises:
        ValueError: If the data has fewer than two points or ``max_iter``
            is smaller than 1.
    """
    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")

    data = get_data(file)
    n_points = data.shape[0]
    if n_points < 2:
        raise ValueError("k-means with two clusters needs at least two points")

    polar_data = np.array([get_Polar(point) for point in data.tolist()])

    # Seed the centres with two *different* rows. Drawing them independently
    # could pick the same row twice, which leaves cluster 2 empty.
    first_seed, second_seed = np.random.choice(n_points, size=2, replace=False)
    cluster_center1 = polar_data[first_seed]
    cluster_center2 = polar_data[second_seed]

    cluster1_index = np.array([], dtype="int64")
    cluster2_index = np.array([], dtype="int64")

    for _ in range(max_iter):
        # Assignment step: distance of every point to both centres.
        dist1 = np.array([get_distance(p, cluster_center1) for p in polar_data])
        dist2 = np.array([get_distance(p, cluster_center2) for p in polar_data])

        # A point joins cluster 2 only when it is strictly closer to that
        # centre; ties stay in cluster 1.
        closer_to_second = dist1 > dist2
        cluster1_index = np.flatnonzero(~closer_to_second)
        cluster2_index = np.flatnonzero(closer_to_second)

        last_center1 = cluster_center1
        last_center2 = cluster_center2

        # Update step: move each centre to the mean of its members. An empty
        # cluster keeps its previous centre; the mean of nothing is NaN and
        # would prevent the convergence test below from ever succeeding.
        if cluster1_index.size:
            cluster_center1 = polar_data[cluster1_index].mean(axis=0)
        if cluster2_index.size:
            cluster_center2 = polar_data[cluster2_index].mean(axis=0)

        # Stop once neither centre moved between two consecutive rounds.
        if np.array_equal(cluster_center1, last_center1) and np.array_equal(
            cluster_center2, last_center2
        ):
            break

    plt.scatter(data[cluster1_index, 0], data[cluster1_index, 1], s=None, c="b")
    plt.scatter(data[cluster2_index, 0], data[cluster2_index, 1], s=None, c="r")
    plt.show()

    return cluster1_index, cluster2_index
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Script entry point: run the two-cluster k-means demo."""
    kmeans()
|
|
|
|
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()