"""Two-cluster k-means on 2-D points, carried out in polar coordinates."""

import numpy as np
import pandas as pd


def get_data(file):
    """Read a header-less CSV file into a NumPy array.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The values of the file as a 2-D array, one row per CSV line.
    """
    return pd.read_csv(file, header=None).values


def get_Polar(data):
    """Convert Cartesian coordinates to polar coordinates.

    Args:
        data: Indexable whose item 0 is x and item 1 is y. The items may be
            scalars or equally shaped arrays.

    Returns:
        ``[dist, angle]`` where ``dist`` is the distance from the origin and
        ``angle`` is the angle in radians in the range ``[-pi, pi]``.
    """
    dist = np.hypot(data[0], data[1])
    # arctan2 keeps the quadrant and is defined for x == 0, unlike
    # arctan(y / x), which divides by zero on the y axis.
    angle = np.arctan2(data[1], data[0])
    return [dist, angle]


def get_distance(data, origin):
    """Return the Euclidean distance between two 2-D points.

    Args:
        data: Indexable holding the first point's two coordinates.
        origin: Indexable holding the second point's two coordinates.

    Returns:
        The distance as a float; ``0.0`` when the points coincide.
    """
    return np.hypot(data[0] - origin[0], data[1] - origin[1])


def _two_means(points, center1, center2, max_iter):
    """Iterate k-means with k=2 and return a mask of second-cluster members."""
    in_second = None
    for _ in range(max_iter):
        dist1 = np.linalg.norm(points - center1, axis=1)
        dist2 = np.linalg.norm(points - center2, axis=1)
        # A point joins the second cluster only when it is strictly closer
        # to the second centre; ties stay in the first cluster.
        assigned = dist1 > dist2
        if in_second is not None and np.array_equal(assigned, in_second):
            # No point changed cluster, so the centres would not move either.
            break
        in_second = assigned
        # Recompute each centre as the mean of its members. An empty cluster
        # keeps its previous centre: the mean of nothing is NaN, and a NaN
        # centre can never compare equal, so the loop would never converge.
        if not in_second.all():
            center1 = points[~in_second].mean(axis=0)
        if in_second.any():
            center2 = points[in_second].mean(axis=0)
    return in_second


def _plot_clusters(data, in_second):
    """Scatter-plot the first cluster in blue and the second in red."""
    # Imported here so the numeric helpers can be used without matplotlib.
    import matplotlib.pyplot as plt

    first = data[~in_second]
    second = data[in_second]
    plt.scatter(first[:, 0], first[:, 1], c="b")
    plt.scatter(second[:, 0], second[:, 1], c="r")
    plt.show()


def kmeans(file="dataset_circles.csv", max_iter=100, show=True):
    """Split the points of a CSV file into two clusters and plot them.

    The points are converted to polar coordinates and clustered there; the
    plot shows the original Cartesian coordinates.

    Args:
        file: CSV file without a header row; the first two columns are read
            as x and y.
        max_iter: Upper bound on the number of assignment rounds.
        show: When true, display the clusters with matplotlib.

    Returns:
        Boolean array with one entry per input row; ``True`` marks rows in
        the second cluster (drawn in red), ``False`` the first (blue).

    Raises:
        ValueError: If the file has fewer than two rows or ``max_iter`` is
            smaller than 1.
    """
    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")
    data = get_data(file)
    if data.shape[0] < 2:
        raise ValueError("kmeans needs at least two points")

    polar_data = np.column_stack(get_Polar((data[:, 0], data[:, 1])))

    # Draw two different rows as starting centres; independent draws could
    # pick the same row twice and leave the second cluster empty.
    first, second = np.random.choice(polar_data.shape[0], size=2, replace=False)
    in_second = _two_means(
        polar_data, polar_data[first], polar_data[second], max_iter
    )

    if show:
        _plot_clusters(data, in_second)
    return in_second


def main():
    """Run the clustering on the default data set."""
    kmeans()


if __name__ == "__main__":
    main()