程式碼
- import numpy as np
- import matplotlib.pyplot as plt
- import pandas as pd
- from sklearn import datasets
- iris=datasets.load_iris() #下載資料
- X=iris.data
- X=X[:,2:4]
- from sklearn.cluster import AgglomerativeClustering
- #Sklearn
- # n_clusters: 要分成幾群 ,給定數值
- # affinity: 距離的計算方式,"euclidean","l1","l2","manhattan","cosine"...¶
- # linkage: 群與群之間的距離,"ward","complete","average","single"
- ml=AgglomerativeClustering(n_clusters=3,affinity='euclidean',linkage='ward')
- ml.fit_predict(X)
- plt.scatter(X[:,0],X[:,1],c=ml.fit_predict(X))
- plt.show()
複製代碼
輸出 array
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)
Scikit-Learn 演算法 分群
程式碼
- # Scipy
- # metric: 距離的計算方式
- # method: 群與群之間的計算方式,"single", "complete", "average", "weighted", "centroid", "median", "ward"
- import scipy.cluster.hierarchy as sch
- dis=sch.linkage(X,metric='euclidean',method='ward')
- sch.dendrogram(dis)
- plt.title('Hierarchical Clustering')
- plt.show()
複製代碼
Scikit-Learn 演算法 分群
程式碼
- max_dis=5
- clusters=sch.fcluster(dis,max_dis,criterion='distance')
- clusters
- print('clusters',clusters)
- plt.scatter(X[:,0],X[:,1],c=clusters)
- plt.show()
複製代碼
Scikit-Learn 演算法 分群
程式碼
- k=5
- clusters=sch.fcluster(dis,k,criterion='maxclust')
- plt.scatter(X[:,0],X[:,1],c=clusters)
- plt.show()
複製代碼
Scikit-Learn 演算法 分群
來源
|