程式碼 
- import numpy as np
 
 - import matplotlib.pyplot as plt
 
 - import pandas as pd
 
 - from sklearn import datasets
 
  
# Load the iris dataset that ships with scikit-learn and keep only
# feature columns 2 and 3 as the 2-D points to cluster.
iris = datasets.load_iris()
X = iris.data[:, 2:4]
 
  
from sklearn.cluster import AgglomerativeClustering

# scikit-learn agglomerative (bottom-up hierarchical) clustering.
#   n_clusters: number of clusters to produce (a fixed integer).
#   linkage:    how the distance between two clusters is measured:
#               "ward", "complete", "average" or "single".
# The point-to-point distance ("euclidean", "l1", "l2", "manhattan",
# "cosine", ...) is deliberately left at its default, Euclidean, which is
# the only one "ward" accepts. The keyword for it was `affinity` before
# scikit-learn 1.2 and is `metric` from 1.2 on (`affinity` was removed in
# 1.4), so omitting it keeps this snippet working on every version.
ml = AgglomerativeClustering(n_clusters=3, linkage='ward')

# Fit once and reuse the result: `labels` holds one cluster index per row
# of X, and is used to colour the scatter plot below.
labels = ml.fit_predict(X)

plt.scatter(X[:, 0], X[:, 1], c=labels)
plt.show()
 
  複製代碼 
 
輸出 array 
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,       1, 1, 1, 1, 1, 1, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,       2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2,       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64) 
 
Scikit-Learn 演算法 分群 
 
 
 
程式碼 
# SciPy hierarchical clustering.
#   metric: how the distance between two observations is computed.
#   method: how the distance between two clusters is computed:
#           "single", "complete", "average", "weighted", "centroid",
#           "median" or "ward".
import scipy.cluster.hierarchy as sch

# Build the linkage matrix for X, then draw it as a dendrogram.
dis = sch.linkage(X, method='ward', metric='euclidean')
sch.dendrogram(dis)
plt.title('Hierarchical Clustering')
plt.show()
 
  複製代碼 
Scikit-Learn 演算法 分群 
 
 
 
程式碼 
# Cut the hierarchy stored in `dis` using a distance threshold: with
# criterion='distance', `max_dis` is the threshold passed to fcluster.
max_dis = 5
clusters = sch.fcluster(dis, max_dis, criterion='distance')
print('clusters', clusters)

# Colour each point by the flat cluster it was assigned to.
plt.scatter(X[:, 0], X[:, 1], c=clusters)
plt.show()
 
  複製代碼 
Scikit-Learn 演算法 分群 
 
 
 
 
程式碼 
# Cut the hierarchy stored in `dis` by cluster count instead of distance:
# with criterion='maxclust', `k` is the maximum number of flat clusters.
k = 5
clusters = sch.fcluster(dis, t=k, criterion='maxclust')

# Colour each point by the flat cluster it was assigned to.
plt.scatter(X[:, 0], X[:, 1], c=clusters)
plt.show()
 
  複製代碼 
Scikit-Learn 演算法 分群 
 
 
 
來源 
 |