tab不能缩进,简书吃枣药丸
`
class bicluster:
    """A node in a hierarchical-clustering tree.

    A node either wraps one original row (no children) or represents the
    merge of two other nodes (``left`` and ``right``).
    """

    def __init__(self, vec, left=None, right=None, distance=0.0, id=None):
        """Store the node's data.

        Args:
            vec: The vector associated with this node.
            left: First child node, or None when there is none.
            right: Second child node, or None when there is none.
            distance: Distance recorded for this node (0.0 by default).
            id: Identifier for this node.
        """
        # The constructor must be named __init__ (with double underscores);
        # a method called plain ``init`` is never invoked on instantiation.
        self.left = left
        self.right = right
        self.vec = vec
        self.id = id
        self.distance = distance
def hcluster(rows, distance=pearson):
    """Cluster ``rows`` hierarchically by repeatedly merging the closest pair.

    Every row starts as its own cluster. While more than one cluster
    remains, the pair with the smallest ``distance`` is replaced by a new
    cluster whose vector is the element-wise average of the pair.

    Args:
        rows: Sequence of numeric vectors (expected to share one length;
            averaging pairs elements positionally).
        distance: Callable taking two vectors and returning a comparable
            number, where a smaller value means "closer".

    Returns:
        The single remaining ``bicluster``, root of the merge tree. Clusters
        built from a row carry that row's index as ``id``; merged clusters
        carry negative ids (-1, -2, ...).

    Raises:
        IndexError: If ``rows`` is empty.
    """
    # Cache of already-computed distances, keyed by the pair of cluster ids.
    # This must be a dict: it is both probed with ``in`` and assigned by key.
    distances = {}
    currentclustid = -1

    # Initially every row is its own cluster, identified by its row index.
    clust = [bicluster(row, id=i) for i, row in enumerate(rows)]

    while len(clust) > 1:
        lowestpair = (0, 1)
        closest = distance(clust[0].vec, clust[1].vec)

        # Walk every pair looking for the smallest distance.
        for i in range(len(clust)):
            for j in range(i + 1, len(clust)):
                key = (clust[i].id, clust[j].id)
                # Probe with the same key that is stored and read below,
                # so each pair's distance is computed only once.
                if key not in distances:
                    distances[key] = distance(clust[i].vec, clust[j].vec)

                d = distances[key]
                if d < closest:
                    closest = d
                    lowestpair = (i, j)

        first = clust[lowestpair[0]]
        second = clust[lowestpair[1]]

        # Average the two clusters' vectors.
        mergevec = [(a + b) / 2.0 for a, b in zip(first.vec, second.vec)]

        # Build the new cluster.
        newcluster = bicluster(mergevec, left=first, right=second,
                               distance=closest, id=currentclustid)

        # Clusters that were not in the original set get negative ids.
        currentclustid -= 1

        # Delete the higher index first so the lower index stays valid
        # (lowestpair[1] > lowestpair[0] by construction of the loops).
        del clust[lowestpair[1]]
        del clust[lowestpair[0]]
        clust.append(newcluster)

    return clust[0]
调用:
import clusters
# readfile has to be *called*; unpacking the bare function object into three
# names fails.
# NOTE(review): the data-file path below is an assumption (SOURCE does not
# show readfile's signature) — replace it with the path to your dataset.
blognames, words, data = clusters.readfile('blogdata.txt')
clust = clusters.hcluster(data)
vec是什么?
markdown格式有毒(