hash查找的是性能较好的算法之一,但它对于hash算法的设计有很大的技巧。生成hash的时候,不同的元素可能生成相同的hash值。所以减少冲突就成了很大的问题,尤其是元素基数很大的时候。
#coding:utf-8
import time
import random
def myHash(data,hashLength):
return data % hashLength
def hashSearch(hash,hashLength,data):
hashAddress = myHash(data,hashLength)
while hash.get(hashAddress) and hash[hashAddress] != data:
hashAddress += 1
hashAddress = hashAddress % hashLength
if hash.get(hashAddress) == None:
return None
return hashAddress# 数组实现hash映射
def toHash(num,hash,hashlength):
for i in num:
hashAdress = myHash(i,hashLength)
while hash.get(hashAdress):
hashAdress += 1 #防止冲突
hashAdress = hashAdress % hashlength
hash[hashAdress] = i
def sampleSearch(num,data):
for i in num:
if i == data:
return True
else:
return False
if __name__ == '__main__':
hashLength = 20
L = []
for i in range(100):
L.append(i)
L[i] += random.random()
L.append(38) #由于L中元素中全部都是随机量,这里传入一个整数方便查找
hash = {}
toHash(L,hash,hashLength) # 比较hash查找与普通顺序查找的性能差别
start = time.clock()
result = hashSearch(hash,hashLength,38)
print 'use time %f' % (time.clock() - start)
start = time.clock()
print sampleSearch(L,38)
print 'use time %f' % (time.clock() - start)
if result:
print('hash index is ' + str(result)) # 结果为int类型,用str函数转为str类型
print(hash[result])
else:
print('no result')