Python字典查找性能,get vs in




# Two ways to read a value that may be missing from a dict.
mydict = {'hello': 'yes', 'goodbye': 'no'}
key = 'hello'

# (A) membership test, then a second lookup to fetch the value
if key in mydict:
    a = mydict[key]

# vs (B) a single .get() call; a None result is treated as "key missing"
a = mydict.get(key, None)
if a is not None:
    pass  # work with `a` here (placeholder so the snippet parses)

编辑:这两种写法的速度似乎相同。常识告诉我,(B) 应该明显更快,因为它只需 1 次字典查找,而 (A) 需要 2 次,但测试结果并非如此。我百思不得其解。

以下基准测试结果是 12 次运行的平均值,其中一半为命中,另一半为未命中:

doing in
switching to get
total time for IN:  0.532250006994
total time for GET:  0.480916659037
times found: 12000000
times not found: 12000000

当运行一个类似的测试(循环次数增加到 10 倍)且始终找不到键时,结果如下:

doing in
switching to get
total time for IN:  2.35899998744
total time for GET:  4.13858334223



import time
# Ten-entry dict: keys "0", "4", ..., "36", each mapped to str(n * 18).
smalldict = {str(n * 4): str(n * 18) for n in range(10)}
# "8" is already present (n == 2); this overwrites its value.
smalldict["8"] = "hello"

# 10,000 numeric-string keys plus the one key the benchmark looks up.
bigdict = {str(n * 100): str(n * 4123) for n in range(10000)}
bigdict["hello"] = "yes!"

# Timing accumulators and hit/miss counters used by the timing loops.
timetotal = 0
totalin = 0
totalget = 0
found = 0
notfound = 0
key = "hello"

ddo = bigdict # change to smalldict for small dict gets
# Benchmark (A): `in` membership test followed by a subscript lookup.
# print() with a single argument emits the same text on Python 2 and 3.
print('doing in')

for r in range(12):
    start = time.time()
    a = r % 2  # even runs look up a key that exists, odd runs one that does not
    for i in range(1000000):
        if a == 0:
            # Hit case. str(key) is kept from the original measurement; note
            # that it adds a call the get() benchmark does not pay for.
            if str(key) in ddo:
                found = found + 1
                foo = ddo[str(key)]
            else:
                notfound = notfound + 1
                foo = "nooo"
        else:
            # Miss case: 'yo' is never inserted into either dict.
            if 'yo' in ddo:
                found = found + 1
                foo = ddo['yo']
            else:
                notfound = notfound + 1
                foo = "nooo"
    timetotal = timetotal + (time.time() - start)

# Mean wall-clock seconds per run.
totalin = timetotal / 12.0

# Benchmark (B): a single dict.get() call, with the result checked against None.
# print() with a single argument emits the same text on Python 2 and 3.
print('switching to get')
timetotal = 0  # restart the accumulator for this benchmark
for r in range(12):
    start = time.time()
    a = r % 2  # even runs look up a key that exists, odd runs one that does not
    for i in range(1000000):
        if a == 0:
            # Hit case.
            foo = ddo.get(key, None)
            if foo is not None:
                found = found + 1
            else:
                notfound = notfound + 1
                foo = "nooo"
        else:
            # Miss case: 'yo' is never inserted into either dict.
            foo = ddo.get('yo', None)
            if foo is not None:
                found = found + 1
            else:
                notfound = notfound + 1
                foo = "oooo"
    timetotal = timetotal + (time.time() - start)

# Mean wall-clock seconds per run (float divisor, matching the `in` benchmark).
totalget = timetotal / 12.0

# Report the per-run averages and the hit/miss counters.
# A single %-formatted argument makes print() emit the same text on Python 2
# and Python 3; the two spaces after the colon reproduce what the Python 2
# statement `print "label: ", value` wrote.
print("total time for IN:  %s" % totalin)
print('total time for GET:  %s' % totalget)
print('times found: %s' % found)
print('times not found: %s' % notfound)

import timeit

# Fixture for the timeit runs: integer keys 0..9999, every value None.
d = {n: None for n in range(10000)}

def d_get_has(d):
    """Return ``d.get(1)``.

    Benchmark subject for the "single ``get`` call" approach with the key
    ``1``; the result is ``None`` when ``1`` is not a key of ``d``.
    """
    return d.get(1)

def d_get_not_has(d):
    """Return ``d.get(-1)``.

    Benchmark subject for the "single ``get`` call" approach with the key
    ``-1``; the result is ``None`` when ``-1`` is not a key of ``d``.
    """
    return d.get(-1)

def d_in_has(d):
    """Return ``d[1]`` if ``1`` is a key of ``d``, otherwise ``None``.

    Benchmark subject for the "membership test, then subscript" approach:
    a hit performs the ``in`` check and then a separate ``d[1]`` lookup.
    """
    if 1 not in d:
        return None
    return d[1]

def d_in_not_has(d):
    """Return ``d[-1]`` if ``-1`` is a key of ``d``, otherwise ``None``.

    Benchmark subject for the "membership test, then subscript" approach:
    when ``-1`` is absent only the ``in`` check runs.
    """
    if -1 not in d:
        return None
    return d[-1]

# Time each helper with timeit's default iteration count. The setup string
# imports the fixture dict and the helper from the running script.
# Wrapping the single value in print(...) emits the same text on Python 2 and 3.
print(timeit.timeit('d_get_has(d)', 'from __main__ import d, d_get_has'))
print(timeit.timeit('d_get_not_has(d)', 'from __main__ import d, d_get_not_has'))
print(timeit.timeit('d_in_has(d)', 'from __main__ import d, d_in_has'))
print(timeit.timeit('d_in_not_has(d)', 'from __main__ import d, d_in_not_has'))


还值得指出的是,只要改用 PyPy 运行,就能获得巨大的性能提升 :-):

$ python ~/sandbox/

$ pypy ~/sandbox/

