Python 解决循环引用的逻辑其实比较简单。今晚写了一个脚本层查找 unreachable object 的方法，模拟的就是 Python GC 查找循环引用的算法：
# -*- coding:utf-8 -*-
import gc
import sys
# Sentinel states that replace a reference count in the ``gc_refs`` table once
# an object has been classified.  Both are negative, so they cannot be
# mistaken for a genuine count.
REACHABLE = -10001
TENTATIVELY_UNREACHABLE = -10002


def _snapshot_refcounts(objects):
    """Return ``{id(obj): sys.getrefcount(obj)}`` for each item of *objects*.

    The raw counts still include the references held by the measurement
    itself (the *objects* list, the loop variable and any temporary created
    for the ``getrefcount`` call); see ``_measurement_overhead``.
    """
    counts = {}
    for obj in objects:
        counts[id(obj)] = sys.getrefcount(obj)
    return counts


def _measurement_overhead():
    """Return how many references ``_snapshot_refcounts`` adds per object.

    Measured on a throwaway object instead of hard-coding a number, because
    how many temporary references a call holds is an interpreter detail.
    """
    probe = []
    # Outside the measurement ``probe`` is referenced exactly once (by this
    # local), so everything above 1 is overhead.
    return _snapshot_refcounts([probe])[id(probe)] - 1


def _subtract_internal_refs(objects, gc_refs):
    """Remove from *gc_refs* every reference that originates in *objects*.

    Afterwards ``gc_refs[id(x)]`` is the number of references to ``x`` coming
    from outside the tracked set.  *gc_refs* is modified in place.
    """
    for obj in objects:
        for child in gc.get_referents(obj):
            key = id(child)
            if key in gc_refs:
                gc_refs[key] -= 1


def _mark_reachable(objects, gc_refs):
    """Replace every count in *gc_refs* with its final state, in place.

    Objects with a positive external count are roots and become REACHABLE;
    everything else starts as TENTATIVELY_UNREACHABLE and is promoted to
    REACHABLE when it is found through the referents of a reachable object.
    """
    pending = []
    for obj in objects:
        key = id(obj)
        if gc_refs[key] > 0:
            gc_refs[key] = REACHABLE
            pending.append(obj)
        else:
            # A zero (or, defensively, negative) count means nothing outside
            # the tracked set refers to this object.
            gc_refs[key] = TENTATIVELY_UNREACHABLE
    # Each object enters the worklist at most once, when it is promoted.
    while pending:
        for child in gc.get_referents(pending.pop()):
            key = id(child)
            # ``get`` returns None for objects that are not in the snapshot.
            if gc_refs.get(key) == TENTATIVELY_UNREACHABLE:
                gc_refs[key] = REACHABLE
                pending.append(child)


def get_unreachable_objects():
    """Return the GC-tracked objects that only reference cycles keep alive.

    A script-level model of the three phases of the cycle detector:

    1. update refs   - record each tracked object's reference count;
    2. subtract refs - remove the references held by other tracked objects,
       leaving only references from outside the tracked set;
    3. find unreachable - objects with a positive remaining count are roots;
       whatever cannot be reached from a root is reported.

    All phases work on a single ``gc.get_objects()`` snapshot.  Holding that
    list keeps every examined object alive, so ids cannot be recycled and an
    automatic collection triggered by the scan cannot free anything halfway.
    The loops live in helpers so that this function holds no local reference
    to any object being measured.

    Returns:
        list: the objects classified as unreachable (possibly empty).  The
        result describes the moment the snapshot was taken.
    """
    overhead = _measurement_overhead()
    tracked = gc.get_objects()

    # 1. update refs
    gc_refs = _snapshot_refcounts(tracked)
    for key in gc_refs:
        gc_refs[key] -= overhead

    # 2. subtract refs
    _subtract_internal_refs(tracked, gc_refs)

    # 3. find unreachable
    _mark_reachable(tracked, gc_refs)
    return [obj for obj in tracked
            if gc_refs[id(obj)] == TENTATIVELY_UNREACHABLE]
if __name__ == "__main__":
    # Demo: build one two-object reference cycle, drop every outside
    # reference to it, and print what the script-level scan reports.
    class A(object):
        pass

    a = A()
    b = A()
    c = A()
    b.v = c
    c.v = b
    # ``a`` is not part of any cycle; ``b`` and ``c`` reference each other
    # and therefore stay alive after the names are rebound.
    a = None
    b = None
    c = None
    # Call form of print with a single argument behaves the same on
    # Python 2 and Python 3.
    print(get_unreachable_objects())
    # Turn on the collector's debug output and run a real collection so its
    # report can be compared with the list printed above.
    gc.set_debug(gc.DEBUG_LEAK | gc.DEBUG_STATS)
    gc.collect()
脚本分为三步:
- update_refs
- subtract refs
- find unreachable objects
整体逻辑与 Python GC 基本一致，不过目前在我们的开发服上测试时还存在一些问题。