第六单元
如何拥有无穷力量
本单元解决搜索引擎对给定查询只返回最佳页面的方法。
下图是到目前为止完成的部分
下面要对页面进行排名,首先是如何定义页面好坏。
每个页面的排名由时间步长和指向它的链接数决定,另外定义阻尼系数d和页面总个数N,每个页面的初始排名都为1/N,然后根据d的概率从指向它的链接跳转而来,(1 - d)/ N的概率从新页面打开,迭代计算其排名。
这实际上是pagerank算法的思路。要实现这个算法,就要将整个页面跳转关系用图的形式表示出来。修改 crawl_web函数
def crawl_web(seed): # returns index, graph of inlinks
tocrawl = [seed]
crawled = []
graph = {} # <url>, [list of pages it links to]
index = {}
while tocrawl:
page = tocrawl.pop()
if page not in crawled:
content = get_page(page)
add_page_to_index(index, page, content)
outlinks = get_all_links(content) #获取page内容中所有的链接,即该page的出边
graph[page] = outlinks #将page所有的出边加入图中
union(tocrawl, outlinks)
crawled.append(page)
return index, graph
然后就可以根据建好的图实现页面排名函数compute_ranks(graph):
def compute_ranks(graph):
d = 0.8 # damping factor
numloops = 10
ranks = {}
npages = len(graph)
for page in graph:
ranks[page] = 1.0 / npages
for i in range(0, numloops):
newranks = {}
for page in graph:
newrank = (1 - d) / npages
#Insert Code Here
for node in graph:
if page in graph[node]:
newrank += d * ranks[node] / len(graph[node])
#sum (d * rank(p, t - 1) / number of outlinks from p)
newranks[page] = newrank
ranks = newranks
return ranks
完整排名测试代码:
#Finishing the page ranking algorithm.
def compute_ranks(graph):
d = 0.8 # damping factor
numloops = 10
ranks = {}
npages = len(graph)
for page in graph:
ranks[page] = 1.0 / npages
for i in range(0, numloops):
newranks = {}
for page in graph:
newrank = (1 - d) / npages
#Insert Code Here
for node in graph:
if page in graph[node]:
newrank += d * ranks[node] / len(graph[node])
#sum (d * rank(p, t - 1) / number of outlinks from p)
newranks[page] = newrank
ranks = newranks
return ranks
cache = {
'http://udacity.com/cs101x/urank/index.html': """<html>
<body>
<h1>Dave's Cooking Algorithms</h1>
<p>
Here are my favorite recipies:
<ul>
<li> <a href="http://udacity.com/cs101x/urank/hummus.html">Hummus Recipe</a>
<li> <a href="http://udacity.com/cs101x/urank/arsenic.html">World's Best Hummus</a>
<li> <a href="http://udacity.com/cs101x/urank/kathleen.html">Kathleen's Hummus Recipe</a>
</ul>
For more expert opinions, check out the
<a href="http://udacity.com/cs101x/urank/nickel.html">Nickel Chef</a>
and <a href="http://udacity.com/cs101x/urank/zinc.html">Zinc Chef</a>.
</body>
</html>
""",
'http://udacity.com/cs101x/urank/zinc.html': """<html>
<body>
<h1>The Zinc Chef</h1>
<p>
I learned everything I know from
<a href="http://udacity.com/cs101x/urank/nickel.html">the Nickel Chef</a>.
</p>
<p>
For great hummus, try
<a href="http://udacity.com/cs101x/urank/arsenic.html">this recipe</a>.
</body>
</html>
""",
'http://udacity.com/cs101x/urank/nickel.html': """<html>
<body>
<h1>The Nickel Chef</h1>
<p>
This is the
<a href="http://udacity.com/cs101x/urank/kathleen.html">
best Hummus recipe!
</a>
</body>
</html>
""",
'http://udacity.com/cs101x/urank/kathleen.html': """<html>
<body>
<h1>
Kathleen's Hummus Recipe
</h1>
<p>
<ol>
<li> Open a can of garbonzo beans.
<li> Crush them in a blender.
<li> Add 3 tablesppons of tahini sauce.
<li> Squeeze in one lemon.
<li> Add salt, pepper, and buttercream frosting to taste.
</ol>
</body>
</html>
""",
'http://udacity.com/cs101x/urank/arsenic.html': """<html>
<body>
<h1>
The Arsenic Chef's World Famous Hummus Recipe
</h1>
<p>
<ol>
<li> Kidnap the <a href="http://udacity.com/cs101x/urank/nickel.html">Nickel Chef</a>.
<li> Force her to make hummus for you.
</ol>
</body>
</html>
""",
'http://udacity.com/cs101x/urank/hummus.html': """<html>
<body>
<h1>
Hummus Recipe
</h1>
<p>
<ol>
<li> Go to the store and buy a container of hummus.
<li> Open it.
</ol>
</body>
</html>
""",
}
def crawl_web(seed): # returns index, graph of inlinks
tocrawl = [seed]
crawled = []
graph = {} # <url>, [list of pages it links to]
index = {}
while tocrawl:
page = tocrawl.pop()
if page not in crawled:
content = get_page(page)
add_page_to_index(index, page, content)
outlinks = get_all_links(content)
graph[page] = outlinks
union(tocrawl, outlinks)
crawled.append(page)
return index, graph
def get_page(url):
if url in cache:
return cache[url]
else:
return None
def get_next_target(page):
start_link = page.find('<a href=')
if start_link == -1:
return None, 0
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1:end_quote]
return url, end_quote
def get_all_links(page):
links = []
while True:
url, endpos = get_next_target(page)
if url:
links.append(url)
page = page[endpos:]
else:
break
return links
def union(a, b):
for e in b:
if e not in a:
a.append(e)
def add_page_to_index(index, url, content):
words = content.split()
for word in words:
add_to_index(index, word, url)
def add_to_index(index, keyword, url):
if keyword in index:
index[keyword].append(url)
else:
index[keyword] = [url]
def lookup(index, keyword):
if keyword in index:
return index[keyword]
else:
return None
index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
ranks = compute_ranks(graph)
print ranks
习题集
兔子倍增,除了按斐波那契数列递增外,规定5个月大的兔子将老死。
def rabbits(n):
l = [1,1]
for i in range(2, n):
if i < 5:
l.append( l[i-1] + l[i-2])
else:
l.append( l[i-1] + l[i-2] - l[i-5])
return l[n - 1]
print rabbits(10)
#>>> 35
s = ""
for i in range(1,12):
s = s + str(rabbits(i)) + " "
print s
#>>> 1 1 2 3 5 7 11 16 24 35 52
深度计数,计算列表中元素个数,包括嵌套列表中的元素
def is_list(p): #判断p是否是列表
return isinstance(p, list)
def deep_count(p):
if not is_list(p):
return 1
s = 0
for e in p:
s += 1
if is_list(e): #递归调用检查子列表
s += deep_count(e)
return s
print deep_count([1, 2, 3])
#>>> 3
# The empty list still counts as an element of the outer list
print deep_count([1, [], 3])
#>>> 3
print deep_count([1, [1, 2, [3, 4]]])
#>>> 7
print deep_count([[[[[[[[1, 2, 3]]]]]]]])
#>>> 10
感觉幸运,用本单元实现的页面排名函数改善之前实现的搜索引擎的查询结果,定义lucky_search函数,对于给定关键词,返回最佳页面。
def lucky_search(index, ranks, keyword):
if keyword not in index:
return None
mx = 0
for e in index[keyword]:
if mx < ranks[e]:
lucky = e
mx = ranks[e]
return lucky
cache = {
'http://udacity.com/cs101x/urank/index.html': """<html>
<body>
<h1>Dave's Cooking Algorithms</h1>
<p>
Here are my favorite recipies:
<ul>
<li> <a href="http://udacity.com/cs101x/urank/hummus.html">Hummus Recipe</a>
<li> <a href="http://udacity.com/cs101x/urank/arsenic.html">World's Best Hummus</a>
<li> <a href="http://udacity.com/cs101x/urank/kathleen.html">Kathleen's Hummus Recipe</a>
</ul>
For more expert opinions, check out the
<a href="http://udacity.com/cs101x/urank/nickel.html">Nickel Chef</a>
and <a href="http://udacity.com/cs101x/urank/zinc.html">Zinc Chef</a>.
</body>
</html>
""",
'http://udacity.com/cs101x/urank/zinc.html': """<html>
<body>
<h1>The Zinc Chef</h1>
<p>
I learned everything I know from
<a href="http://udacity.com/cs101x/urank/nickel.html">the Nickel Chef</a>.
</p>
<p>
For great hummus, try
<a href="http://udacity.com/cs101x/urank/arsenic.html">this recipe</a>.
</body>
</html>
""",
'http://udacity.com/cs101x/urank/nickel.html': """<html>
<body>
<h1>The Nickel Chef</h1>
<p>
This is the
<a href="http://udacity.com/cs101x/urank/kathleen.html">
best Hummus recipe!
</a>
</body>
</html>
""",
'http://udacity.com/cs101x/urank/kathleen.html': """<html>
<body>
<h1>
Kathleen's Hummus Recipe
</h1>
<p>
<ol>
<li> Open a can of garbonzo beans.
<li> Crush them in a blender.
<li> Add 3 tablesppons of tahini sauce.
<li> Squeeze in one lemon.
<li> Add salt, pepper, and buttercream frosting to taste.
</ol>
</body>
</html>
""",
'http://udacity.com/cs101x/urank/arsenic.html': """<html>
<body>
<h1>
The Arsenic Chef's World Famous Hummus Recipe
</h1>
<p>
<ol>
<li> Kidnap the <a href="http://udacity.com/cs101x/urank/nickel.html">Nickel Chef</a>.
<li> Force her to make hummus for you.
</ol>
</body>
</html>
""",
'http://udacity.com/cs101x/urank/hummus.html': """<html>
<body>
<h1>
Hummus Recipe
</h1>
<p>
<ol>
<li> Go to the store and buy a container of hummus.
<li> Open it.
</ol>
</body>
</html>
""",
}
def get_page(url):
if url in cache:
return cache[url]
return ""
def get_next_target(page):
start_link = page.find('<a href=')
if start_link == -1:
return None, 0
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1:end_quote]
return url, end_quote
def get_all_links(page):
links = []
while True:
url, endpos = get_next_target(page)
if url:
links.append(url)
page = page[endpos:]
else:
break
return links
def union(a, b):
for e in b:
if e not in a:
a.append(e)
def add_page_to_index(index, url, content):
words = content.split()
for word in words:
add_to_index(index, word, url)
def add_to_index(index, keyword, url):
if keyword in index:
index[keyword].append(url)
else:
index[keyword] = [url]
def lookup(index, keyword):
if keyword in index:
return index[keyword]
else:
return None
def crawl_web(seed): # returns index, graph of inlinks
tocrawl = [seed]
crawled = []
graph = {} # <url>, [list of pages it links to]
index = {}
while tocrawl:
page = tocrawl.pop()
if page not in crawled:
content = get_page(page)
add_page_to_index(index, page, content)
outlinks = get_all_links(content)
graph[page] = outlinks
union(tocrawl, outlinks)
crawled.append(page)
return index, graph
def compute_ranks(graph):
d = 0.8 # damping factor
numloops = 10
ranks = {}
npages = len(graph)
for page in graph:
ranks[page] = 1.0 / npages
for i in range(0, numloops):
newranks = {}
for page in graph:
newrank = (1 - d) / npages
for node in graph:
if page in graph[node]:
newrank = newrank + d * (ranks[node] / len(graph[node]))
newranks[page] = newrank
ranks = newranks
return ranks
#Here's an example of how your procedure should work on the test site:
index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
ranks = compute_ranks(graph)
print lucky_search(index, ranks, 'Hummus')
#>>> http://udacity.com/cs101x/urank/kathleen.html
print lucky_search(index, ranks, 'the')
#>>> http://udacity.com/cs101x/urank/nickel.html
print lucky_search(index, ranks, 'babaganoush')
#>>> None
家谱,找出家谱字典树中给定person的所有祖先
ada_family = { 'Judith Blunt-Lytton': ['Anne Isabella Blunt', 'Wilfrid Scawen Blunt'],
'Ada King-Milbanke': ['Ralph King-Milbanke', 'Fanny Heriot'],
'Ralph King-Milbanke': ['Augusta Ada King', 'William King-Noel'],
'Anne Isabella Blunt': ['Augusta Ada King', 'William King-Noel'],
'Byron King-Noel': ['Augusta Ada King', 'William King-Noel'],
'Augusta Ada King': ['Anne Isabella Milbanke', 'George Gordon Byron'],
'George Gordon Byron': ['Catherine Gordon', 'Captain John Byron'],
'John Byron': ['Vice-Admiral John Byron', 'Sophia Trevannion'] }
def ancestors(genealogy, person):
if person not in genealogy:
return []
l = []
for e in genealogy[person]:
l.append(e)
l = l + ancestors(genealogy, e)
return l
print ancestors(ada_family, 'Augusta Ada King')
#>>> ['Anne Isabella Milbanke', 'George Gordon Byron',
# 'Catherine Gordon','Captain John Byron']
print ancestors(ada_family, 'Judith Blunt-Lytton')
#>>> ['Anne Isabella Blunt', 'Wilfrid Scawen Blunt', 'Augusta Ada King',
# 'William King-Noel', 'Anne Isabella Milbanke', 'George Gordon Byron',
# 'Catherine Gordon', 'Captain John Byron']
print ancestors(ada_family, 'Dave')
#>>> []
杨辉三角
# 1
# 1 1
# 1 2 1
# 1 3 3 1
# 1 4 6 4 1
def triangle(n):
if n == 0:
return []
if n == 1:
return [[1]]
l = []
l.append(triangle(n-1))
l.append(1)
for i in range(1,n-1):
l= l + l[n-1][i-1] + l[n-1][i]
l.append(1)
return l
#For example:
print triangle(0)
#>>> []
print triangle(1)
#>>> [[1]]
print triangle(2)
#>> [[1], [1, 1]]
print triangle(3)
#>>> [[1], [1, 1], [1, 2, 1]]
print triangle(6)
#>>> [[1], [1, 1], [1, 2, 1], [1, 3, 3, 1], [1, 4, 6, 4, 1], [1, 5, 10, 10, 5, 1]]
只有小小幸运,同样实现页面排名,与lucky_search函数不同的是要按序返回所有URL。
#按C++ 的思路实现版快排
def qsort(p, l, r):
a = p[l]
while l < r:
i = l + 1
j = r
while p[j] <= a:
j -= 1
while p[i] >= a:
i += 1
t = p[j]
p[j] = p[i]
p[i] = t
p[i] = a
qsort(p, i + 1, r)
qsort(p, l, i)
return p
#Python实现更清楚简洁
#手动扩展递归栈深度,其实这里不需要
import sys
sys.setrecursionlimit(10000) #例如这里设置为一百万
def quicksort(pages, ranks):
if not pages or len(pages) <= 1:
return pages
worse = []
better = []
base = ranks[ pages[0] ]
#注意遍历一定要从pages[1]开始,否则死循环
for page in pages[1:]:
if ranks[page] <= base:
worse.append(page)
else:
better.append(page)
return quicksort(better,ranks) + [pages[0]] + quicksort(worse,ranks)
def ordered_search(index, ranks, keyword):
if keyword not in index:
return None
l = index[keyword]
if not l:
return None
ans = quicksort(l, ranks)
return ans
cache = {
'http://udacity.com/cs101x/urank/index.html': """<html>
<body>
<h1>Dave's Cooking Algorithms</h1>
<p>
Here are my favorite recipies:
<ul>
<li> <a href="http://udacity.com/cs101x/urank/hummus.html">Hummus Recipe</a>
<li> <a href="http://udacity.com/cs101x/urank/arsenic.html">World's Best Hummus</a>
<li> <a href="http://udacity.com/cs101x/urank/kathleen.html">Kathleen's Hummus Recipe</a>
</ul>
For more expert opinions, check out the
<a href="http://udacity.com/cs101x/urank/nickel.html">Nickel Chef</a>
and <a href="http://udacity.com/cs101x/urank/zinc.html">Zinc Chef</a>.
</body>
</html>
""",
'http://udacity.com/cs101x/urank/zinc.html': """<html>
<body>
<h1>The Zinc Chef</h1>
<p>
I learned everything I know from
<a href="http://udacity.com/cs101x/urank/nickel.html">the Nickel Chef</a>.
</p>
<p>
For great hummus, try
<a href="http://udacity.com/cs101x/urank/arsenic.html">this recipe</a>.
</body>
</html>
""",
'http://udacity.com/cs101x/urank/nickel.html': """<html>
<body>
<h1>The Nickel Chef</h1>
<p>
This is the
<a href="http://udacity.com/cs101x/urank/kathleen.html">
best Hummus recipe!
</a>
</body>
</html>
""",
'http://udacity.com/cs101x/urank/kathleen.html': """<html>
<body>
<h1>
Kathleen's Hummus Recipe
</h1>
<p>
<ol>
<li> Open a can of garbonzo beans.
<li> Crush them in a blender.
<li> Add 3 tablesppons of tahini sauce.
<li> Squeeze in one lemon.
<li> Add salt, pepper, and buttercream frosting to taste.
</ol>
</body>
</html>
""",
'http://udacity.com/cs101x/urank/arsenic.html': """<html>
<body>
<h1>
The Arsenic Chef's World Famous Hummus Recipe
</h1>
<p>
<ol>
<li> Kidnap the <a href="http://udacity.com/cs101x/urank/nickel.html">Nickel Chef</a>.
<li> Force her to make hummus for you.
</ol>
</body>
</html>
""",
'http://udacity.com/cs101x/urank/hummus.html': """<html>
<body>
<h1>
Hummus Recipe
</h1>
<p>
<ol>
<li> Go to the store and buy a container of hummus.
<li> Open it.
</ol>
</body>
</html>
""",
}
def get_page(url):
if url in cache:
return cache[url]
return ""
def get_next_target(page):
start_link = page.find('<a href=')
if start_link == -1:
return None, 0
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1:end_quote]
return url, end_quote
def get_all_links(page):
links = []
while True:
url, endpos = get_next_target(page)
if url:
links.append(url)
page = page[endpos:]
else:
break
return links
def union(a, b):
for e in b:
if e not in a:
a.append(e)
def add_page_to_index(index, url, content):
words = content.split()
for word in words:
add_to_index(index, word, url)
def add_to_index(index, keyword, url):
if keyword in index:
index[keyword].append(url)
else:
index[keyword] = [url]
def lookup(index, keyword):
if keyword in index:
return index[keyword]
else:
return None
def crawl_web(seed): # returns index, graph of inlinks
tocrawl = [seed]
crawled = []
graph = {} # <url>, [list of pages it links to]
index = {}
while tocrawl:
page = tocrawl.pop()
if page not in crawled:
content = get_page(page)
add_page_to_index(index, page, content)
outlinks = get_all_links(content)
graph[page] = outlinks
union(tocrawl, outlinks)
crawled.append(page)
return index, graph
def compute_ranks(graph):
d = 0.8 # damping factor
numloops = 10
ranks = {}
npages = len(graph)
for page in graph:
ranks[page] = 1.0 / npages
for i in range(0, numloops):
newranks = {}
for page in graph:
newrank = (1 - d) / npages
for node in graph:
if page in graph[node]:
newrank = newrank + d * (ranks[node] / len(graph[node]))
newranks[page] = newrank
ranks = newranks
return ranks
index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html')
ranks = compute_ranks(graph)
print ordered_search(index, ranks, 'Hummus')
#>>> ['http://udacity.com/cs101x/urank/kathleen.html',
# 'http://udacity.com/cs101x/urank/nickel.html',
# 'http://udacity.com/cs101x/urank/arsenic.html',
# 'http://udacity.com/cs101x/urank/hummus.html',
# 'http://udacity.com/cs101x/urank/index.html']
print ordered_search(index, ranks, 'the')
#>>> ['http://udacity.com/cs101x/urank/nickel.html',
# 'http://udacity.com/cs101x/urank/arsenic.html',
# 'http://udacity.com/cs101x/urank/hummus.html',
# 'http://udacity.com/cs101x/urank/index.html']
print ordered_search(index, ranks, 'babaganoush')
#>>> None
第七单元
计算的过去、现在和未来
本单元先对前面的知识作了总结,然后介绍了计算的过去、现在和未来。计算的三个主题:抽象、通用性、递归定义
累积实践问题
三角数字,如1, 3, 6, 10, 15, 21, ...,计算方式如下:
# 1
# 1 + 2 = 3
# 1 + 2 + 3 = 6
# 1 + 2 + 3 + 4 = 10
# 1 + 2 + 3 + 4 + 5 = 15
def triangular(n):
s = 0
for i in range(1,n+1):
s += i
return s
print triangular(1)
#>>>1
print triangular(3)
#>>> 6
print triangular(10)
#>>> 55
删除标签
def remove_tags(s):
i = s.find('<')
while i != -1:
j = s.find('>',i)
s = s[:i] + ' ' +s[j+1:]
i = s.find('<')
return s.split()
print remove_tags('''<h1>Title</h1><p>This is a
<a href="http://www.udacity.com">link</a>.<p>''')
#>>> ['Title','This','is','a','link','.']
print remove_tags('''<table cellpadding='3'>
<tr><td>Hello</td><td>World!</td></tr>
</table>''')
#>>> ['Hello','World!']
print remove_tags("<hello><goodbye>")
#>>> []
print remove_tags("This is plain text.")
#>>> ['This', 'is', 'plain', 'text.']
查找替换
def make_converter(match, replacement):
return [match, replacement]
def apply_converter(converter, string):
i = string.find(converter[0])
l = len(converter[0])
if i == -1:
return string
return apply_converter(converter, string[:i] + converter[1] + string[i + l:])
# For example,
c1 = make_converter('aa', 'a')
print apply_converter(c1, 'aaaa')
#>>> a
c = make_converter('aba', 'b')
print apply_converter(c, 'aaaaaabaaaaa')
#>>> ab
最长重复
def longest_repetition(ls):
if not ls:
return None
l = len(ls)
mx = 0
cnt = 1
a = ls[0]
k = a
for i in range(1, l):
if a == ls[i]:
cnt += 1
else:
if mx < cnt:
k = a
mx = cnt
cnt = 1
a = ls[i]
if mx < cnt:
k = a
return k
#For example,
print longest_repetition([1, 2, 2, 3, 3, 3, 2, 2, 1])
# 3
print longest_repetition(['a', 'b', 'b', 'b', 'c', 'd', 'd', 'd'])
# b
print longest_repetition([1,2,3,4,5])
# 1
print longest_repetition([])
# None
深度翻转
def is_list(p):
return isinstance(p, list)
def deep_reverse(ls):
if not is_list(ls):
return ls
for i in range(len(ls)-1, -1, -1):
if is_list(ls[i]):
return deep_reverse(ls[i])
else :
return ls[i]
#For example,
p = [1, [2, 3, [4, [5, 6]]]]
print deep_reverse(p)
#>>> [[[[6, 5], 4], 3, 2], 1]
print p
#>>> [1, [2, 3, [4, [5, 6]]]]
q = [1, [2,3], 4, [5,6]]
print deep_reverse(q)
#>>> [ [6,5], 4, [3, 2], 1]
print q
#>>> [1, [2,3], 4, [5,6]]
挑战实践问题
斯特林数和贝尔数
第一类Stirling数:有正负但一般指正,其绝对值是将p个物体排成k个非空循环排列的方法数。
- 递推公式:S(p, k)= (p - 1) * S(p - 1, k) + S(p - 1, k - 1).
- 边界条件:S(p, 0) = 0, p >= 1. S(p, p) = 1, p >= 0.
递推关系推导:考虑第p个物品
该物品自成一个非空循环排列,就是S(p - 1, k - 1)种。
该物品加到已形成的k个非空循环排列中,可以放在其他p - 1个物品的任意一个位置,就是( p - 1) * S(p - 1, k)种。
第二类斯特林数:表示n的集合的划分为k个集合方法的数目。
- 递推公式:S(n, k) = k * S(n - 1, k) + S(n - 1, k - 1)
- 边界条件:S(n, n) = 1, n >= 0. S(n, 0) = 0, n >= 1.
递推关系推导:考虑第n个物品
把当前第n个物品作为单独一个集合,为S(n - 1, k - 1)种方法。
把当前第n个物品放在其他的集合中来构成k个集合,S(n - 1, k)种方法。
贝尔数:包含n个元素的集合的划分方法的数目。
递推公式:
Stirling数S(n, k)是把基数为n的集划分为正好k个非空集的方法的数目。
def stirling(n, m):
if n == 1 or m == 1 or n == m:
return 1
elif n < m:
return 0
return stirling(n - 1, m - 1) + stirling(n - 1, m) * m
def bell(n):
s = 0
for i in range(1, n+1):
s += stirling(n, i)
return s
打击垃圾链接
# One of the problems with our page ranking system is pages can
# collude with each other to improve their page ranks. We consider
# A->B a reciprocal link if there is a link path from B to A of length
# equal to or below the collusion level, k. The length of a link path
# is the number of links which are taken to travel from one page to the
# other.
# If k = 0, then a link from A to A is a reciprocal link for node A,
# since no links needs to be taken to get from A to A.
# If k=1, B->A would count as a reciprocal link if there is a link
# A->B, which includes one link and so is of length 1. (it requires
# two parties, A and B, to collude to increase each others page rank).
# If k=2, B->A would count as a reciprocal link for node A if there is
# a path A->C->B, for some page C, (link path of length 2),
# or a direct link A-> B (link path of length 1).
# Modify the compute_ranks code to
# - take an extra input k, which is a non-negative integer, and
# - exclude reciprocal links of length up to and including k from
# helping the page rank.
def is_rec_link(graph, source, destination, k):
if k == 0:
if destination == source:
return True
return False
if source in graph[destination]:
return True
for node in graph[destination]:
if is_rec_link(graph, source, destination, k - 1):
return True
return False
def compute_ranks(graph, k):
d = 0.8 # damping factor
numloops = 10
ranks = {}
npages = len(graph)
for page in graph:
ranks[page] = 1.0 / npages
for i in range(0, numloops):
newranks = {}
for page in graph:
newrank = (1 - d) / npages
for node in graph:
if page in graph[node]:
if not is_rec_link(graph, node, page, k):
newrank = newrank + d * (ranks[node]/len(graph[node]))
newranks[page] = newrank
ranks = newranks
return ranks
# For example
g = {'a': ['a', 'b', 'c'], 'b':['a'], 'c':['d'], 'd':['a']}
print compute_ranks(g, 0) # the a->a link is reciprocal
#>>> {'a': 0.26676872354238684, 'c': 0.1216391112164609,
# 'b': 0.1216391112164609, 'd': 0.1476647842238683}
print compute_ranks(g, 1) # a->a, a->b, b->a links are reciprocal
#>>> {'a': 0.14761759762962962, 'c': 0.08936469270123457,
# 'b': 0.04999999999999999, 'd': 0.12202199703703702}
print compute_ranks(g, 2)
# a->a, a->b, b->a, a->c, c->d, d->a links are reciprocal
# (so all pages end up with the same rank)
#>>> {'a': 0.04999999999999999, 'c': 0.04999999999999999,
# 'b': 0.04999999999999999, 'd': 0.04999999999999999}
元胞自动机
# A one-dimensional cellular automata takes in a string, which in our
# case, consists of the characters '.' and 'x', and changes it according
# to some predetermined rules. The rules consider three characters, which
# are a character at position k and its two neighbours, and determine
# what the character at the corresponding position k will be in the new
# string.
# For example, if the character at position k in the string is '.' and
# its neighbours are '.' and 'x', then the pattern is '..x'. We look up
# '..x' in the table below. In the table, '..x' corresponds to 'x' which
# means that in the new string, 'x' will be at position k.
# Rules:
# pattern in position k in contribution to
# Value current string new string pattern number
# is 0 if replaced by '.'
# and value if replaced
# by 'x'
# 1 '...' '.' 1 * 0
# 2 '..x' 'x' 2 * 1
# 4 '.x.' 'x' 4 * 1
# 8 '.xx' 'x' 8 * 1
# 16 'x..' '.' 16 * 0
# 32 'x.x' '.' 32 * 0
# 64 'xx.' '.' 64 * 0
# 128 'xxx' 'x' 128 * 1
# ----------
# 142
# To calculate the patterns which will have the central character x, work
# out the values required to sum to the pattern number. For example,
# 32 = 32 so only pattern 32 which is x.x changes the central position to
# an x. All the others have a . in the next line.
# 23 = 16 + 4 + 2 + 1 which means that 'x..', '.x.', '..x' and '...' all
# lead to an 'x' in the next line and the rest have a '.'
# For pattern 142, and starting string
# ...........x...........
# the new strings created will be
# ..........xx........... (generations = 1)
# .........xx............ (generations = 2)
# ........xx............. (generations = 3)
# .......xx.............. (generations = 4)
# ......xx............... (generations = 5)
# .....xx................ (generations = 6)
# ....xx................. (generations = 7)
# ...xx.................. (generations = 8)
# ..xx................... (generations = 9)
# .xx.................... (generations = 10)
# Note that the first position of the string is next to the last position
# in the string.
# Define a procedure, cellular_automaton, that takes three inputs:
# a non-empty string,
# a pattern number which is an integer between 0 and 255 that
# represents a set of rules, and
# a positive integer, n, which is the number of generations.
# The procedure should return a string which is the result of
# applying the rules generated by the pattern to the string n times.
def cellular_automaton(s, pn, g):
pas = {}
plist = ['...','..x','.x.','.xx','x..','x.x','xx.','xxx']
n = len(s)
for i in range(7,-1,-1):
if pn / (2**i) == 1:
pas[ plist[i] ] = 'x'
pn -= 2**i
else:
pas[ plist[i] ] = '.'
for j in range(0, g):
ns = ''
for i in range(0, n):
pa = s[i-1] + s[i] + s[(i+1)%n]
ns += pas[pa]
s = ns
return ns
print cellular_automaton('.x.x.x.x.', 17, 2)
#>>> xxxxxxx..
print cellular_automaton('.x.x.x.x.', 249, 3)
#>>> .x..x.x.x
print cellular_automaton('...x....', 125, 1)
#>>> xx.xxxxx
print cellular_automaton('...x....', 125, 2)
#>>> .xxx....
print cellular_automaton('...x....', 125, 3)
#>>> .x.xxxxx
print cellular_automaton('...x....', 125, 4)
#>>> xxxx...x
print cellular_automaton('...x....', 125, 5)
#>>> ...xxx.x
print cellular_automaton('...x....', 125, 6)
#>>> xx.x.xxx
print cellular_automaton('...x....', 125, 7)
#>>> .xxxxx..
print cellular_automaton('...x....', 125, 8)
#>>> .x...xxx
print cellular_automaton('...x....', 125, 9)
#>>> xxxx.x.x
print cellular_automaton('...x....', 125, 10)
#>>> ...xxxxx
最终项目
将简单的字符串,类似“Alex likes Carla, Ryan, and Priya” 转换为一个社交网络。即根据给定的形如 <user> is connected to <user1>, ..., <userM>.<user> likes to play <game1>, ..., <gameN>的字符串,自己构建数据结构存储网络。网络分为<user1>, ..., <userM>的相邻结点和<game1>, ..., <gameN>的游戏结点。
思路:整个network是一个大字典,每个user的名字是键。其值包括一个大列表,大列表分为两个小列表,第一个列表存其相连的user的名字,第二个列表存其喜欢玩的游戏。即形如{'Freda': [['Olive', 'John', 'Debra'], ['Starfleet Commander', 'Ninja Hamsters', 'Seahorse Adventures']]}
构建网络
example_input="John is connected to Bryant, Debra, Walter.\
John likes to play The Movie: The Game, The Legend of Corgi, Dinosaur Diner.\
Bryant is connected to Olive, Ollie, Freda, Mercedes.\
Bryant likes to play City Comptroller: The Fiscal Dilemma, Super Mushroom Man.\
Mercedes is connected to Walter, Robin, Bryant.\
Mercedes likes to play The Legend of Corgi, Pirates in Java Island, Seahorse Adventures.\
Olive is connected to John, Ollie.\
Olive likes to play The Legend of Corgi, Starfleet Commander.\
Debra is connected to Walter, Levi, Jennie, Robin.\
Debra likes to play Seven Schemers, Pirates in Java Island, Dwarves and Swords.\
Walter is connected to John, Levi, Bryant.\
Walter likes to play Seahorse Adventures, Ninja Hamsters, Super Mushroom Man.\
Levi is connected to Ollie, John, Walter.\
Levi likes to play The Legend of Corgi, Seven Schemers, City Comptroller: The Fiscal Dilemma.\
Ollie is connected to Mercedes, Freda, Bryant.\
Ollie likes to play Call of Arms, Dwarves and Swords, The Movie: The Game.\
Jennie is connected to Levi, John, Freda, Robin.\
Jennie likes to play Super Mushroom Man, Dinosaur Diner, Call of Arms.\
Robin is connected to Ollie.\
Robin likes to play Call of Arms, Dwarves and Swords.\
Freda is connected to Olive, John, Debra.\
Freda likes to play Starfleet Commander, Ninja Hamsters, Seahorse Adventures."
def create_data_structure(string_input):
network = {}
if string_input == ' ':
return network
#print len(string_input)
h = 0
i = string_input.find(".")
while i != -1:
con = []
play = []
ele = []
#print "i = " + str(i)
#print string_input[:i]
j = string_input.find(" ", h)
usr = string_input[h:j]
k = string_input.find("to ", j)
name = ""
for c in string_input[k + 3:i]:
if c == ',':
con.append(name)
name = ""
else:
if c != ' ':
name += c
con.append(name)
for e in con:
if e[0] == ' ':
e = e[1:]
#print con
h = i + 1
i = string_input.find(".", h)
k = string_input.find("play ", j)
name = ""
for c in string_input[k + 5:i]:
if c == ',':
if name[0] ==' ':
name = name[1:]
play.append(name)
name = ""
else:
name += c
if name[0] ==' ':
name = name[1:]
play.append(name)
#print play
ele.append(con)
ele.append(play)
network[usr] = ele
h = i + 1
i = string_input.find(".", h)
return network
#测试
net = create_data_structure(example_input)
print net
获取user的链接结点
def get_connections(network, user):
if user not in network:
return None
return network[user][0]
#测试
print get_connections(net, 'Walter')
=>
['John', 'Levi', 'Bryant']
获取user喜欢玩的游戏
def get_games_liked(network, user):
if user not in network:
return None
return network[user][1]
#测试
print get_games_liked(net, 'Walter')
=>
['Seahorse Adventures', ' Ninja Hamsters', ' Super Mushroom Man']
增加两用户的连接
def add_connection(network, user_A, user_B):
if user_A not in network or user_B not in network:
return False
if user_B not in network[user_A][0]:
network[user_A][0].append(user_B)
#print network[user_A][0]
return network
#测试
add_connection(net, 'Robin', 'Levi')
print get_connections(net, 'Robin')
print get_connections(net, 'Levi')
=>
['Ollie', 'Levi']
['Ollie', 'John', 'Walter']
增加新用户
def add_new_user(network, user, games):
if user not in network:
network[user] = []
network[user].append([])
network[user].append(games)
return network
#测试
games = ['tennis', 'pingpang']
print add_new_user(net, 'chandeler', games)
=>
['tennis', 'pingpang']
获取user的二次连接点
def get_secondary_connections(network, user):
if user not in network:
return None
if network[user][0] == []:
return []
l = []
for e in network[user][0]:
l.extend(network[e][0])
l = list(set(l))
return l
#测试
print get_secondary_connections(net, 'Robin')
print get_connections(net, 'Ollie')
print get_connections(net, 'Levi')
=>
['Mercedes', 'Freda', 'Bryant', 'Ollie', 'John', 'Walter']
['Mercedes', 'Freda', 'Bryant']
['Ollie', 'John', 'Walter']
获取两user的共同连接点
def count_common_connections(network, user_A, user_B):
if user_A not in network or user_B not in network:
return False
cnt = 0
for e in network[user_A][0]:
if e in network[user_B][0]:
print e
cnt += 1
return cnt
#测试
print count_common_connections(net, 'Walter', 'Olive')
=>
John
1
查找user_A到user_B的连接路径
def find_path_to_friend(network, user_A, user_B, crawled = None):
# your RECURSIVE solution here!
if user_A not in network or user_B not in network:
return None
if crawled is None:
crawled = []
crawled += [user_A]
if user_B in network[user_A][0]:
return [user_A, user_B]
for e in network[user_A][0]:
if e not in crawled:
l = find_path_to_friend(network, e, user_B, crawled)
if l:
return [user_A] + l
return None
#测试
print find_path_to_friend(net, 'John', 'Ollie')
=>
['John', 'Bryant', 'Ollie']