leetcode-六度分割

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

# _*_ coding: utf-8 _*_
import requests
from bs4 import BeautifulSoup
import re
import random
import datetime

# Set of pages; nothing in the visible script reads or writes it yet.
page = set()
# Seed the RNG from OS entropy / current time. Passing a datetime object to
# random.seed() raises TypeError on Python 3.11+, while the no-argument form
# gives the same non-deterministic seeding on every Python version.
random.seed()
def getlink(articleurl):
    """Fetch an English Wikipedia article and return its "/wiki/" links.

    Args:
        articleurl: Site-relative path of the article, e.g.
            "/wiki/Christopher_Nolan"; it is appended to
            "https://en.wikipedia.org".

    Returns:
        A list of ``<a>`` tags located inside the ``div`` with id
        ``bodyContent`` whose ``href`` starts with "/wiki/". The list is
        empty when the page contains no such ``div``.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    html = requests.get("https://en.wikipedia.org" + articleurl, timeout=10)
    soup = BeautifulSoup(html.text, "html.parser")
    body = soup.find("div", {"id": "bodyContent"})
    if body is None:
        # find() returns None when the container is missing (error page,
        # unexpected markup); report "no links" instead of crashing.
        return []
    return body.findAll("a", {"href": re.compile(r"^(/wiki/)")})

# Random walk over Wikipedia: start from one article and repeatedly follow a
# randomly chosen "/wiki/" link, printing each page visited.
MAX_HOPS = 6  # stop after this many jumps

links = getlink("/wiki/Christopher_Nolan")
i = 0  # number of jumps made so far
while links:  # stop early if a page offers no further links
    newArticle = random.choice(links)["href"]
    i += 1
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("第 %d 个页面:" % i)
    print(newArticle)
    if i == MAX_HOPS:
        # Leave before fetching a page whose links would never be used.
        break
    links = getlink(newArticle)

通过维基百科的词条链接,从一个词条到另一个词条最多只需要六次跳转。