import re import time import Queue import urllib2 import threading from bs4 import BeautifulSoup import sys import urllib from pybloom import BloomFilter import time
# Bloom filter created with capacity 1,000,000 and a 0.01 error rate.
# NOTE(review): presumably used to remember URLs already seen -- confirm
# against the code that adds to / queries `bf`.
bf = BloomFilter(capacity=1000000, error_rate=0.01)

# Python 2 only: make UTF-8 the interpreter-wide default string encoding.
# reload(sys) comes first because site.py removes sys.setdefaultencoding
# from the module at interpreter startup.
reload(sys)
sys.setdefaultencoding("utf-8")

# Shared queue object; maxsize=0 means the queue size is unbounded.
# NOTE(review): the name suggests it holds URLs waiting to be processed.
url_wait = Queue.Queue(maxsize=0)
class MyThread(threading.Thread):
    """Thread that waits briefly and then calls ``traverse`` on one URL."""

    def __init__(self, url, num):
        """Store the URL this thread will work on.

        url -- value later passed unchanged to ``traverse``.
        num -- accepted for the callers' benefit but currently not stored
               or used by the thread.
        """
        super(MyThread, self).__init__()
        self.url = url

    def run(self):
        """Thread body: pause for five seconds, then traverse ``self.url``.

        ``traverse`` is a module-level name that is expected to be defined
        elsewhere in this file.
        """
        time.sleep(5)
        traverse(self.url)