# 以下是从 163 镜像源下载 openstack-ocata 版软件包的脚本:
"""Mirror the openstack-ocata package tree from the 163 CentOS mirror.

The script walks the HTML directory listings starting at the URLs in
``url_tree``, recreates the directory layout locally, downloads every file
it finds, and finally retries the downloads that failed.
"""
import os
import socket
import sys
import urllib
import urllib.error
from html.parser import HTMLParser
from urllib import request

# Files whose reported size is at or below this many bytes are shown as 100%
# immediately instead of having their progress computed.
_SMALL_FILE_THRESHOLD = 505528

# Name of the file currently being downloaded; read by ``callbackfunc``.
file_name = ''

# Downloads that failed so far, as {url: local path}.  It is only ever mutated
# in place so that failures accumulate across ``download_file`` calls.
error_download = {}

# Seed of the crawl: {remote directory URL: local directory}.
url_tree = {
    "http://mirrors.163.com/centos/7/cloud/x86_64/openstack-ocata/":
        '/centos/7/cloud/x86_64/openstack-ocata/',
}


class myparser(HTMLParser):
    """Collect the ``href`` value of every ``<a>`` start tag fed to the parser."""

    def __init__(self):
        HTMLParser.__init__(self)
        # href values in document order.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the href attributes of ``<a>`` tags; ignore every other tag."""
        if tag != 'a':
            return
        for variable, value in attrs:
            # A bare ``href`` attribute yields None and an empty one yields '';
            # neither names anything that can be fetched, so both are skipped.
            if variable == 'href' and value:
                self.links.append(value)


def callbackfunc(blocknum, blocksize, totalsize):
    """Progress hook for ``urlretrieve``: redraw a one-line progress bar.

    @blocknum: number of blocks transferred so far
    @blocksize: size of one block in bytes
    @totalsize: size of the remote file in bytes
    """
    if totalsize > _SMALL_FILE_THRESHOLD:
        # The last block usually overshoots the real size, so clamp to 100.
        percent = min(100, int(100.0 * blocknum * blocksize / totalsize))
    else:
        # Small files are shown as complete straight away.  Checking the size
        # before dividing also keeps a total size of 0 from raising
        # ZeroDivisionError.
        percent = 100
    sys.stdout.write('\r')
    sys.stdout.write(file_name + percent * '>' + str(percent) + '%')
    sys.stdout.flush()


def create_dir(root_tree, catalog):
    """Create the local directory ``catalog`` below ``root_tree``.

    An already existing directory is not an error.  The process working
    directory is left untouched.
    """
    os.makedirs(os.path.join(root_tree, catalog), exist_ok=True)


def download_file(url, down_path):
    """Download ``url`` to ``down_path`` and keep ``error_download`` current.

    A failed download is recorded as ``error_download[url] = down_path``; a
    successful one removes any earlier failure entry for the same URL.
    """
    global file_name
    file_name = url.split('/')[-1]
    socket.setdefaulttimeout(30)
    try:
        request.urlretrieve(url, down_path, callbackfunc)
    except socket.gaierror:
        error_download[url] = down_path
        print('socket.gaierror', url)
    except urllib.error.URLError:
        error_download[url] = down_path
        print('urllib.error.URLError', url)
    except socket.timeout:
        # The 30 second default timeout set above can expire while the body
        # is being read, which surfaces as a plain socket.timeout.
        error_download[url] = down_path
        print('socket.timeout', url)
    else:
        error_download.pop(url, None)
    sys.stdout.write('\n')


def get_url_tree(url_tree):
    """Process one level of directory listings.

    ``url_tree`` maps each remote directory URL to its local directory.  For
    every link in a listing, a link containing '/' is treated as a
    sub-directory (created locally and queued for the next level) and any
    other link is downloaded as a file.

    Returns ``(next_level, level)``: ``next_level`` maps the sub-directory
    URLs found to their local directories, and ``level`` counts the links
    that contain a '/' after their first character.
    """
    url_tree_dict = {}
    level = 0
    for url, local_dir in url_tree.items():
        # Close the HTTP response as soon as the page has been read.
        with request.urlopen(url) as response:
            page = response.read().decode('utf-8')
        hp = myparser()
        hp.feed(page)
        hp.close()
        # Drop every parent-directory link so the crawl never walks upwards.
        links = [link for link in hp.links if link != '../']
        for link in links:
            if '/' in link:
                create_dir(local_dir, link)
                url_tree_dict[url + link] = local_dir + link
            else:
                download_file(url + link, local_dir + link)
            if link.find('/') > 0:
                level += 1
    return url_tree_dict, level


def main():
    """Mirror everything below ``url_tree``, then retry the failed downloads."""
    for local_dir in url_tree.values():
        os.makedirs(local_dir, exist_ok=True)

    pending = dict(url_tree)
    while True:
        pending, level = get_url_tree(pending)
        if level == 0:
            break
        print(pending, level)

    # Iterate over a snapshot: download_file updates error_download itself.
    for url, down_path in list(error_download.items()):
        download_file(url, down_path)


if __name__ == '__main__':
    main()