下载 8000 首儿歌的 Python 代码:
# -*- coding: UTF-8 -*-
import logging
import os
import re
import sys
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

from lxml import etree
from pyquery import PyQuery as py
def format(filename):
    """Return *filename* with spaces and apostrophes replaced by underscores.

    Used to turn a scraped song title into a file name.

    NOTE(review): the original character tuple had three entries, but the
    middle one reached us as a garbled literal that renders as a bare
    apostrophe (possibly an HTML entity lost when the code was published).
    Only the two characters that are unambiguous are handled here -- confirm
    against the author's original if available.
    """
    # str.replace is a no-op when the character is absent, so no find()
    # pre-check is needed.
    for unsafe_char in (' ', "'"):
        filename = filename.replace(unsafe_char, "_")
    return filename
def download_mp3(mp3_url, filename, dir, logger=None):
    """Download *mp3_url* into the file *filename* inside directory *dir*.

    A file that already exists is left untouched, so an interrupted run can
    be resumed. Failures are logged rather than raised.

    Args:
        mp3_url: URL of the file to fetch.
        filename: Name of the file to create inside *dir*.
        dir: Existing directory that receives the file.
        logger: Optional logger. Defaults to the logger named "simple",
            which is the one the script's ``__main__`` section configures.

    Returns:
        None.
    """
    if logger is None:
        logger = logging.getLogger("simple")

    target = os.path.join(dir, filename)
    if os.path.exists(target):
        logger.debug(target + " is existed.")
        return

    try:
        # Fetch the whole payload BEFORE creating the target file, so a
        # failed request cannot leave an empty file behind that the
        # exists-check above would skip on the next run.
        response = urlopen(mp3_url)
        try:
            payload = response.read()
        finally:
            response.close()
        with open(target, 'wb') as out:
            out.write(payload)
    except Exception as exc:
        # The write itself may have failed half-way; drop the truncated file
        # for the same resume-safety reason. Cleanup is best effort.
        if os.path.exists(target):
            try:
                os.remove(target)
            except OSError:
                pass
        logger.debug(filename + ' is not downloaded. ' + repr(exc))
        return

    logger.debug(filename + ' is downloaded.')
# Matched against the serialized '.mp3downloadbox' markup. Group 1 is the text
# of the <h1>; group 2 is the first double-quoted value following the first
# "<a" after "downloadboxlist" (presumably the href -- confirm against the
# page markup). Compiled once instead of on every loop iteration.
_SONG_BOX_RE = re.compile(
    r".*<h1>(.*)</h1>.*downloadboxlist.*?<a.*?\"(.*?)\"",
    re.UNICODE | re.S)

try:
    _text_type = unicode  # Python 2, which this script was written for
except NameError:
    _text_type = str  # Python 3 has no separate unicode type


def download_all_mp3(start, end, dir, logger):
    """Scrape song pages numbered *start* .. *end* - 1 and download each MP3.

    For every page number ``x`` the page
    ``http://www.youban.com/mp3-d<x>.html`` is fetched, the title and link
    are extracted from its ``.mp3downloadbox`` element, and the file is saved
    as ``<x>_<title>.mp3`` (name passed through ``format``) via
    ``download_mp3``. A page that fails is logged and skipped, so one bad
    page does not stop the run.

    Args:
        start: First page number (inclusive).
        end: Last page number (exclusive, as in ``range``).
        dir: Directory handed to ``download_mp3`` for each file.
        logger: Object with a ``debug(message)`` method for progress output.

    Returns:
        None.
    """
    for x in range(start, end):
        url = "http://www.youban.com/mp3-d" + str(x) + ".html"
        try:
            logger.debug(str(x) + ": " + url)
            box = py(url=url)('.mp3downloadbox')
            # A PyQuery selection is a list; an empty one means the page has
            # no download box. The original compared it to None / '', which
            # never matched. Skip just this page rather than ending the run.
            if not box:
                logger.debug(url + " is not existed.")
                continue

            m = _SONG_BOX_RE.search(_text_type(box))
            if m is None:
                logger.debug(url + " is not useful")
                continue

            title = m.group(1).strip()
            link = m.group(2)
            if link == '' or title == '':
                logger.debug(url + " is not useful")
                continue

            logger.debug(str(x) + ": " + link)
            download_mp3(link, format(str(x) + "_" + title + ".mp3"), dir)
        except Exception as exc:
            # `except Exception` (not a bare except) so Ctrl-C can still
            # interrupt a long run; the cause is logged for diagnosis.
            logger.debug(url + " met exception: " + repr(exc))
            continue
if __name__ == "__main__":
    # Usage: <script> [start end [dir_root]]
    #   start, end  page-number range passed to download_all_mp3
    #               (default 1 8000)
    #   dir_root    directory under which a "<start>-<end>" folder is created
    #               (default e:\song)
    cli_args = sys.argv[1:]

    # All arguments are optional: check how many were supplied instead of
    # indexing sys.argv blindly, which raised IndexError when any was missing.
    start, end = 1, 8000
    if len(cli_args) >= 2:
        try:
            first, last = int(cli_args[0]), int(cli_args[1])
        except ValueError:
            sys.exit("start and end must be integers: %r %r"
                     % (cli_args[0], cli_args[1]))
        # Compare the parsed integers, not the raw strings; negative values
        # fall back to the default range.
        if first >= 0 and last >= 0:
            start, end = first, last

    dir_root = "e:\\song"
    if len(cli_args) >= 3 and cli_args[2] != '':
        dir_root = cli_args[2]

    print("Download from %s to %s.\n" % (start, end))

    dir = os.path.join(dir_root, str(start) + "-" + str(end))
    if not os.path.exists(dir):
        # makedirs also creates dir_root when it does not exist yet.
        os.makedirs(dir)
    print("Download to " + dir + ".\n")

    # `logger` stays a module-level name: download_mp3 reads it as a global.
    # Messages go both to the console and to download.log in the target dir.
    logger = logging.getLogger("simple")
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(message)s")
    fh = logging.FileHandler(os.path.join(dir, "download.log"))
    ch = logging.StreamHandler()
    for handler in (ch, fh):
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    download_all_mp3(start, end, dir, logger)
有需要的读者可以参考上面的代码,并在此基础上继续修改。