Python实现的下载8000首儿歌的代码分享

下面是用 Python 下载 8000 首儿歌的代码:


#-*- coding: UTF-8 -*-

import logging
import os
import re
import sys
import urllib

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

from lxml import etree
from pyquery import PyQuery as py

def format(filename):
    """Return *filename* with spaces and apostrophes replaced by "_".

    Three substrings are replaced: a space, the HTML entity ``&#39;`` and a
    literal apostrophe.  Every occurrence of each is turned into a single
    underscore; a name containing none of them is returned unchanged.

    Note: the name shadows the builtin ``format``; it is kept because other
    code in this script calls the function under this name.

    Args:
        filename: Proposed file name (text).

    Returns:
        The sanitized file name.
    """
    # NOTE(review): the published listing showed the middle entry as three
    # bare quote characters, which is not valid Python.  It is presumably the
    # HTML entity for an apostrophe that got decoded when the code was pasted
    # into a web page -- confirm against the author's original script.
    unwanted = (' ', '&#39;', "'")
    for token in unwanted:
        # str.replace is a no-op when the token is absent, so no prior
        # membership test is needed.
        filename = filename.replace(token, "_")
    return filename

def download_mp3(mp3_url, filename, dir):
    """Download *mp3_url* into the file *filename* inside directory *dir*.

    Nothing is fetched when the target file already exists.  Any failure is
    logged and swallowed so that one bad link does not stop a batch run.
    Progress is reported at DEBUG level on the logger named "simple".

    Args:
        mp3_url: Address of the file to fetch.
        filename: Name of the file to create inside *dir*.
        dir: Directory that receives the file; it must already exist.

    Returns:
        None.
    """
    # Look the logger up by name instead of relying on a module global that
    # only exists when the file is executed as a script.
    log = logging.getLogger("simple")

    path = os.path.join(dir, filename)
    if os.path.exists(path):
        log.debug(path + " is existed.")
        return

    try:
        # Fetch the complete body BEFORE creating the target file: a failed
        # request must not leave an empty file behind, because the existence
        # check above would then skip this song on every later run.
        response = urlopen(mp3_url)
        try:
            data = response.read()
        finally:
            response.close()

        with open(path, 'wb') as out:
            out.write(data)
        log.debug(filename + ' is downloaded.')
    except Exception:
        # "except Exception" (not a bare except) keeps the best-effort
        # behaviour while still letting Ctrl+C interrupt the program.
        if os.path.exists(path):
            # A write error may have left a truncated file; remove it so the
            # download is retried next time.
            try:
                os.remove(path)
            except OSError:
                pass
        log.debug(filename + ' is not downloaded.')

        def download_all_mp3(start,end,dir,logger):   for x in range(start,end):     try:         url = "http://www.youban.com/mp3-d" + str(x) + ".html"         logger.debug(str(x) + ": "+url)         doc = py(url=url)         e = doc('.mp3downloadbox')         if e is None or e == '':           logger.debug(url+" is not existed.")           return                   e = unicode(e)         #logger.debug( e)         regex = re.compile(ur".*<h1>(.*)</h1>.*downloadboxlist.*?<a.*?\"(.*?)\"",re.UNICODE|re.S)         m = regex.search(e)         if m is not None:           title = m.group(1).strip()           title2 = str(x)+"_"+title + ".mp3"           #title2 = re.sub(' ','_',title2)           title2 = format(title2)           link = m.group(2)           #logger.debug( "title:" + title + " link:" + link)           if link == '' or title == '':             logger.debug(url + " is not useful")             continue           logger.debug(str(x)+": "+link)           download_mp3(link,title2,dir)     except:         logger.debug(url+" met exception.")         continue      

      if __name__ == "__main__":     dir_root = "e:\\song"     if sys.argv[3] != '': dir_root=sys.argv[3]         start,end = 1,8000     if sys.argv[1] >= 0 and sys.argv[2]>=0:       start,end = int(sys.argv[1]),int(sys.argv[2])       print ("Download from %s to %s.\n" % (start,end))              dir = dir_root + "\\"+str(start)+"-"+str(end)     if not os.path.exists(dir):       os.mkdir(dir)         print "Download to " + dir + ".\n"         logger = logging.getLogger("simple")     logger.setLevel(logging.DEBUG)      fh = logging.FileHandler(dir+"\\"+"download.log")     ch = logging.StreamHandler()     formatter = logging.Formatter("%(message)s")     ch.setFormatter(formatter)     fh.setFormatter(formatter)     logger.addHandler(ch)     logger.addHandler(fh)     download_all_mp3(start,end,dir,logger)

有需要的读者可以参考这段代码,并根据自己的需求继续修改。