import requests
from bs4 import BeautifulSoup

# Note: save the URL list to a file, e.g. with open('a.txt', 'w') as f:

# Crawl list pages 1-5 and collect the article URLs.
def url_list():
    urls_found = []
    for page in range(1, 6):
        page_url = 'http://www.zdfans.com/zd423/page/' + str(page)
        res = requests.get(page_url)
        soup = BeautifulSoup(res.text, 'lxml')
        # Each article preview sits in an <li> under <ul class="excerpt">.
        excerpt = soup.find('ul', attrs={'class': 'excerpt'})
        for li in excerpt.find_all('li'):
            links = li.find_all('a')
            url = links[1]['href']  # the second <a> links to the article page
            print(url)
            urls_found.append(url)
            contentparse(url)
    return urls_found

# Parse an article page and extract the title, publish time, and body text.

def contentparse(url):
    res = requests.get(url)
    soup = BeautifulSoup(res.text, 'lxml')
    title = soup.find('h1', attrs={'class': 'meta-tit'}).find('a').getText()
    # The first child of <p class="meta-info"> is a text node that starts
    # with the date; slice out the 10-character YYYY-MM-DD portion.
    time = soup.find('p', attrs={'class': 'meta-info'}).contents[0][1:11]
    context = soup.find('div', attrs={'class': 'entry'}).getText()
    print(title)
    print(time)
    print(context)
    return title, time, context
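
# The note at the top mentions writing the URL list to a.txt, but the
# original never implements it. A minimal sketch, assuming url_list()
# returns the collected article URLs as above (save_urls is a hypothetical
# helper, not part of the original):
def save_urls(path='a.txt'):
    urls = url_list()
    with open(path, 'w') as f:
        for u in urls:
            f.write(u + '\n')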

if __name__ == '__main__':

    url_list()