{"id":170,"date":"2018-11-03T22:24:00","date_gmt":"2018-11-03T14:24:00","guid":{"rendered":"http:\/\/www.eait.co\/?p=170"},"modified":"2019-04-05T12:39:19","modified_gmt":"2019-04-05T04:39:19","slug":"python%e5%b0%8f%e8%af%b4%e7%bd%91%e7%ab%99%e6%89%b9%e9%87%8f%e4%b8%8b%e8%bd%bd%e8%84%9a%e6%9c%ac","status":"publish","type":"post","link":"https:\/\/notes.coremix.net\/?p=170","title":{"rendered":"python\u5c0f\u8bf4\u7f51\u7ad9\u6279\u91cf\u4e0b\u8f7d\u811a\u672c"},"content":{"rendered":"<pre>import urllib.request,re\r\n\r\n\r\ndef getNovelContent():\r\n    html = urllib.request.urlopen('http:\/\/www.quanshuwang.com\/book\/0\/742').read()\r\n    html = html.decode('gbk')\r\n    #print(html)\r\n    ###.*?\u662f\u5339\u914d\u6240\u6709\u7684\uff0c\u52a0\u62ec\u53f7\u662f\u6211\u4eec\u60f3\u8981\u7684\uff0c\u653e\u8fdb\u5217\u8868\u91cc\u9762###\r\n    reg = r'&lt;li&gt;&lt;a href=\"(.*?)\" title=\".*?\"&gt;(.*?)&lt;\/a&gt;&lt;\/li&gt;'\r\n\r\n    #\u589e\u52a0\u5339\u914d\u6548\u7387\r\n    reg = re.compile(reg)\r\n    urls = re.findall(reg,html)\r\n    for url in urls:\r\n        novel_url = url[0]\r\n        novel_title = url[1]\r\n        chapt = urllib.request.urlopen(novel_url).read()\r\n        chapt_html = chapt.decode('gbk')\r\n        reg = r'&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;(.*?)&lt;script type=\"text\/javascript\"&gt;'\r\n        reg = re.compile(reg,re.S)\r\n        chapt_content = re.findall(reg,chapt_html)\r\n\r\n        chapt_content = chapt_content[0].replace(\"&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;\",\"\")\r\n        #print(chapt_content)\r\n        chapt_content = chapt_content.replace(\"&lt;br \/&gt;\", \"\")\r\n\r\n\r\n        #print(chapt_content)\r\n        print('\u6b63\u5728\u4fdd\u5b58%s'%novel_title)\r\n        f = open('{}.txt'.format(novel_title),'w')\r\n        f.write(chapt_content)\r\n        f.close()\r\ngetNovelContent()<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>import urllib.request,re def getNovelContent(): html =  [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[3,25,2],"tags":[],"class_list":["post-170","post","type-post","status-publish","format-standard","hentry","category-python","category-py","category-index"],"blocksy_meta":[],"_links":{"self":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/170","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=170"}],"version-history":[{"count":1,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/170\/revisions"}],"predecessor-version":[{"id":171,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/170\/revisions\/171"}],"wp:attachment":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=170"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=170"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=170"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}