{"id":1374,"date":"2022-05-21T11:03:13","date_gmt":"2022-05-21T03:03:13","guid":{"rendered":"http:\/\/www.eait.co\/?p=1374"},"modified":"2022-05-21T11:03:13","modified_gmt":"2022-05-21T03:03:13","slug":"nlp%e5%88%9d%e6%ad%a5-nltk","status":"publish","type":"post","link":"https:\/\/notes.coremix.net\/?p=1374","title":{"rendered":"NLP\u521d\u6b65\u2014\u2014NLTK"},"content":{"rendered":"<p>\u5b89\u88c5\u76f8\u5173\u5e93\uff1a<\/p>\n<p>conda\u00a0 install NLTK<\/p>\n<p>then\u00a0 import nltk\u00a0 \u00a0 \u00a0 # \u8c03\u7528<\/p>\n<p>\u9996\u6b21\u5728python\u4e2d\u6267\u884cnltk.download\uff08\uff09<\/p>\n<p>\u53ef\u4ee5\u8bbe\u7f6e\u76f8\u5173\u7684\u4e0b\u8f7d\u5730\u5740<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" width=\"831\" height=\"654\" class=\"alignnone wp-image-1375 size-full\" src=\"http:\/\/www.eait.co\/wp-content\/uploads\/2022\/05\/Pasted.png\" srcset=\"https:\/\/notes.coremix.net\/wp-content\/uploads\/2022\/05\/Pasted.png 831w, https:\/\/notes.coremix.net\/wp-content\/uploads\/2022\/05\/Pasted-300x236.png 300w, https:\/\/notes.coremix.net\/wp-content\/uploads\/2022\/05\/Pasted-768x604.png 768w\" sizes=\"auto, (max-width: 831px) 100vw, 831px\" \/><\/p>\n<p><strong>\u65e0\u6cd5\u652f\u6301\u5411\u91cf\u8f6c\u6362\uff0c\u53ef\u4ee5\u901a\u8fc7jieba\u8fdb\u884c\u5206\u8bcd\u548c\u5411\u91cf\u8f6c\u6362\uff0c\u6700\u540e\u518d\u7528nltk\u5904\u7406<\/strong><\/p>\n<p>&nbsp;<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\n#-*- coding:utf-8 -*-\r\n\r\nimport nltk\r\ntext = 'Join thousands of learners from around the world who are improving their English listening skills with our online courses. Join thousands of learners from around the world who are improving their English listening skills with our online courses.'   # \u5fc5\u987b\u540e\u9762\u53e5\u53f7\u540e\u9762\u6709\u7a7a\u683c\u624d\u80fd\u5206\u53e5\r\nsens = nltk.sent_tokenize(text,language='english')\r\nprint(sens)\r\nwords = &#x5B;]\r\nfor sent in sens:\r\n    words.append(nltk.word_tokenize(sent))\r\nprint(words)\r\n\r\n# \u8bcd\u6027\u6807\u6ce8\r\ntags = &#x5B;]\r\nfor token in words:\r\n    tags.append(nltk.pos_tag(token))\r\nprint(tags)\r\n\r\ntextzh = '\u672c\u4eba\u559c\u6b22\u6298\u817e\uff0c\u5012\u817e\u5927\u6570\u636e\u548cAI\u4eba\u5de5\u667a\u80fd\u6ef4\u4e00\u4e9b\u6280\u672f\u3002'\r\nsens_zh = nltk.sent_tokenize(textzh)  # \u76ee\u6d4b\u65e0\u6cd5\u5904\u7406\u4e2d\u6587\uff0c\u4e14\u53e5\u53f7\u540e\u8981\u52a0\u7a7a\u683c\r\nprint(sens_zh)\r\n\r\n<\/pre>\n<p>&nbsp;<\/p>\n<p>&nbsp;<\/p>\n<p>&nbsp;<\/p>\n<p>&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5b89\u88c5\u76f8\u5173\u5e93\uff1a conda\u00a0 install NLTK then\u00a0 import nltk\u00a0 \u00a0 \u00a0 # \u8c03\u7528  [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[3],"tags":[32,20],"class_list":["post-1374","post","type-post","status-publish","format-standard","hentry","category-python","tag-nlp","tag-python"],"blocksy_meta":[],"_links":{"self":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/1374","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1374"}],"version-history":[{"count":1,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/1374\/revisions"}],"predecessor-version":[{"id":1376,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/1374\/revisions\/1376"}],"wp:attachment":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1374"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1374"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1374"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}