{"id":1382,"date":"2022-05-21T20:13:26","date_gmt":"2022-05-21T12:13:26","guid":{"rendered":"http:\/\/www.eait.co\/?p=1382"},"modified":"2022-05-30T10:36:33","modified_gmt":"2022-05-30T02:36:33","slug":"nlp%e5%88%9d%e6%ad%a5-hannlp","status":"publish","type":"post","link":"https:\/\/notes.coremix.net\/?p=1382","title":{"rendered":"NLP\u521d\u6b65\u2014\u2014HanNLP"},"content":{"rendered":"<p>pip install JPype1-0.7.0-cp37-cp37m-win_amd64<br \/>\npip install pyhanlp<br \/>\n\u7136\u540e\u547d\u4ee4\u884cimport pyhanlp\u65f6\uff0c\u4f1a\u6709\u4e00\u4e2a\u4e0b\u8f7d\u7684\u8fc7\u7a0b<br \/>\n\u90e8\u5206\u53ef\u80fd\u8981\u624b\u52a8\u4e0b\u8f7d<\/p>\n<p>&nbsp;<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\n#-*- coding:utf-8 -*-\r\nimport pyhanlp\r\nimport jpype\r\njvm_path = jpype.getDefaultJVMPath()\r\nhanlp_classpath = r&quot;D:\\Users\\xabcd\\Anaconda3\\Lib\\site-packages\\pyhanlp\\static\\hanlp-1.8.3.jar&quot;\r\njvm_arg = '-Djava.class.path='+hanlp_classpath\r\n#\u5224\u65adjvm\u8fdb\u7a0b\u662f\u5426\u5f00\u542f\r\n# if not jpype.startJVM(jvm_path,jvm_arg):   # \u5982\u679c\u6ca1\u5f00\u542f\u8fdb\u7a0b  (\u53ef\u80fd\u8def\u5f84\u8f93\u9519\u4e86\uff0c\u62a5\u8fdb\u7a0b\u5df2\u5f00\u542f\u7684\u9519\u8bef)\r\n#     jpype.startJVM(jvm_path,jvm_arg)  # \u5219\u8fdb\u884c\u5f00\u542f\r\nHanLP = jpype.JClass('com.hankcs.hanlp.HanLP')\r\n\r\ntext = &quot;\u563f\u563f\u563f\uff0c\u8fd9\u662f\u7b2c\u4e00\u4e2a\u5173\u4e8eHanLP\u7684\u6d4b\u8bd5\uff01&quot;\r\nprint('1-standard segment',HanLP.segment(text))\r\n# &#x5B;\u563f\u563f\u563f\/o, \uff0c\/w, \u8fd9\/rzv, \u662f\/vshi, \u7b2c\/mq, \u4e00\u4e2a\/mq, \u5173\u4e8e\/p, HanLP\/nx, \u7684\/ude1, \u6d4b\u8bd5\/vn, \uff01\/w]\r\nHanLPTokensizer = jpype.JClass('com.hankcs.hanlp.tokenizer.NLPTokenizer')\r\nprint('2-NLP segment',HanLPTokensizer.segment(text))\r\n# 2-NLP segment &#x5B;\u563f\u563f\u563f\/o, \uff0c\/w, \u8fd9\/r, \u662f\/v, \u7b2c\u4e00\u4e2a\/m, \u5173\u4e8e\/p, HanLP\/nx, 
\u7684\/u, \u6d4b\u8bd5\/vn, \uff01\/w]\r\n\r\n# \u81ea\u5b9a\u4e49\u8bcd\u5178\r\nCustomDictionary = jpype.JClass('com.hankcs.hanlp.dictionary.CustomDictionary')\r\ntext2=&quot;\u7075\u5c71\u5b66\u9662\u653b\u57ce\u72ee\u9006\u88ad\u5355\u8eab\u72d7\uff0c\u8d70\u5411\u8ba4\u8bc1\u5dc5\u5cf0&quot;  #\u9519\u522b\u5b57\u770b\u770b\r\nprint('3-NLP segment',HanLPTokensizer.segment(text2))\r\n# 2-NLP segment &#x5B;\u7075\u5c71\u5b66\u9662\u653b\u57ce\u72ee\/nt, \u9006\u88ad\/v, \u5355\u8eab\/n, \u72d7\/n, \uff0c\/w, \u8d70\u5411\/v, \u8ba4\u8bc1\/v, \u5dc5\u5cf0\/n]\r\nCustomDictionary.add('\u653b\u57ce\u72ee')\r\nCustomDictionary.add('\u7075\u5c71')\r\nCustomDictionary.add('\u5b66\u9662')\r\nprint('4-NLP segment',HanLPTokensizer.segment(text2))\r\nparagraphs = &quot;SnowNLP\u662f\u4e00\u4e2apython\u5199\u7684\u7c7b\u5e93\uff0c\u53ef\u4ee5\u65b9\u4fbf\u7684\u5904\u7406\u4e2d\u6587\u6587\u672c\u5185\u5bb9\uff0c\u662f\u53d7\u5230\u4e86TextBlob\u7684\u542f\u53d1\u800c\u5199\u7684\uff0c\u7531\u4e8e\u73b0\u5728\u5927\u90e8\u5206\u7684\u81ea\u7136\u8bed\u8a00\u5904\u7406\u5e93\u57fa\u672c\u90fd\u662f\u9488\u5bf9\u82f1\u6587\u7684\uff0c\u4e8e\u662f\u5199\u4e86\u4e00\u4e2a\u65b9\u4fbf\u5904\u7406\u4e2d\u6587\u7684\u7c7b\u5e93\uff0c\u5e76\u4e14\u548cTextBlob\u4e0d\u540c\u7684\u662f\uff0c\u8fd9\u91cc\u6ca1\u6709\u7528NLTK\uff0c\u6240\u6709\u7684\u7b97\u6cd5\u90fd\u662f\u81ea\u5df1\u5b9e\u73b0\u7684\uff0c\u5e76\u4e14\u81ea\u5e26\u4e86\u4e00\u4e9b\u8bad\u7ec3\u597d\u7684\u5b57\u5178\u3002\u6ce8\u610f\u672c\u7a0b\u5e8f\u90fd\u662f\u5904\u7406\u7684unicode\u7f16\u7801\uff0c\u6240\u4ee5\u4f7f\u7528\u65f6\u8bf7\u81ea\u884cdecode\u6210unicode\u3002&quot;\r\nprint('abatract key words',HanLP.extractKeyword(paragraphs,5))\r\nprint('summary',HanLP.extractSummary(paragraphs,2))  # \u7ed3\u679c\u548csnownlp\u4e00\u6837\r\n\r\n\r\n\r\n\r\n\r\n<\/pre>\n<p>\u5176\u5b83\uff1a<\/p>\n<p><span class=\"md-line md-end-block md-focus\" contenteditable=\"true\"><span class=\"md-expand\"> java\\c++ 
\u53ef\u4ee5\u8c03\u7528hanlp<\/span><\/span><span class=\"md-line md-end-block\" contenteditable=\"true\"><span class=\"\"> \uff081\uff09pyhanlp<\/span><\/span><span class=\"md-line md-end-block\" contenteditable=\"true\"><span class=\"\"> \uff082\uff09\u5b89\u88c5\u6309\u7167\uff1a<\/span><\/span><\/p>\n<p>&nbsp;<\/p>\n<p><span class=\"md-line md-end-block\" contenteditable=\"true\"><span class=\"\">step1\uff1a\u4e0b\u8f7d JPype1.whl\uff0c\u7136\u540e\u5728Terminal\u6216\u8005 \u76f4\u63a5\u5728cmd\u91cc\u9762 <\/span><\/span><span class=\"md-line md-end-block\" contenteditable=\"true\"><span class=\"\"> pip install JPype1-0.7.0-cp37-cp37m-win_amd64.whl<\/span><\/span><\/p>\n<p>&nbsp;<\/p>\n<p><span class=\"md-line md-end-block\" contenteditable=\"true\"><span class=\"\">step2\uff1apip install pyhanlp<\/span><\/span><\/p>\n<p>&nbsp;<\/p>\n<p><span class=\"md-line md-end-block\" contenteditable=\"true\"><span class=\"\">step3\uff1a\u5728python console\u91cc\u9762\uff0c\u7b2c\u4e00\u6b21\u8c03\u7528\uff1a<\/span><\/span><span class=\"md-line md-end-block\" contenteditable=\"true\"> import pyhanlp\uff0c\u81ea\u52a8\u4e0b\u8f7djar\u5305\u3001data\u6587\u4ef6\uff081\u4e2aG\uff09\u7b49\u3002<\/span><\/p>\n<p><span class=\"md-line md-end-block\" contenteditable=\"true\">\uff083\uff09Hanlp\u7684\u529f\u80fd\u6f14\u793a<\/span><span class=\"md-line md-end-block\" contenteditable=\"true\"> <span spellcheck=\"false\"><a href=\"http:\/\/hanlp.com\">http:\/\/hanlp.com<\/a><\/span>\/<\/span><span class=\"md-line md-end-block\" contenteditable=\"true\"> <span class=\"\" spellcheck=\"false\"><a href=\"https:\/\/github.com\/hankcs\/HanLP\">https:\/\/github.com\/hankcs\/HanLP<\/a><\/span><\/span><\/p>\n<p>&nbsp;<\/p>\n<p>&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>pip install JPype1-0.7.0-cp37-cp37m-win_amd64 pip insta 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[3],"tags":[32,20],"class_list":["post-1382","post","type-post","status-publish","format-standard","hentry","category-python","tag-nlp","tag-python"],"blocksy_meta":[],"_links":{"self":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/1382","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1382"}],"version-history":[{"count":3,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/1382\/revisions"}],"predecessor-version":[{"id":1412,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/1382\/revisions\/1412"}],"wp:attachment":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1382"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1382"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1382"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}