{"id":1350,"date":"2022-04-19T23:33:39","date_gmt":"2022-04-19T15:33:39","guid":{"rendered":"http:\/\/www.eait.co\/?p=1350"},"modified":"2022-05-05T09:36:18","modified_gmt":"2022-05-05T01:36:18","slug":"spark-wordcount-%e5%88%9d%e6%ad%a5","status":"publish","type":"post","link":"https:\/\/notes.coremix.net\/?p=1350","title":{"rendered":"spark wordcount \u521d\u6b65"},"content":{"rendered":"<pre class=\"brush: scala; title: ; notranslate\" title=\"\">\r\npackage wordcount\r\n\r\nimport org.apache.spark.{SparkConf, SparkContext}\r\n\r\nobject wordCountScala extends App{\r\nval conf = new SparkConf().setAppName(&quot;Wordcount&quot;).setMaster(&quot;local&quot;);\r\n val sc = new SparkContext(conf)\r\n\r\n val line = sc.textFile(&quot;F:\\\\\u5927\u6570\u636e\\\\wordcount.txt&quot;)\r\n\r\n val result = line.flatMap(_.split(&quot; &quot;)).map((_,1)).reduceByKey(_+_)\r\n\r\n result.foreach(println)\r\n}\r\n\r\n<\/pre>\n<p>java\u7248\u672c<\/p>\n<pre class=\"brush: java; title: ; notranslate\" title=\"\">\r\npackage wordcount;\r\n\r\nimport org.apache.spark.SparkConf;\r\nimport org.apache.spark.api.java.JavaPairRDD;\r\nimport org.apache.spark.api.java.JavaRDD;\r\nimport org.apache.spark.api.java.JavaSparkContext;\r\nimport org.apache.spark.api.java.function.FlatMapFunction;\r\nimport org.apache.spark.api.java.function.Function2;\r\nimport org.apache.spark.api.java.function.PairFunction;\r\nimport org.apache.spark.api.java.function.VoidFunction;\r\nimport scala.Tuple2;\r\n\r\nimport java.util.Arrays;\r\nimport java.util.Iterator;\r\n\r\npublic class wordCountJava {\r\n    public static void main(String&#x5B;] args) {\r\n        SparkConf conf = new SparkConf().setAppName(&quot;javaWordCount&quot;).setMaster(&quot;local&quot;);\r\n        JavaSparkContext jsc = new JavaSparkContext(conf);\r\n\r\n        JavaRDD&lt;String&gt; javardd = jsc.textFile(&quot;F:\\\\\u5927\u6570\u636e\\\\wordcount.txt&quot;);\r\n\r\n        JavaRDD&lt;String&gt; word = javardd.flatMap(new 
FlatMapFunction&lt;String, String&gt;() {\r\n            @Override\r\n\r\n            public Iterator&lt;String&gt; call(String s) throws Exception {\r\n                String&#x5B;] s1 = s.split(&quot; &quot;);\r\n                return Arrays.asList(s1).iterator();\r\n            }\r\n        });\r\n\/\/ \u6570\u636e\u62fc\u63a5\r\n        JavaPairRDD&lt;String, Integer&gt; javaPairRDD = word.mapToPair(new PairFunction&lt;String, String, Integer&gt;() {\r\n            @Override\r\n            public Tuple2&lt;String, Integer&gt; call(String s) throws Exception {\r\n\/\/                Tuple2&lt;String, Integer&gt; dd = new Tuple2&lt;&gt;(s, 1);\r\n\/\/                return dd;\r\n                return new Tuple2&lt;&gt;(s,1);\r\n            }\r\n        });\r\n\r\n        \/\/ reduce \u5408\u5e76\r\n        JavaPairRDD&lt;String, Integer&gt; result = javaPairRDD.reduceByKey(new Function2&lt;Integer, Integer, Integer&gt;() {\r\n            @Override\r\n            public Integer call(Integer integer, Integer integer2) throws Exception {\r\n                return integer + integer2;\r\n            }\r\n        });\r\n\r\n        \/\/\u6253\u5370\r\n        result.foreach(new VoidFunction&lt;Tuple2&lt;String, Integer&gt;&gt;() {\r\n            @Override\r\n            public void call(Tuple2&lt;String, Integer&gt; stringIntegerTuple2) throws Exception {\r\n                System.out.println(stringIntegerTuple2);\r\n            }\r\n        });\r\n\r\n\r\n    }\r\n}\r\n\r\n<\/pre>\n<p>java \u7b80\u6d01\u7248\u672c<\/p>\n<pre class=\"brush: java; title: ; notranslate\" title=\"\">\r\npackage wordcount;\r\n\r\nimport org.apache.spark.SparkConf;\r\nimport org.apache.spark.api.java.JavaPairRDD;\r\nimport org.apache.spark.api.java.JavaRDD;\r\nimport org.apache.spark.api.java.JavaSparkContext;\r\nimport org.apache.spark.api.java.function.FlatMapFunction;\r\nimport org.apache.spark.api.java.function.Function2;\r\nimport 
org.apache.spark.api.java.function.PairFunction;\r\nimport scala.Tuple2;\r\n\r\nimport java.util.Arrays;\r\nimport java.util.Iterator;\r\n\r\npublic class javaWordcountSimple {\r\n    public static void main(String&#x5B;] args) {\r\n        SparkConf conf = new SparkConf().setAppName(&quot;javaWordCount&quot;).setMaster(&quot;local&quot;);\r\n        JavaSparkContext jsc = new JavaSparkContext(conf);\r\n\r\n        JavaRDD&lt;String&gt; javardd = jsc.textFile(&quot;F:\\\\\u5927\u6570\u636e\\\\wordcount.txt&quot;);\r\n\/\/\u6570\u636e\u5207\u5206\r\n        JavaRDD&lt;String&gt; word = javardd.flatMap((FlatMapFunction&lt;String, String&gt;) s -&gt; {\r\n            String&#x5B;] s1 = s.split(&quot; &quot;);\r\n            return Arrays.asList(s1).iterator();\r\n        });\r\n\/\/ \u6570\u636e\u62fc\u63a5\r\n        JavaPairRDD&lt;String, Integer&gt; javaPairRDD = word.mapToPair((PairFunction&lt;String, String, Integer&gt;) m -&gt; new Tuple2&lt;&gt;(m, 1));\r\n\r\n        \/\/ reduce \u5408\u5e76\r\n        JavaPairRDD&lt;String, Integer&gt; result = javaPairRDD.reduceByKey((Function2&lt;Integer, Integer, Integer&gt;) Integer::sum);\r\n\r\n        result.foreach(s-&gt;{\r\n            System.out.println(s);\r\n        });\r\n    }\r\n}\r\n\r\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>package wordcount import org.apache.spark.{SparkConf, S 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-1350","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"blocksy_meta":[],"_links":{"self":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/1350","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1350"}],"version-history":[{"count":3,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/1350\/revisions"}],"predecessor-version":[{"id":1366,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/1350\/revisions\/1366"}],"wp:attachment":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1350"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1350"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1350"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}