{"id":844,"date":"2020-11-05T19:20:57","date_gmt":"2020-11-05T11:20:57","guid":{"rendered":"https:\/\/www.specialwu.com\/?p=844"},"modified":"2021-10-02T11:01:31","modified_gmt":"2021-10-02T03:01:31","slug":"%e5%a4%a7%e6%95%b0%e6%8d%ae%e8%ae%a1%e7%ae%97%e4%b9%8b-spark","status":"publish","type":"post","link":"http:\/\/www.specialwu.com\/?p=844","title":{"rendered":"\u5927\u6570\u636e\u8ba1\u7b97\u4e4b&#8211;SparkCore"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_61 ez-toc-wrap-center counter-hierarchy ez-toc-counter ez-toc-custom ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title \" >\u76ee\u5f55<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #000000;color:#000000\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #000000;color:#000000\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" 
href=\"http:\/\/www.specialwu.com\/?p=844\/#Scala%E7%9A%84%E4%B8%80%E4%BA%9B%E7%9F%A5%E8%AF%86\" title=\"Scala\u7684\u4e00\u4e9b\u77e5\u8bc6\">Scala\u7684\u4e00\u4e9b\u77e5\u8bc6<\/a><ul class='ez-toc-list-level-4' ><li class='ez-toc-heading-level-4'><ul class='ez-toc-list-level-4' ><li class='ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"http:\/\/www.specialwu.com\/?p=844\/#1%E3%80%81Scala%E5%92%8CJava%E7%9A%84%E5%8C%BA%E5%88%AB\" title=\"1\u3001Scala\u548cJava\u7684\u533a\u522b\">1\u3001Scala\u548cJava\u7684\u533a\u522b<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"http:\/\/www.specialwu.com\/?p=844\/#Scala%E8%AF%AD%E8%A8%80%E7%9A%84%E7%89%B9%E7%82%B9%EF%BC%9F%E4%BB%80%E4%B9%88%E6%98%AF%E5%87%BD%E6%95%B0%E5%BC%8F%E7%BC%96%E7%A8%8B%EF%BC%9F\" title=\"Scala\u8bed\u8a00\u7684\u7279\u70b9\uff1f\u4ec0\u4e48\u662f\u51fd\u6570\u5f0f\u7f16\u7a0b\uff1f\">Scala\u8bed\u8a00\u7684\u7279\u70b9\uff1f\u4ec0\u4e48\u662f\u51fd\u6570\u5f0f\u7f16\u7a0b\uff1f<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"http:\/\/www.specialwu.com\/?p=844\/#Scala%E4%B8%AD%E7%9A%84%E9%97%AD%E5%8C%85\" title=\"Scala\u4e2d\u7684\u95ed\u5305\">Scala\u4e2d\u7684\u95ed\u5305<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"http:\/\/www.specialwu.com\/?p=844\/#Scala%E4%B8%AD%E7%9A%84%E6%9F%AF%E9%87%8C%E5%8C%96\" title=\"Scala\u4e2d\u7684\u67ef\u91cc\u5316\">Scala\u4e2d\u7684\u67ef\u91cc\u5316<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"http:\/\/www.specialwu.com\/?p=844\/#Scala%E4%B8%AD%E7%9A%84%E6%A8%A1%E5%BC%8F%E5%8C%B9%E9%85%8D\" title=\"Scala\u4e2d\u7684\u6a21\u5f0f\u5339\u914d\">Scala\u4e2d\u7684\u6a21\u5f0f\u5339\u914d<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-7\" 
href=\"http:\/\/www.specialwu.com\/?p=844\/#trait%E7%89%B9%E8%B4%A8_%E5%92%8Cabstract_class%E6%8A%BD%E8%B1%A1%E7%B1%BB%E7%9A%84%E5%8C%BA%E5%88%AB\" title=\"trait(\u7279\u8d28) \u548cabstract class(\u62bd\u8c61\u7c7b)\u7684\u533a\u522b\">trait(\u7279\u8d28) \u548cabstract class(\u62bd\u8c61\u7c7b)\u7684\u533a\u522b<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"http:\/\/www.specialwu.com\/?p=844\/#2%E3%80%81scala%E7%9A%84%E4%B8%80%E4%BA%9B%E5%87%BD%E6%95%B0\" title=\"2\u3001scala\u7684\u4e00\u4e9b\u51fd\u6570\">2\u3001scala\u7684\u4e00\u4e9b\u51fd\u6570<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"http:\/\/www.specialwu.com\/?p=844\/#spark211\" title=\"spark2.1.1\">spark2.1.1<\/a><ul class='ez-toc-list-level-4' ><li class='ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-10\" href=\"http:\/\/www.specialwu.com\/?p=844\/#RDD%E7%9A%84%E4%BA%94%E5%A4%A7%E7%89%B9%E6%80%A7\" title=\"RDD\u7684\u4e94\u5927\u7279\u6027\">RDD\u7684\u4e94\u5927\u7279\u6027<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-11\" href=\"http:\/\/www.specialwu.com\/?p=844\/#RDD%E5%BC%B9%E6%80%A7\" title=\"RDD\u5f39\u6027\">RDD\u5f39\u6027<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-12\" href=\"http:\/\/www.specialwu.com\/?p=844\/#RDD_%E6%8C%81%E4%B9%85%E5%8C%96\" title=\"RDD \u6301\u4e45\u5316\">RDD \u6301\u4e45\u5316<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-13\" href=\"http:\/\/www.specialwu.com\/?p=844\/#RDD%E7%9A%84%E4%BE%9D%E8%B5%96%E5%85%B3%E7%B3%BB\" title=\"RDD\u7684\u4f9d\u8d56\u5173\u7cfb\">RDD\u7684\u4f9d\u8d56\u5173\u7cfb<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-14\" 
href=\"http:\/\/www.specialwu.com\/?p=844\/#DAG%E7%9A%84%E7%94%9F%E6%88%90\" title=\"DAG\u7684\u751f\u6210\">DAG\u7684\u751f\u6210<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-15\" href=\"http:\/\/www.specialwu.com\/?p=844\/#%E8%B0%83%E4%BC%98\" title=\"\u8c03\u4f18\">\u8c03\u4f18<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-16\" href=\"http:\/\/www.specialwu.com\/?p=844\/#%E9%94%AE%E5%80%BC%E5%AF%B9RDD%E7%9A%84%E6%95%B0%E6%8D%AE%E5%88%86%E5%8C%BA\" title=\"\u952e\u503c\u5bf9RDD\u7684\u6570\u636e\u5206\u533a\">\u952e\u503c\u5bf9RDD\u7684\u6570\u636e\u5206\u533a<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-17\" href=\"http:\/\/www.specialwu.com\/?p=844\/#Standalone%E6%A8%A1%E5%BC%8F%E9%85%8D%E7%BD%AE\" title=\"Standalone\u6a21\u5f0f\u914d\u7f6e\">Standalone\u6a21\u5f0f\u914d\u7f6e<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-18\" href=\"http:\/\/www.specialwu.com\/?p=844\/#Job_history_%E9%85%8D%E7%BD%AE\" title=\"Job history \u914d\u7f6e\">Job history \u914d\u7f6e<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-19\" href=\"http:\/\/www.specialwu.com\/?p=844\/#spark%E5%BA%94%E7%94%A8%E6%89%A7%E8%A1%8C%E8%BF%87%E7%A8%8B\" title=\"spark\u5e94\u7528\u6267\u884c\u8fc7\u7a0b\">spark\u5e94\u7528\u6267\u884c\u8fc7\u7a0b<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-20\" href=\"http:\/\/www.specialwu.com\/?p=844\/#Spark_on_Yarn%E6%A8%A1%E5%BC%8F%E4%B8%8B%E5%86%85%E5%AD%98%E5%88%86%E9%85%8D\" title=\"Spark on Yarn\u6a21\u5f0f\u4e0b\u5185\u5b58\u5206\u914d\">Spark on Yarn\u6a21\u5f0f\u4e0b\u5185\u5b58\u5206\u914d<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-21\" 
href=\"http:\/\/www.specialwu.com\/?p=844\/#%E5%B1%95%E7%A4%BA%E6%AF%8F%E5%A4%A9%E8%AE%BF%E9%97%AE%E7%9A%84IP%E6%95%B0\" title=\"\u5c55\u793a\u6bcf\u5929\u8bbf\u95ee\u7684IP\u6570\">\u5c55\u793a\u6bcf\u5929\u8bbf\u95ee\u7684IP\u6570<\/a><ul class='ez-toc-list-level-5' ><li class='ez-toc-heading-level-5'><a class=\"ez-toc-link ez-toc-heading-22\" href=\"http:\/\/www.specialwu.com\/?p=844\/#spark%E5%90%84%E7%A7%8D%E8%BF%90%E8%A1%8C%E6%A8%A1%E5%BC%8F\" title=\"spark\u5404\u79cd\u8fd0\u884c\u6a21\u5f0f\">spark\u5404\u79cd\u8fd0\u884c\u6a21\u5f0f<\/a><\/li><\/ul><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-23\" href=\"http:\/\/www.specialwu.com\/?p=844\/#spark%E4%B8%8EMapreduce%E7%9A%84%E6%AF%94%E8%BE%83\" title=\"spark\u4e0eMapreduce\u7684\u6bd4\u8f83\">spark\u4e0eMapreduce\u7684\u6bd4\u8f83<\/a><ul class='ez-toc-list-level-4' ><li class='ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-24\" href=\"http:\/\/www.specialwu.com\/?p=844\/#Mapreduce_shuffle%E5%92%8CSpark_shuffle%E7%9A%84%E5%8C%BA%E5%88%AB\" title=\"Mapreduce shuffle\u548cSpark shuffle\u7684\u533a\u522b\">Mapreduce shuffle\u548cSpark shuffle\u7684\u533a\u522b<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-25\" href=\"http:\/\/www.specialwu.com\/?p=844\/#Spark_shuffle%E8%B0%83%E4%BC%98\" title=\"Spark shuffle\u8c03\u4f18\">Spark shuffle\u8c03\u4f18<\/a><\/li><\/ul><\/li><\/ul><\/li><\/ul><\/nav><\/div>\n<h2><span class=\"ez-toc-section\" id=\"Scala%E7%9A%84%E4%B8%80%E4%BA%9B%E7%9F%A5%E8%AF%86\"><\/span>Scala\u7684\u4e00\u4e9b\u77e5\u8bc6<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<h4><span class=\"ez-toc-section\" id=\"1%E3%80%81Scala%E5%92%8CJava%E7%9A%84%E5%8C%BA%E5%88%AB\"><\/span>1\u3001Scala\u548cJava\u7684\u533a\u522b<span 
class=\"ez-toc-section-end\"><\/span><\/h4>\n<table>\n<thead>\n<tr>\n<th>\u5bf9\u6bd4\u7684\u5185\u5bb9<\/th>\n<th>Scala<\/th>\n<th>java<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u53d8\u91cf\u58f0\u660e<\/td>\n<td>\u53ea\u9700\u8981\u58f0\u660e\u662fval\u6216\u662fvar\uff0c\u5177\u4f53\u7684\u7c7b\u578b\u7531\u7f16\u8bd1\u5668\u81ea\u884c\u63a8\u65ad<\/td>\n<td>\u9700\u8981\u5728\u53d8\u91cf\u524d\u9762\u5148\u6ce8\u660e\u53d8\u91cf\u7684\u7c7b\u578b<\/td>\n<\/tr>\n<tr>\n<td>\u8fd4\u56de\u503c<\/td>\n<td>\u7533\u660e\u8fd4\u56de\u503c\u662f\u5728\u540e\u9762\uff0c\u5e76\u4e0d\u9700\u8981return\u8bed\u53e5\uff0c\u4e5f\u53ef\u4ee5\u7528<\/td>\n<td>\u5982\u679c\u6709\u8fd4\u56de\u503c\uff0c\u9700\u8981return\u8bed\u53e5<\/td>\n<\/tr>\n<tr>\n<td>\u7ed3\u675f\u7b26<\/td>\n<td>\u4e0d\u9700\u8981\u4f7f\u7528\u5206\u53f7\u4f5c\u4e3a\u7ed3\u675f\u7b26<\/td>\n<td>\u6bcf\u4e2a\u8bed\u53e5\u7ed3\u675f\u9700\u8981\u4f5c\u4e3a\u7ed3\u675f\u7b26<\/td>\n<\/tr>\n<tr>\n<td>\u5faa\u73af<\/td>\n<td>\u5faa\u73af\u8bed\u53e5\u53ef\u4ee5\u7528\u4e8e\u5b88\u536b<\/td>\n<td>\u4e0d\u80fd<\/td>\n<\/tr>\n<tr>\n<td>\u901a\u914d\u7b26<\/td>\n<td>_<\/td>\n<td>*<\/td>\n<\/tr>\n<tr>\n<td>\u6784\u9020\u5668<\/td>\n<td>\u6784\u9020\u5668\u540d\u79f0\u4e3athis\uff0cScala\u7684\u8f85\u52a9\u6784\u9020\u5668\u4e4b\u524d\u9700\u8981\u6709\u4e00\u4e2a\u4e3b\u6784\u9020\u5668\u6216\u8005\u5176\u4ed6\u8f85\u52a9\u6784\u9020\u5668\uff0c\u5e76\u4e14Scala\u7684\u6784\u9020\u5668\u53ef\u4ee5\u653e\u5728\u7c7b\u7684\u540e\u9762<\/td>\n<td>\u6784\u9020\u5668\u540d\u79f0\u9700\u8981\u4e0e\u7c7b\u540d\u79f0\u4e00\u6837<\/td>\n<\/tr>\n<tr>\n<td>\u5185\u90e8\u7c7b<\/td>\n<td>scala\u5b9e\u4f8b\u5316\u7684\u5185\u90e8\u7c7b\u662f\u4e0d\u540c\u7684\uff0c\u53ef\u4ee5\u4f7f\u7528\u7c7b\u578b\u6295\u5f71<\/td>\n<td>\u5185\u90e8\u7c7b\u4ece\u5c5e\u4e8e\u5916\u90e8\u7c7b<\/td>\n<\/tr>\n<tr>\n<td>\u63a5\u53e3<\/td>\n<td>Scala\u4e2d\u63a5\u53e3\u79f0\u4e3a\u7279\u8d28\uff08trait\uff09\uff0c\u7279\u8d28\u4e2d\u662f\u53ef\u4
ee5\u5199\u62bd\u8c61\u65b9\u6cd5\uff0c\u4e5f\u53ef\u4ee5\u5199\u5177\u4f53\u7684\u65b9\u6cd5\u4f53\u72b6\u6001\uff0c\u4e14\u7c7b\u53ef\u4ee5\u5b9e\u73b0\u591a\u4e2a\u7279\u8d28\u7684\u3002\u7279\u8d28\u53ef\u4ee5\u7ee7\u627f\u666e\u901a\u7684\u7c7b<\/td>\n<td>\u63a5\u53e3\u4e2d\u7684\u65b9\u6cd5\u53ea\u80fd\u662f\u62bd\u8c61\u65b9\u6cd5\uff0c\u4e0d\u53ef\u4ee5\u5199\u5177\u4f53\u7684\u65b9\u6cd5\u4f53\u7684\u65b9\u6cd5\uff0c\u63a5\u53e3\u4e0d\u80fd\u7ee7\u627f\u666e\u901a\u7684\u7c7b<\/td>\n<\/tr>\n<tr>\n<td>\u8d4b\u503c<\/td>\n<td>Scala\u4e2d\u7684\u8d4b\u503c\u8bed\u53e5\u8fd4\u56de\u7ed3\u679c\u662funit\u7684\u4e0d\u53ef\u4ee5\u4e32\u8054\uff0c\u4f8b\u5982x=y=1\uff0c\u8fd9\u6837\u662f\u6709\u95ee\u9898\u7684\uff0cx\u5e76\u6ca1\u6709\u88ab\u8d4b\u503c\u4e3a1<\/td>\n<td>x=y=1\uff0c\u8fd9\u6837\u662f\u6ca1\u95ee\u9898\u7684<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h4><span class=\"ez-toc-section\" id=\"Scala%E8%AF%AD%E8%A8%80%E7%9A%84%E7%89%B9%E7%82%B9%EF%BC%9F%E4%BB%80%E4%B9%88%E6%98%AF%E5%87%BD%E6%95%B0%E5%BC%8F%E7%BC%96%E7%A8%8B%EF%BC%9F\"><\/span>Scala\u8bed\u8a00\u7684\u7279\u70b9\uff1f\u4ec0\u4e48\u662f\u51fd\u6570\u5f0f\u7f16\u7a0b\uff1f<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>Scala\u8bed\u8a00\u96c6\u6210\u9762\u5411\u5bf9\u8c61\u548c\u51fd\u6570\u5f0f\u7f16\u7a0b\u3002<\/li>\n<li>\u51fd\u6570\u5f0f\u7f16\u7a0b\u662f\u4e00\u79cd\u5178\u8303\uff0c\u5c06\u7535\u8111\u7684\u8fd0\u7b97\u89c6\u4e3a\u662f\u51fd\u6570\u5f0f\u7684\u8fd0\u7b97\u3002<\/li>\n<li>\u4e0e\u8fc7\u7a0b\u5316\u7f16\u7a0b\u76f8\u6bd4\uff0c\u51fd\u6570\u5f0f\u7f16\u7a0b\u91cc\u7684\u51fd\u6570\u8ba1\u7b97\u53ef\u4ee5\u968f\u65f6\u8c03\u7528\u3002<\/li>\n<li>\u51fd\u6570\u5f0f\u7f16\u7a0b\u4e2d\u51fd\u6570\u662f\u4e00\u7b49\u516c\u6c11\u3002<\/li>\n<\/ul>\n<h4><span class=\"ez-toc-section\" id=\"Scala%E4%B8%AD%E7%9A%84%E9%97%AD%E5%8C%85\"><\/span>Scala\u4e2d\u7684\u95ed\u5305<span 
class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>\u4f60\u53ef\u4ee5\u5728\u4efb\u4f55\u4f5c\u7528\u57df\u5b9a\u4e49\u51fd\u6570:\u5305\uff0c\u7c7b\u751a\u81f3\u662f\u53e6\u4e00\u4e2a\u51fd\u6570\u6216\u65b9\u6cd5\uff0c\u5728\u51fd\u6570\u4f53\u5185\uff0c\u53ef\u4ee5\u8bbf\u95ee\u5230\u76f8\u5e94\u4f5c\u7528\u57df\u5185\u7684\u4efb\u4f55\u53d8\u91cf\u3002\uff08\u91cd\u70b9\uff09\u51fd\u6570\u53ef\u4ee5\u4e0d\u518d\u5904\u4e8e\u4f5c\u7528\u57df\u5185\u65f6\u88ab\u8c03\u7528<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\">def main(args: Array[String]): Unit = {\n    def sss():Int={\n      def s1():Int={\n        println(\"s1\u8fd9\u4e2a\u65b9\u6cd5\")\n        1\n      }\n      println(\"sss\u8fd9\u4e2a\u65b9\u6cd5\")\n      s1()\n      2\n    }\n    sss()\n  }\n#\u63a7\u5236\u53f0\u7ed3\u679c\nsss\u8fd9\u4e2a\u65b9\u6cd5\ns1\u8fd9\u4e2a\u65b9\u6cd5\n<\/code><\/pre>\n<h4><span class=\"ez-toc-section\" id=\"Scala%E4%B8%AD%E7%9A%84%E6%9F%AF%E9%87%8C%E5%8C%96\"><\/span>Scala\u4e2d\u7684\u67ef\u91cc\u5316<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>\u5b9a\u4e49\uff1a\u5c06\u539f\u6765\u63a5\u53d7\u4e24\u4e2a\u53c2\u6570\u7684\u51fd\u6570\u53d8\u6210\u65b0\u7684\u63a5\u53d7\u4e00\u4e2a\u53c2\u6570\u7684\u51fd\u6570\u7684\u8fc7\u7a0b\u3002\u65b0\u7684\u51fd\u6570\u8fd4\u56de\u4e00\u4e2a\u4ee5\u539f\u6709\u7684\u7b2c\u4e8c\u4e2a\u53c2\u6570\u4f5c\u4e3a\u53c2\u6570\u7684\u51fd\u6570\u3002<\/li>\n<\/ul>\n<h4><span class=\"ez-toc-section\" id=\"Scala%E4%B8%AD%E7%9A%84%E6%A8%A1%E5%BC%8F%E5%8C%B9%E9%85%8D\"><\/span>Scala\u4e2d\u7684\u6a21\u5f0f\u5339\u914d<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<pre><code class=\"language-scala line-numbers\">    var sign = 77\n    var ch: Char = '+'\n    ch match {\n      case '+' =&gt; sign = 1\n      case '-' =&gt; sign = -1\n      case _ =&gt; sign = 0\n    }\n    print(sign)\n<\/code><\/pre>\n<h4><span class=\"ez-toc-section\" 
id=\"trait%E7%89%B9%E8%B4%A8_%E5%92%8Cabstract_class%E6%8A%BD%E8%B1%A1%E7%B1%BB%E7%9A%84%E5%8C%BA%E5%88%AB\"><\/span>trait(\u7279\u8d28) \u548cabstract class(\u62bd\u8c61\u7c7b)\u7684\u533a\u522b<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>\u4e00\u4e2a\u7c7b\u53ea\u80fd\u7ee7\u627f\u4e00\u4e2a\u62bd\u8c61\u7c7b\uff0c\u4f46\u53ef\u4ee5\u901a\u8fc7with\u5173\u952e\u5b57\u7ee7\u627f\u591a\u4e2a\u7279\u8d28<\/li>\n<li>\u62bd\u8c61\u7c7b\u6709\u53c2\u6570\u7684\u6784\u9020\u51fd\u6570\uff0c\u7279\u8d28\u4e0d\u884c\uff08trait t(i:Int){},\u9519\u8bef\u5199\u6cd5\uff09<\/li>\n<\/ul>\n<h4><span class=\"ez-toc-section\" id=\"2%E3%80%81scala%E7%9A%84%E4%B8%80%E4%BA%9B%E5%87%BD%E6%95%B0\"><\/span>2\u3001scala\u7684\u4e00\u4e9b\u51fd\u6570<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>\u533f\u540d\u51fd\u6570<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\"> (a:Int)=&gt;{}\n\n      def fun1=(a:Int) =&gt; {\n      }\n      def funq(s:String*): Unit ={\n        s.foreach(print(_))\n      }\n<\/code><\/pre>\n<ul>\n<li>\u504f\u5e94\u7528\u51fd\u6570\uff08\u65b9\u6cd5\u4e2d\u67d0\u4e00\u51fd\u6570\u53c2\u6570\u7279\u522b\u591a\uff0c\u53c8\u9700\u8981\u9891\u7e41\u8c03\u7528\uff0c\u53c8\u53ea\u60f3\u6539\u53d8\u4e00\u90e8\u5206\u53c2\u6570\u7684\u503c\u53ef\u7528\u8fd9\u79cd\u51fd\u6570\uff09<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\">def showlog(logdate:Date,log:String): Unit ={\n        println(s\"logtime is $logdate,content is $log\")\n      }\n    val date =new Date()\n    showlog(date,\"a\")\n    showlog(date,\"b\")\n    showlog(date,\"c\")\ndef fun = showlog(date,_:String)\n    fun(\"ss\")\n\/\/\u8f93\u51fa\nlogtime is Fri Nov 06 12:37:41 CST 2020,content is a\nlogtime is Fri Nov 06 12:37:41 CST 2020,content is b\nlogtime is Fri Nov 06 12:37:41 CST 2020,content is c\nlogtime is Fri Nov 06 12:37:41 CST 2020,content is 
ss\n<\/code><\/pre>\n<ul>\n<li>\u9ad8\u9636\u51fd\u6570<\/li>\n<\/ul>\n<ol>\n<li>\u65b9\u6cd5\u7684\u53c2\u6570\u662f\u51fd\u6570<\/li>\n<\/ol>\n<pre><code class=\"language-scala line-numbers\">    def sum(a:Int,b:Int): Int ={\n      a+b\n    }\n    def sum1(f:(Int,Int)=&gt;Int,s:String): String ={\n      val i:Int=f(100,200)\n      i+\"&amp;&amp;&amp;\"+s\n    }\n    val result=sum1((a:Int,b:Int)=&gt;{a+b},\"scala\")\n    \/\/\u8fd9\u4e24\u4e2a\u7ed3\u679c\u662f\u4e00\u6837\u7684\n    val result1=sum1(sum,\"scala\")\n<\/code><\/pre>\n<ol start=\"2\">\n<li>\u65b9\u6cd5\u7684\u8fd4\u56de\u503c\u662f\u51fd\u6570\uff08\u8981\u663e\u5f0f\u7684\u5199\u51fa\u65b9\u6cd5\u7684\u8fd4\u56de\u503c\u7c7b\u578b\uff09<\/li>\n<\/ol>\n<pre><code class=\"language-scala line-numbers\">                        \/\/\u6b64\u5904\u662f\u8fd4\u56de\u503c\u7c7b\u578b,\u53ea\u6709\u8be5sum\u51fd\u6570\u7684\u8fd4\u56de\u503c\u7b26\u5408\u8be5\u7c7b\u578b\uff0c\u8c03\u7528\u8be5\u51fd\u6570\u624d\u4f1a\u663e\u793a\n    def sum3(a:Int,b:Int):(String,String)=&gt;String={\n        \/\/\u51fd\u6570\u503c\u53ef\u4ee5\u5d4c\u5957\u51fd\u6570\uff0c\u8be5\u51fd\u6570\u7c7b\u578b\u7b26\u5408sum3\u51fd\u6570\u8fd4\u56de\u7c7b\u578b\n      def fun3(c:String,d:String):String={\n        c+\"#\"+d+\"\"+c+\"***\"+a\n      }\n      \/\/\u6b64\u5904\u662f\u4e00\u4e2a\u8fd4\u56de\u503c\n      fun3\n    }\n    print(sum3(1,2)(\"c1\",\"d1\"))\n<\/code><\/pre>\n<ul>\n<li>\u67ef\u91cc\u5316\u51fd\u6570<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\">    def keli(a:Int,b:Int)(c:Int,d:Int)={\n      a+b+c+d\n    }\n    print(keli(1,2)(3,4))\n<\/code><\/pre>\n<hr \/>\n<h3><span class=\"ez-toc-section\" id=\"spark211\"><\/span>spark2.1.1<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<ul>\n<li>\u662f\u4ec0\u4e48<\/li>\n<\/ul>\n<ol>\n<li>Apache 
Spark\u2122\u662f\u4e00\u4e2a\u7528\u4e8e\u5927\u89c4\u6a21\u6570\u636e\u5904\u7406\u7684\u7edf\u4e00\u5206\u6790\u5f15\u64ce<\/li>\n<li>Spark\u56f4\u7ed5\u7740\u5f39\u6027\u5206\u5e03\u5f0f\u6570\u636e\u96c6\uff08RDD\uff09\u7684\u6982\u5ff5\uff0cRDD\u662f\u53ef\u5e76\u884c\u64cd\u4f5c\u7684\u5143\u7d20\u7684\u5bb9\u9519\u96c6\u5408;<\/li>\n<li>RDD\uff08Resilient Distributed Dataset\uff09\u53eb\u505a\u5f39\u6027\u5206\u5e03\u5f0f\u6570\u636e\u96c6\uff0c\u662fSpark\u4e2d\u6700\u57fa\u672c\u7684\u6570\u636e\u62bd\u8c61\uff0c\u5b83\u4ee3\u8868\u4e00\u4e2a\u4e0d\u53ef\u53d8\u3001\u53ef\u5206\u533a\u3001\u91cc\u9762\u7684\u5143\u7d20\u53ef\u5e76\u884c\u8ba1\u7b97\u7684\u96c6\u5408\uff0c\u6bcf\u4e2a RDD \u90fd\u88ab\u5206\u4e3a\u591a\u4e2a\u5206\u533a\uff0c\u8fd9\u4e9b\u5206\u533a\u8fd0\u884c\u5728\u96c6\u7fa4\u4e2d\u7684\u4e0d\u540c\u8282\u70b9\u4e0a\u3002RDD \u53ef\u4ee5\u5305\u542b Python\u3001Java\u3001Scala \u4e2d\u4efb\u610f\u7c7b\u578b\u7684\u5bf9\u8c61\uff0c \u751a\u81f3\u53ef\u4ee5\u5305\u542b\u7528\u6237\u81ea\u5b9a\u4e49\u7684\u5bf9\u8c61.<\/li>\n<li>RDD\u7684\u64cd\u4f5c\u7b97\u5b50\u5305\u62ec\u4e24\u7c7b\uff0c\u4e00\u7c7b\u53eb\u505atransformations\uff0c\u5b83\u662f\u7528\u6765\u5c06RDD\u8fdb\u884c\u8f6c\u5316\uff0c\u6784\u5efaRDD\u7684\u8840\u7f18\u5173\u7cfb\uff1b\u53e6\u4e00\u7c7b\u53eb\u505aactions\uff0c\u5b83\u662f\u7528\u6765\u89e6\u53d1RDD\u7684\u8ba1\u7b97\uff0c\u5f97\u5230RDD\u7684\u76f8\u5173\u8ba1\u7b97\u7ed3\u679c\u6216\u8005\u5c06RDD\u4fdd\u5b58\u7684\u6587\u4ef6\u7cfb\u7edf\u4e2d.<\/li>\n<li>\u5728Spark\u4e2d\u521b\u5efaRDD\u7684\u521b\u5efa\u65b9\u5f0f\u5927\u6982\u53ef\u4ee5\u5206\u4e3a\u4e09\u79cd\uff1a\uff081\uff09\u3001\u4ece\u96c6\u5408\u4e2d\u521b\u5efaRDD\uff1b\uff082\uff09\u3001\u4ece\u5916\u90e8\u5b58\u50a8\u521b\u5efaRDD\uff1b\uff083\uff09\u3001\u4ece\u5176\u4ed6RDD\u521b\u5efa\u3002<\/li>\n<\/ol>\n<hr \/>\n<h4><span class=\"ez-toc-section\" 
id=\"RDD%E7%9A%84%E4%BA%94%E5%A4%A7%E7%89%B9%E6%80%A7\"><\/span>RDD\u7684\u4e94\u5927\u7279\u6027<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ol>\n<li><code>A list of partitions<\/code><\/li>\n<\/ol>\n<p><code>RDD\u7531\u5f88\u591apartition\u6784\u6210<\/code>\uff0c\u5728spark\u4e2d\uff0c\u8ba1\u7b97\u65f6\uff0c\u6709\u591a\u5c11partition\u5c31\u5bf9\u5e94\u6709\u591a\u5c11\u4e2atask\u6765\u6267\u884c,\u5bf9\u4e8eRDD\u6765\u8bf4\uff0c\u6bcf\u4e2a\u5206\u7247\u90fd\u4f1a\u88ab\u4e00\u4e2a\u8ba1\u7b97\u4efb\u52a1\u5904\u7406\uff0c\u5e76\u51b3\u5b9a\u5e76\u884c\u8ba1\u7b97\u7684\u7c92\u5ea6\u3002\u7528\u6237\u53ef\u4ee5\u5728\u521b\u5efaRDD\u65f6\u6307\u5b9aRDD\u7684\u5206\u7247\u4e2a\u6570\uff0c\u5982\u679c\u6ca1\u6709\u6307\u5b9a\uff0c\u90a3\u4e48\u5c31\u4f1a\u91c7\u7528\u9ed8\u8ba4\u503c\u3002\u9ed8\u8ba4\u503c\u5c31\u662f\u7a0b\u5e8f\u6240\u5206\u914d\u5230\u7684CPU Core\u7684\u6570\u76ee\u3002<\/p>\n<ol start=\"2\">\n<li><code>A function for computing each split<\/code><\/li>\n<\/ol>\n<p><code>\u5bf9RDD\u505a\u8ba1\u7b97\uff0c\u76f8\u5f53\u4e8e\u5bf9RDD\u7684\u6bcf\u4e2asplit\u6216partition\u505a\u8ba1\u7b97<\/code>\u3002Spark\u4e2dRDD\u7684\u8ba1\u7b97\u662f\u4ee5\u5206\u7247\u4e3a\u5355\u4f4d\u7684\uff0c\u6bcf\u4e2aRDD\u90fd\u4f1a\u5b9e\u73b0compute\u51fd\u6570\u4ee5\u8fbe\u5230\u8fd9\u4e2a\u76ee\u7684\u3002compute\u51fd\u6570\u4f1a\u5bf9\u8fed\u4ee3\u5668\u8fdb\u884c\u590d\u5408\uff0c\u4e0d\u9700\u8981\u4fdd\u5b58\u6bcf\u6b21\u8ba1\u7b97\u7684\u7ed3\u679c\u3002<\/p>\n<ol start=\"3\">\n<li><code>A list of dependencies on other 
RDDs<\/code><\/li>\n<\/ol>\n<p><code>RDD\u4e4b\u95f4\u6709\u4f9d\u8d56\u5173\u7cfb\uff0c\u53ef\u6eaf\u6e90<\/code>\u3002RDD\u7684\u6bcf\u6b21\u8f6c\u6362\u90fd\u4f1a\u751f\u6210\u4e00\u4e2a\u65b0\u7684RDD\uff0c\u6240\u4ee5RDD\u4e4b\u95f4\u5c31\u4f1a\u5f62\u6210\u7c7b\u4f3c\u4e8e\u6d41\u6c34\u7ebf\u4e00\u6837\u7684\u524d\u540e\u4f9d\u8d56\u5173\u7cfb\u3002\u5728\u90e8\u5206\u5206\u533a\u6570\u636e\u4e22\u5931\u65f6\uff0cSpark\u53ef\u4ee5\u901a\u8fc7\u8fd9\u4e2a\u4f9d\u8d56\u5173\u7cfb\u91cd\u65b0\u8ba1\u7b97\u4e22\u5931\u7684\u5206\u533a\u6570\u636e\uff0c\u800c\u4e0d\u662f\u5bf9RDD\u7684\u6240\u6709\u5206\u533a\u8fdb\u884c\u91cd\u65b0\u8ba1\u7b97\u3002<\/p>\n<p>4.<code>Optionally, a Partitioner for key-value RDDs (e.g. to say that the RDD is hash-partitioned)<\/code><\/p>\n<p>\u5982\u679cRDD\u91cc\u9762\u5b58\u7684\u6570\u636e\u662fkey-value\u5f62\u5f0f\uff0c\u5219\u53ef\u4ee5\u4f20\u9012\u4e00\u4e2a\u81ea\u5b9a\u4e49\u7684Partitioner\u8fdb\u884c\u91cd\u65b0\u5206\u533a\uff0c\u6bd4\u5982<code>\u53ef\u4ee5\u6309key\u7684hash\u503c\u5206\u533a<\/code>\u3002\u4e00\u4e2aPartitioner\uff0c\u5373RDD\u7684\u5206\u7247\u51fd\u6570\u3002\u5f53\u524dSpark\u4e2d\u5b9e\u73b0\u4e86\u4e24\u79cd\u7c7b\u578b\u7684\u5206\u7247\u51fd\u6570\uff0c\u4e00\u4e2a\u662f\u57fa\u4e8e\u54c8\u5e0c\u7684HashPartitioner\uff0c\u53e6\u5916\u4e00\u4e2a\u662f\u57fa\u4e8e\u8303\u56f4\u7684RangePartitioner\u3002\u53ea\u6709\u5bf9\u4e8ekey-value\u7684RDD\uff0c\u624d\u4f1a\u6709Partitioner\uff0c\u975ekey-value\u7684RDD\u7684Partitioner\u7684\u503c\u662fNone\u3002Partitioner\u51fd\u6570\u4e0d\u4f46\u51b3\u5b9a\u4e86RDD\u672c\u8eab\u7684\u5206\u7247\u6570\u91cf\uff0c\u4e5f\u51b3\u5b9a\u4e86parent RDD Shuffle\u8f93\u51fa\u65f6\u7684\u5206\u7247\u6570\u91cf\u3002<\/p>\n<ol start=\"5\">\n<li><code>Optionally, a list of preferred locations to compute each split on (e.g. 
block locations for an HDFS file)<\/code><\/li>\n<\/ol>\n<p>\u6700\u4f18\u7684\u4f4d\u7f6e\u53bb\u8ba1\u7b97\uff0c\u4e5f\u5c31\u662f<code>\u6570\u636e\u7684\u672c\u5730\u6027<\/code><br \/>\n\u8ba1\u7b97\u6bcf\u4e2asplit\u65f6\uff0c\u5728split\u6240\u5728\u673a\u5668\u7684\u672c\u5730\u4e0a\u8fd0\u884ctask\u662f\u6700\u597d\u7684\uff0c\u907f\u514d\u4e86\u6570\u636e\u7684\u79fb\u52a8\uff1bsplit\u6709\u591a\u4e2a\u526f\u672c\uff0c\u6240\u4ee5preferred location\u4e0d\u6b62\u4e00\u4e2a<br \/>\n\u6570\u636e\u5728\u54ea\u91cc\uff0c\u5e94\u4f18\u5148\u628a\u4f5c\u4e1a\u8c03\u5ea6\u5230\u6570\u636e\u6240\u5728\u673a\u5668\u4e0a\uff0c\u51cf\u5c11\u6570\u636e\u7684IO\u548c\u7f51\u7edc\u4f20\u8f93\uff0c\u8fd9\u6837\u624d\u80fd\u66f4\u597d\u5730\u51cf\u5c11\u4f5c\u4e1a\u8fd0\u884c\u65f6\u95f4\uff08\u6728\u6876\u539f\u7406\uff1a\u4f5c\u4e1a\u8fd0\u884c\u65f6\u95f4\u53d6\u51b3\u4e8e\u8fd0\u884c\u6700\u6162\u7684task\u6240\u9700\u7684\u65f6\u95f4\uff09\uff0c\u63d0\u9ad8\u6027\u80fd\u3002\u5bf9\u4e8e\u4e00\u4e2aHDFS\u6587\u4ef6\u6765\u8bf4\uff0c\u8fd9\u4e2a\u5217\u8868\u4fdd\u5b58\u7684\u5c31\u662f\u6bcf\u4e2aPartition\u6240\u5728\u7684\u5757\u7684\u4f4d\u7f6e\u3002\u6309\u7167\u201c\u79fb\u52a8\u6570\u636e\u4e0d\u5982\u79fb\u52a8\u8ba1\u7b97\u201d\u7684\u7406\u5ff5\uff0cSpark\u5728\u8fdb\u884c\u4efb\u52a1\u8c03\u5ea6\u7684\u65f6\u5019\uff0c\u4f1a\u5c3d\u53ef\u80fd\u5730\u5c06\u8ba1\u7b97\u4efb\u52a1\u5206\u914d\u5230\u5176\u6240\u8981\u5904\u7406\u6570\u636e\u5757\u7684\u5b58\u50a8\u4f4d\u7f6e\u3002<\/p>\n<hr \/>\n<ul>\n<li>RDD\u4e24\u79cd\u521b\u5efa\u65b9\u5f0f<\/li>\n<\/ul>\n<ol>\n<li>\u5e76\u884c\u5316 \u9a71\u52a8\u7a0b\u5e8f\u4e2d\u7684\u73b0\u6709\u96c6\u5408<\/li>\n<\/ol>\n<pre><code class=\"language-scala line-numbers\">val data = Array(1, 2, 3, 4, 5)\nval distData = sc.parallelize(data)\n<\/code><\/pre>\n<ol 
start=\"2\">\n<li>\u6216\u5f15\u7528\u5916\u90e8\u5b58\u50a8\u7cfb\u7edf\uff08\u4f8b\u5982\u5171\u4eab\u6587\u4ef6\u7cfb\u7edf\uff0cHDFS\uff0cHBase\u6216\u63d0\u4f9bHadoop InputFormat\u7684\u4efb\u4f55\u6570\u636e\u6e90\uff09\u4e2d\u7684\u6570\u636e\u96c6\u3002<\/li>\n<\/ol>\n<pre><code class=\"language-scala line-numbers\">val distFile = sc.textFile(\"data.txt\")\n<\/code><\/pre>\n<p><a class=\"wp-editor-md-post-content-link\" href=\"http:\/\/spark.apache.org\/docs\/latest\/rdd-programming-guide.html\" title=\"\u70b9\u51fb\u67e5\u770bspark RDD\u5b98\u65b9\u624b\u518c\">\u70b9\u51fb\u67e5\u770bspark RDD\u5b98\u65b9\u624b\u518c<\/a><\/p>\n<blockquote><p>\n  Spark\u652f\u6301Java\u3001Python\u548cScala\u7684API\u3002\u5927\u81f4\u628aSpark\u7684\u7528\u4f8b\u5206\u4e3a\u4e24\u7c7b\uff1a\u6570\u636e\u79d1\u5b66\u5e94\u7528\u548c\u6570\u636e\u5904\u7406\u5e94\u7528,\u662f\u4e00\u79cd\u5feb\u901f\u3001\u901a\u7528\u3001\u53ef\u6269\u5c55\u7684\u5927\u6570\u636e\u5206\u6790\u5f15\u64ce,\u57fa\u4e8e\u5185\u5b58\u8ba1\u7b97\u7684\u5927\u6570\u636e\u5e76\u884c\u8ba1\u7b97\u6846\u67b6,Spark \u9002\u7528\u4e8e\u5404\u79cd\u5404\u6837\u539f\u5148\u9700\u8981\u591a\u79cd\u4e0d\u540c\u7684\u5206\u5e03\u5f0f\u5e73\u53f0\u7684\u573a\u666f\uff0c\u5305\u62ec\u6279\u5904\u7406\u3001\u8fed\u4ee3\u7b97\u6cd5\u3001\u4ea4\u4e92\u5f0f\u67e5\u8be2\u3001\u6d41\u5904\u7406\u3002\n<\/p><\/blockquote>\n<hr \/>\n<h4><span class=\"ez-toc-section\" id=\"RDD%E5%BC%B9%E6%80%A7\"><\/span>RDD\u5f39\u6027<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ol>\n<li>\u81ea\u52a8\u8fdb\u884c\u5185\u5b58\u548c\u78c1\u76d8\u6570\u636e\u5b58\u50a8\u7684\u5207\u6362<\/li>\n<\/ol>\n<p>Spark\u4f18\u5148\u628a\u6570\u636e\u653e\u5230\u5185\u5b58\u4e2d\uff0c\u5982\u679c\u5185\u5b58\u653e\u4e0d\u4e0b\uff0c\u5c31\u4f1a\u653e\u5230\u78c1\u76d8\u91cc\u9762\uff0c\u7a0b\u5e8f\u8fdb\u884c\u81ea\u52a8\u7684\u5b58\u50a8\u5207\u6362<br \/>\n2. 
\u57fa\u4e8e\u8840\u7edf\u7684\u9ad8\u6548\u5bb9\u9519\u673a\u5236<\/p>\n<p>\u5728RDD\u8fdb\u884c\u8f6c\u6362\u548c\u52a8\u4f5c\u7684\u65f6\u5019\uff0c\u4f1a\u5f62\u6210RDD\u7684Lineage\u4f9d\u8d56\u94fe\uff0c\u5f53\u67d0\u4e00\u4e2aRDD\u5931\u6548\u7684\u65f6\u5019\uff0c\u53ef\u4ee5\u901a\u8fc7\u91cd\u65b0\u8ba1\u7b97\u4e0a\u6e38\u7684RDD\u6765\u91cd\u65b0\u751f\u6210\u4e22\u5931\u7684RDD\u6570\u636e\u3002<\/p>\n<ol start=\"3\">\n<li>Task\u5982\u679c\u5931\u8d25\u4f1a\u81ea\u52a8\u8fdb\u884c\u7279\u5b9a\u6b21\u6570\u7684\u91cd\u8bd5<\/li>\n<\/ol>\n<p>RDD\u7684\u8ba1\u7b97\u4efb\u52a1\u5982\u679c\u8fd0\u884c\u5931\u8d25\uff0c\u4f1a\u81ea\u52a8\u8fdb\u884c\u4efb\u52a1\u7684\u91cd\u65b0\u8ba1\u7b97\uff0c\u9ed8\u8ba4\u6b21\u6570\u662f4\u6b21\u3002<br \/>\n4. Stage\u5982\u679c\u5931\u8d25\u4f1a\u81ea\u52a8\u8fdb\u884c\u7279\u5b9a\u6b21\u6570\u7684\u91cd\u8bd5<\/p>\n<p>\u5982\u679cJob\u7684\u67d0\u4e2aStage\u9636\u6bb5\u8ba1\u7b97\u5931\u8d25\uff0c\u6846\u67b6\u4e5f\u4f1a\u81ea\u52a8\u8fdb\u884c\u4efb\u52a1\u7684\u91cd\u65b0\u8ba1\u7b97\uff0c\u9ed8\u8ba4\u6b21\u6570\u4e5f\u662f4\u6b21\u3002<br \/>\n5. Checkpoint\u548cPersist\u53ef\u4e3b\u52a8\u6216\u88ab\u52a8\u89e6\u53d1<\/p>\n<p>RDD\u53ef\u4ee5\u901a\u8fc7Persist\u6301\u4e45\u5316\u5c06RDD\u7f13\u5b58\u5230\u5185\u5b58\u6216\u8005\u78c1\u76d8\uff0c\u5f53\u518d\u6b21\u7528\u5230\u8be5RDD\u65f6\u76f4\u63a5\u8bfb\u53d6\u5c31\u884c\u3002\u4e5f\u53ef\u4ee5\u5c06RDD\u8fdb\u884c\u68c0\u67e5\u70b9\uff0c\u68c0\u67e5\u70b9\u4f1a\u5c06\u6570\u636e\u5b58\u50a8\u5728HDFS\u4e2d\uff0c\u8be5RDD\u7684\u6240\u6709\u7236RDD\u4f9d\u8d56\u90fd\u4f1a\u88ab\u79fb\u9664\u3002<br \/>\n6. 
\u6570\u636e\u8c03\u5ea6\u5f39\u6027<\/p>\n<p>Spark\u628a\u8fd9\u4e2aJOB\u6267\u884c\u6a21\u578b\u62bd\u8c61\u4e3a\u901a\u7528\u7684\u6709\u5411\u65e0\u73af\u56feDAG\uff0c\u53ef\u4ee5\u5c06\u591aStage\u7684\u4efb\u52a1\u4e32\u8054\u6216\u5e76\u884c\u6267\u884c\uff0c\u8c03\u5ea6\u5f15\u64ce\u81ea\u52a8\u5904\u7406Stage\u7684\u5931\u8d25\u4ee5\u53caTask\u7684\u5931\u8d25\u3002<br \/>\n7. \u6570\u636e\u5206\u7247\u7684\u9ad8\u5ea6\u5f39\u6027<\/p>\n<p>\u53ef\u4ee5\u6839\u636e\u4e1a\u52a1\u7684\u7279\u5f81\uff0c\u52a8\u6001\u8c03\u6574\u6570\u636e\u5206\u7247\u7684\u4e2a\u6570\uff0c\u63d0\u5347\u6574\u4f53\u7684\u5e94\u7528\u6267\u884c\u6548\u7387\u3002<\/p>\n<hr \/>\n<h4><span class=\"ez-toc-section\" id=\"RDD_%E6%8C%81%E4%B9%85%E5%8C%96\"><\/span>RDD \u6301\u4e45\u5316<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ol>\n<li>RDD\u7684\u7f13\u5b58<\/li>\n<\/ol>\n<p>Spark\u901f\u5ea6\u975e\u5e38\u5feb\u7684\u539f\u56e0\u4e4b\u4e00\uff0c\u5c31\u662f\u5728\u4e0d\u540c\u64cd\u4f5c\u4e2d\u53ef\u4ee5\u5728\u5185\u5b58\u4e2d\u6301\u4e45\u5316\u6216\u7f13\u5b58\u4e2a\u6570\u636e\u96c6\u3002\u5f53\u6301\u4e45\u5316\u67d0\u4e2aRDD\u540e\uff0c\u6bcf\u4e00\u4e2a\u8282\u70b9\u90fd\u5c06\u628a\u8ba1\u7b97\u7684\u5206\u7247\u7ed3\u679c\u4fdd\u5b58\u5728\u5185\u5b58\u4e2d\uff0c\u5e76\u5728\u5bf9\u6b64RDD\u6216\u884d\u751f\u51fa\u7684RDD\u8fdb\u884c\u7684\u5176\u4ed6\u52a8\u4f5c\u4e2d\u91cd\u7528<br \/>\n2. 
RDD\u7f13\u5b58\u65b9\u5f0f<\/p>\n<p>RDD\u901a\u8fc7persist\u65b9\u6cd5\u6216cache\u65b9\u6cd5\u53ef\u4ee5\u5c06\u524d\u9762\u7684\u8ba1\u7b97\u7ed3\u679c\u7f13\u5b58\uff0c\u9ed8\u8ba4\u60c5\u51b5\u4e0b persist() \u4f1a\u628a\u6570\u636e\u4ee5\u5e8f\u5217\u5316\u7684\u5f62\u5f0f\u7f13\u5b58\u5728 JVM \u7684\u5806\u7a7a\u95f4\u4e2d\u3002<br \/>\n\u4f46\u662f\u5e76\u4e0d\u662f\u8fd9\u4e24\u4e2a\u65b9\u6cd5\u88ab\u8c03\u7528\u65f6\u7acb\u5373\u7f13\u5b58\uff0c\u800c\u662f\u89e6\u53d1\u540e\u9762\u7684action\u65f6\uff0c\u8be5RDD\u5c06\u4f1a\u88ab\u7f13\u5b58\u5728\u8ba1\u7b97\u8282\u70b9\u7684\u5185\u5b58\u4e2d\uff0c\u5e76\u4f9b\u540e\u9762\u91cd\u7528\u3002\u901a\u8fc7\u67e5\u770b\u6e90\u7801\u53d1\u73b0cache\u6700\u7ec8\u4e5f\u662f\u8c03\u7528\u4e86persist\u65b9\u6cd5\uff0c\u9ed8\u8ba4\u7684\u5b58\u50a8\u7ea7\u522b\u90fd\u662f\u4ec5\u5728\u5185\u5b58\u5b58\u50a8\u4e00\u4efd\uff0cSpark\u7684\u5b58\u50a8\u7ea7\u522b\u8fd8\u6709\u597d\u591a\u79cd\uff0c\u5b58\u50a8\u7ea7\u522b\u5728object StorageLevel\u4e2d\u5b9a\u4e49\u7684\u3002\u5728\u5b58\u50a8\u7ea7\u522b\u7684\u672b\u5c3e\u52a0\u4e0a\u201c_2\u201d\u6765\u628a\u6301\u4e45\u5316\u6570\u636e\u5b58\u4e3a\u4e24\u4efd\u3002<br \/>\n\u901a\u8fc7\u57fa\u4e8eRDD\u7684\u4e00\u7cfb\u5217\u8f6c\u6362\uff0c\u4e22\u5931\u7684\u6570\u636e\u4f1a\u88ab\u91cd\u7b97\uff0c\u7531\u4e8eRDD\u7684\u5404\u4e2aPartition\u662f\u76f8\u5bf9\u72ec\u7acb\u7684\uff0c\u56e0\u6b64\u53ea\u9700\u8981\u8ba1\u7b97\u4e22\u5931\u7684\u90e8\u5206\u5373\u53ef\uff0c\u5e76\u4e0d\u9700\u8981\u91cd\u7b97\u5168\u90e8Partition\u3002<br \/>\n3. 
RDD\u68c0\u67e5\u70b9\u673a\u5236<\/p>\n<p>Spark\u4e2d\u5bf9\u4e8e\u6570\u636e\u7684\u4fdd\u5b58\u9664\u4e86\u6301\u4e45\u5316\u64cd\u4f5c\u4e4b\u5916\uff0c\u8fd8\u63d0\u4f9b\u4e86\u4e00\u79cd\u68c0\u67e5\u70b9\u7684\u673a\u5236\uff0c\u68c0\u67e5\u70b9\uff08\u672c\u8d28\u662f\u901a\u8fc7\u5c06RDD\u5199\u5165Disk\u505a\u68c0\u67e5\u70b9\uff09\u662f\u4e3a\u4e86\u901a\u8fc7lineage\u505a\u5bb9\u9519\u7684\u8f85\u52a9\uff0clineage\u8fc7\u957f\u4f1a\u9020\u6210\u5bb9\u9519\u6210\u672c\u8fc7\u9ad8\uff0c\u8fd9\u6837\u5c31\u4e0d\u5982\u5728\u4e2d\u95f4\u9636\u6bb5\u505a\u68c0\u67e5\u70b9\u5bb9\u9519\uff0c\u5982\u679c\u4e4b\u540e\u6709\u8282\u70b9\u51fa\u73b0\u95ee\u9898\u800c\u4e22\u5931\u5206\u533a\uff0c\u4ece\u505a\u68c0\u67e5\u70b9\u7684RDD\u5f00\u59cb\u91cd\u505aLineage\uff0c\u5c31\u4f1a\u51cf\u5c11\u5f00\u9500\u3002\u68c0\u67e5\u70b9\u901a\u8fc7\u5c06\u6570\u636e\u5199\u5165\u5230HDFS\u6587\u4ef6\u7cfb\u7edf\u5b9e\u73b0\u4e86RDD\u7684\u68c0\u67e5\u70b9\u529f\u80fd\u3002<br \/>\ncache \u548c checkpoint \u662f\u6709\u663e\u8457\u533a\u522b\u7684\uff0c \u00a0\u7f13\u5b58\u628a RDD \u8ba1\u7b97\u51fa\u6765\u7136\u540e\u653e\u5728\u5185\u5b58\u4e2d\uff0c\u4f46\u662fRDD \u7684\u4f9d\u8d56\u94fe\uff08\u76f8\u5f53\u4e8e\u6570\u636e\u5e93\u4e2d\u7684redo \u65e5\u5fd7\uff09\uff0c \u4e5f\u4e0d\u80fd\u4e22\u6389\uff0c \u5f53\u67d0\u4e2a\u70b9\u67d0\u4e2a executor \u5b95\u4e86\uff0c\u4e0a\u9762cache \u7684RDD\u5c31\u4f1a\u4e22\u6389\uff0c \u9700\u8981\u901a\u8fc7 \u4f9d\u8d56\u94fe\u91cd\u653e\u8ba1\u7b97\u51fa\u6765\uff0c \u4e0d\u540c\u7684\u662f\uff0c checkpoint \u662f\u628a RDD \u4fdd\u5b58\u5728 HDFS\u4e2d\uff0c \u662f\u591a\u526f\u672c\u53ef\u9760\u5b58\u50a8\uff0c\u6240\u4ee5\u4f9d\u8d56\u94fe\u5c31\u53ef\u4ee5\u4e22\u6389\u4e86\uff0c\u5c31\u65a9\u65ad\u4e86\u4f9d\u8d56\u94fe\uff0c \u662f\u901a\u8fc7\u590d\u5236\u5b9e\u73b0\u7684\u9ad8\u5bb9\u9519\u3002<\/p>\n<pre><code class=\"language-scala line-numbers\">    val conf = new SparkConf()\n      .setAppName(\"demo1\")\n      
.setMaster(\"local[2]\")\n    val sc = new SparkContext(conf)\n#\u8bbe\u7f6eHDFS\u4e0a\u68c0\u67e5\u70b9\u5b58\u50a8\u7684\u8def\u5f84\n    sc.setCheckpointDir(\"C:\\\\Users\\\\Administrator\\\\Desktop\\\\mapdata\\\\\")\n<\/code><\/pre>\n<hr \/>\n<h4><span class=\"ez-toc-section\" id=\"RDD%E7%9A%84%E4%BE%9D%E8%B5%96%E5%85%B3%E7%B3%BB\"><\/span>RDD\u7684\u4f9d\u8d56\u5173\u7cfb<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>RDD\u548c\u5b83\u4f9d\u8d56\u7684\u7236RDD\uff08s\uff09\u7684\u5173\u7cfb\u6709\u4e24\u79cd\u4e0d\u540c\u7684\u7c7b\u578b\uff0c\u5373\u7a84\u4f9d\u8d56\uff08narrow dependency\uff09\u548c\u5bbd\u4f9d\u8d56\uff08wide dependency\uff09\u3002<\/li>\n<li>\u7a84\u4f9d\u8d56\u6307\u7684\u662f\u6bcf\u4e00\u4e2a\u7236RDD\u7684Partition\u6700\u591a\u88ab\u5b50RDD\u7684\u4e00\u4e2aPartition\u4f7f\u7528.\u7a84\u4f9d\u8d56\u6211\u4eec\u5f62\u8c61\u7684\u6bd4\u55bb\u4e3a\u72ec\u751f\u5b50\u5973<\/li>\n<li>\u5bbd\u4f9d\u8d56\u6307\u7684\u662f\u591a\u4e2a\u5b50RDD\u7684Partition\u4f1a\u4f9d\u8d56\u540c\u4e00\u4e2a\u7236RDD\u7684Partition\uff0c\u4f1a\u5f15\u8d77shuffle.\u5bbd\u4f9d\u8d56\u6211\u4eec\u5f62\u8c61\u7684\u6bd4\u55bb\u4e3a\u8d85\u751f<\/li>\n<\/ul>\n<hr \/>\n<h4><span class=\"ez-toc-section\" id=\"DAG%E7%9A%84%E7%94%9F%E6%88%90\"><\/span>DAG\u7684\u751f\u6210<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<p>DAG(Directed Acyclic Graph)\u53eb\u505a\u6709\u5411\u65e0\u73af\u56fe\uff0c\u539f\u59cb\u7684RDD\u901a\u8fc7\u4e00\u7cfb\u5217\u7684\u8f6c\u6362\u5c31\u5f62\u6210\u4e86DAG\uff0c\u6839\u636eRDD\u4e4b\u95f4\u7684\u4f9d\u8d56\u5173\u7cfb\u7684\u4e0d\u540c\u5c06DAG\u5212\u5206\u6210\u4e0d\u540c\u7684Stage\uff0c\u5bf9\u4e8e\u7a84\u4f9d\u8d56\uff0cpartition\u7684\u8f6c\u6362\u5904\u7406\u5728Stage\u4e2d\u5b8c\u6210\u8ba1\u7b97\u3002\u5bf9\u4e8e\u5bbd\u4f9d\u8d56\uff0c\u7531\u4e8e\u6709Shuffle\u7684\u5b58\u5728\uff0c\u53ea\u80fd\u5728parent 
RDD\u5904\u7406\u5b8c\u6210\u540e\uff0c\u624d\u80fd\u5f00\u59cb\u63a5\u4e0b\u6765\u7684\u8ba1\u7b97\uff0c\u56e0\u6b64\u5bbd\u4f9d\u8d56\u662f\u5212\u5206Stage\u7684\u4f9d\u636e\u3002<\/p>\n<hr \/>\n<h4><span class=\"ez-toc-section\" id=\"%E8%B0%83%E4%BC%98\"><\/span>\u8c03\u4f18<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<p>\u7533\u8bf7\u7684\u8ba1\u7b97\u8282\u70b9\uff08Executor\uff09\u6570\u76ee\u548c\u6bcf\u4e2a\u8ba1\u7b97\u8282\u70b9\u6838\u6570\uff0c\u51b3\u5b9a\u4e86\u4f60\u540c\u4e00\u65f6\u523b\u53ef\u4ee5\u5e76\u884c\u6267\u884c\u7684task\u6570\u76ee\u3002<br \/>\n\u6bd4\u5982\u4f60\u7684RDD\u6709100\u4e2a\u5206\u533a\uff0c\u90a3\u4e48\u8ba1\u7b97\u7684\u65f6\u5019\u5c31\u4f1a\u751f\u6210100\u4e2atask\uff0c\u4f60\u7684\u8d44\u6e90\u914d\u7f6e\u4e3a10\u4e2a\u8ba1\u7b97\u8282\u70b9\uff0c\u6bcf\u4e2a2\u4e2a\u6838\uff0c\u540c\u4e00\u65f6\u523b\u53ef\u4ee5\u5e76\u884c\u7684task\u6570\u76ee\u4e3a20\uff0c\u8ba1\u7b97\u8fd9\u4e2aRDD\u5c31\u9700\u89815\u4e2a\u8f6e\u6b21\u3002<br \/>\n\u5982\u679c\u8ba1\u7b97\u8d44\u6e90\u4e0d\u53d8\uff0c\u4f60\u6709101\u4e2atask\u7684\u8bdd\uff0c\u5c31\u9700\u89816\u4e2a\u8f6e\u6b21\uff0c\u5728\u6700\u540e\u4e00\u8f6e\u4e2d\uff0c\u53ea\u6709\u4e00\u4e2atask\u5728\u6267\u884c\uff0c\u5176\u4f59\u6838\u90fd\u5728\u7a7a\u8f6c\u3002<br \/>\n\u5982\u679c\u8d44\u6e90\u4e0d\u53d8\uff0c\u4f60\u7684RDD\u53ea\u67092\u4e2a\u5206\u533a\uff0c\u90a3\u4e48\u540c\u4e00\u65f6\u523b\u53ea\u67092\u4e2atask\u8fd0\u884c\uff0c\u5176\u4f5918\u4e2a\u6838\u7a7a\u8f6c\uff0c\u9020\u6210\u8d44\u6e90\u6d6a\u8d39\u3002\u8fd9\u5c31\u662f\u5728spark\u8c03\u4f18\u4e2d\uff0c\u589e\u5927RDD\u5206\u533a\u6570\u76ee\uff0c\u589e\u5927\u4efb\u52a1\u5e76\u884c\u5ea6\u7684\u505a\u6cd5\u3002<br \/>\n\u5728sparkUI\u4e0a\u67e5\u770b\u8c03\u5ea6\u4efb\u52a1\u7684\u6240\u7528\u65f6\u95f4\u4e0e\u8ba1\u7b97\u6240\u7528\u65f6\u95f4\uff0c\u5982\u679c\u8ba1\u7b97\u4efb\u52a1\u7528\u65f6\u5c0f\u4e8e\u8c03\u5ea6\u4efb\u52a1\u7528\u65f6\u5219\u8bf4\u660e\u5206\u533a\u6570\u592a\u591a\u3002<\/p>\n<hr 
\/>\n<h4><span class=\"ez-toc-section\" id=\"%E9%94%AE%E5%80%BC%E5%AF%B9RDD%E7%9A%84%E6%95%B0%E6%8D%AE%E5%88%86%E5%8C%BA\"><\/span>\u952e\u503c\u5bf9RDD\u7684\u6570\u636e\u5206\u533a<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>Spark\u76ee\u524d\u652f\u6301Hash\u5206\u533a\u548cRange\u5206\u533a\uff0c\u7528\u6237\u4e5f\u53ef\u4ee5\u81ea\u5b9a\u4e49\u5206\u533a\uff0cHash\u5206\u533a\u4e3a\u5f53\u524d\u7684\u9ed8\u8ba4\u5206\u533a\uff0cSpark\u4e2d\u5206\u533a\u5668\u76f4\u63a5\u51b3\u5b9a\u4e86:RDD\u4e2d\u5206\u533a\u7684\u4e2a\u6570\u3001RDD\u4e2d\u6bcf\u6761\u6570\u636e\u7ecf\u8fc7Shuffle\u8fc7\u7a0b\u5c5e\u4e8e\u54ea\u4e2a\u5206\u533a\u548cReduce\u7684\u4e2a\u6570\u3002<\/li>\n<\/ul>\n<p>(1)\u53ea\u6709Key-Value\u7c7b\u578b\u7684RDD\u624d\u6709\u5206\u533a\u5668\u7684\uff0c\u975eKey-Value\u7c7b\u578b\u7684RDD\u5206\u533a\u5668\u7684\u503c\u662fNone\u3002<\/p>\n<pre><code class=\"language-scala line-numbers\">scala&gt; val pp=sc.parallelize(List(1,1))\npp: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[1] at parallelize at &lt;console&gt;:24\nscala&gt; pp.partitioner\nres1: Option[org.apache.spark.Partitioner] = None\n<\/code><\/pre>\n<p>(2)\u6bcf\u4e2aRDD\u7684\u5206\u533aID\u8303\u56f4\uff1a0~numPartitions-1\uff0c\u51b3\u5b9a\u8fd9\u4e2a\u503c\u662f\u5c5e\u4e8e\u54ea\u4e2a\u5206\u533a\u7684\u3002<\/p>\n<ol>\n<li>\u53ef\u4ee5\u901a\u8fc7\u4f7f\u7528RDD\u7684partitioner \u5c5e\u6027\u6765\u83b7\u53d6 RDD \u7684\u5206\u533a\u65b9\u5f0f\u3002\u5b83\u4f1a\u8fd4\u56de\u4e00\u4e2a scala.Option \u5bf9\u8c61\uff0c 
\u901a\u8fc7get\u65b9\u6cd5\u83b7\u53d6\u5176\u4e2d\u7684\u503c\u3002<\/p>\n<\/li>\n<li>\n<p>Hash\u5206\u533a\u65b9\u5f0f<\/p>\n<\/li>\n<\/ol>\n<p>HashPartitioner\u5206\u533a\u7684\u539f\u7406\uff1a\u5bf9\u4e8e\u7ed9\u5b9a\u7684key\uff0c\u8ba1\u7b97\u5176hashCode\uff0c\u5e76\u9664\u4ee5\u5206\u533a\u7684\u4e2a\u6570\u53d6\u4f59\uff0c\u5982\u679c\u4f59\u6570\u5c0f\u4e8e0\uff0c\u5219\u7528\u4f59\u6570+\u5206\u533a\u7684\u4e2a\u6570\uff0c\u6700\u540e\u8fd4\u56de\u7684\u503c\u5c31\u662f\u8fd9\u4e2akey\u6240\u5c5e\u7684\u5206\u533aID\u3002<\/p>\n<ol start=\"3\">\n<li>Range\u5206\u533a\u65b9\u5f0f<\/li>\n<\/ol>\n<p>HashPartitioner\u5206\u533a\u5f0a\u7aef\uff1a\u53ef\u80fd\u5bfc\u81f4\u6bcf\u4e2a\u5206\u533a\u4e2d\u6570\u636e\u91cf\u7684\u4e0d\u5747\u5300\uff0c\u6781\u7aef\u60c5\u51b5\u4e0b\u4f1a\u5bfc\u81f4\u67d0\u4e9b\u5206\u533a\u62e5\u6709RDD\u7684\u5168\u90e8\u6570\u636e\u3002<br \/>\nRangePartitioner\u5206\u533a\u4f18\u52bf\uff1a\u5c3d\u91cf\u4fdd\u8bc1\u6bcf\u4e2a\u5206\u533a\u4e2d\u6570\u636e\u91cf\u7684\u5747\u5300\uff0c\u800c\u4e14\u5206\u533a\u4e0e\u5206\u533a\u4e4b\u95f4\u662f\u6709\u5e8f\u7684\uff0c\u4e00\u4e2a\u5206\u533a\u4e2d\u7684\u5143\u7d20\u80af\u5b9a\u90fd\u662f\u6bd4\u53e6\u4e00\u4e2a\u5206\u533a\u5185\u7684\u5143\u7d20\u5c0f\u6216\u8005\u5927\uff1b<br \/>\n\u4f46\u662f\u5206\u533a\u5185\u7684\u5143\u7d20\u662f\u4e0d\u80fd\u4fdd\u8bc1\u987a\u5e8f\u7684\u3002\u7b80\u5355\u7684\u8bf4\u5c31\u662f\u5c06\u4e00\u5b9a\u8303\u56f4\u5185\u7684\u6570\u6620\u5c04\u5230\u67d0\u4e00\u4e2a\u5206\u533a\u5185\u3002<br \/>\nRangePartitioner\u4f5c\u7528\uff1a\u5c06\u4e00\u5b9a\u8303\u56f4\u5185\u7684\u6570\u6620\u5c04\u5230\u67d0\u4e00\u4e2a\u5206\u533a\u5185\uff0c\u5728\u5b9e\u73b0\u4e2d\uff0c\u5206\u754c\u7684\u7b97\u6cd5\u5c24\u4e3a\u91cd\u8981\u3002\u7528\u5230\u4e86\u6c34\u5858\u62bd\u6837\u7b97\u6cd5\u3002<\/p>\n<ol 
start=\"4\">\n<li>\u81ea\u5b9a\u4e49\u5206\u533a\u65b9\u5f0f<\/li>\n<\/ol>\n<p>\u8981\u5b9e\u73b0\u81ea\u5b9a\u4e49\u7684\u5206\u533a\u5668\uff0c\u4f60\u9700\u8981\u7ee7\u627f org.apache.spark.Partitioner \u7c7b\u5e76\u5b9e\u73b0\u4e0b\u9762\u4e09\u4e2a\u65b9\u6cd5\u3002<br \/>\n<code>numPartitions<\/code>: Int:\u8fd4\u56de\u521b\u5efa\u51fa\u6765\u7684\u5206\u533a\u6570\u3002<br \/>\n<code>getPartition(key: Any)<\/code>: Int:\u8fd4\u56de\u7ed9\u5b9a\u952e\u7684\u5206\u533a\u7f16\u53f7(0\u5230numPartitions-1)\u3002<br \/>\n<code>equals()<\/code>: Java \u5224\u65ad\u76f8\u7b49\u6027\u7684\u6807\u51c6\u65b9\u6cd5\u3002\u8fd9\u4e2a\u65b9\u6cd5\u7684\u5b9e\u73b0\u975e\u5e38\u91cd\u8981\uff0cSpark \u9700\u8981\u7528\u8fd9\u4e2a\u65b9\u6cd5\u6765\u68c0\u67e5\u4f60\u7684\u5206\u533a\u5668\u5bf9\u8c61\u662f\u5426\u548c\u5176\u4ed6\u5206\u533a\u5668\u5b9e\u4f8b\u76f8\u540c\uff0c\u8fd9\u6837 Spark \u624d\u53ef\u4ee5\u5224\u65ad\u4e24\u4e2a RDD \u7684\u5206\u533a\u65b9\u5f0f\u662f\u5426\u76f8\u540c\u3002<\/p>\n<pre><code class=\"language-scala line-numbers\">package com.wq1.spark\nimport org.apache.spark.{Partitioner, SparkConf, SparkContext}\nclass CustomerPartitioner(numParts:Int) extends Partitioner {\n  \/\/\u8986\u76d6\u5206\u533a\u6570\n  override def numPartitions: Int = numParts\n  \/\/\u8986\u76d6\u5206\u533a\u53f7\u83b7\u53d6\u51fd\u6570\n  override def getPartition(key: Any): Int = {\n    val ckey: String = key.toString\n    ckey.substring(ckey.length-1).toInt%numParts\n  }\n}\nobject CustomerPartitioner {\n  def main(args: Array[String]) {\n    val conf=new SparkConf().setAppName(\"partitioner\")\n    val sc=new SparkContext(conf)\n    val data=sc.parallelize(List(\"aa.2\",\"bb.2\",\"cc.3\",\"dd.3\",\"ee.5\"))\n    data.map((_,1)).partitionBy(new CustomerPartitioner(5)).keys.saveAsTextFile(\"hdfs:\/\/hadoop01:9000\/partitioner\")\n  }\n}\n<\/code><\/pre>\n<hr \/>\n<h4><span class=\"ez-toc-section\" 
id=\"Standalone%E6%A8%A1%E5%BC%8F%E9%85%8D%E7%BD%AE\"><\/span>Standalone\u6a21\u5f0f\u914d\u7f6e<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>slaves<\/li>\n<\/ul>\n<pre><code class=\"language-conf line-numbers\"># A Spark Worker will be started on each of the machines listed below.\nwq1\nwq2\nwq3\n<\/code><\/pre>\n<ul>\n<li>spark-site.conf<\/li>\n<\/ul>\n<pre><code class=\"language-conf line-numbers\"># Example:\n# spark.master                     spark:\/\/master:7077\n# spark.eventLog.enabled           true\n# spark.eventLog.dir               hdfs:\/\/namenode:8021\/directory\n# spark.serializer                 org.apache.spark.serializer.KryoSerializer\n# spark.driver.memory              5g\n# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers=\"one two three\"\n# sparkMaster\u7684\u914d\u7f6e\nspark.master                     spark:\/\/wq1:7077\n# # \u5f00\u542fSpark\u4efb\u52a1\u5386\u53f2\u8bb0\u5f55\nspark.eventLog.enabled           true\n# # \u8bbe\u7f6e\u4efb\u52a1\u5386\u53f2\u8bb0\u5f55\u7684\u5b58\u50a8\u76ee\u5f55\nspark.eventLog.dir               hdfs:\/\/wq1:9000\/sparklogs\n<\/code><\/pre>\n<ul>\n<li>spark-env.sh<\/li>\n<\/ul>\n<pre><code class=\"language-conf line-numbers\"># \u914d\u7f6eHadoop\u96c6\u7fa4\u7684\u914d\u7f6e\u6587\u4ef6\u76ee\u5f55\nHADOOP_CONF_DIR=\/opt\/hadoop-2.7.7\/etc\/hadoop\n# \u914d\u7f6eSparkStandalone\u96c6\u7fa4\u7684Master\u8282\u70b9\u4e3b\u673a\u540d\nSPARK_MASTER_HOST=wq3\n# \u914d\u7f6eSparkStandalone\u96c6\u7fa4\u7684Master\u8282\u70b9\u670d\u52a1\u7aef\u53e3\nSPARK_MASTER_PORT=7077\nJAVA_HOME=\/opt\/jdk1.8\n<\/code><\/pre>\n<h4><span class=\"ez-toc-section\" id=\"Job_history_%E9%85%8D%E7%BD%AE\"><\/span>Job history \u914d\u7f6e<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li><code>vim spark-env.sh<\/code><\/li>\n<\/ul>\n<pre><code class=\"language-bash 
line-numbers\">#\u8c03\u6574WEBUI\u8bbf\u95ee\u7684\u7aef\u53e3\u53f7\u4e3a4000\n#\u6307\u5b9a\u4fdd\u5b58Application\u5386\u53f2\u8bb0\u5f55\u7684\u4e2a\u6570\uff0c\u5982\u679c\u8d85\u8fc7\u8fd9\u4e2a\u503c\uff0c\u65e7\u7684\u5e94\u7528\u7a0b\u5e8f\u4fe1\u606f\u5c06\u88ab\u5220\u9664\uff0c\u8fd9\u4e2a\u662f\u5185\u5b58\u4e2d\u7684\u5e94\u7528\u6570\uff0c\u800c\u4e0d\u662f\u9875\u9762\u4e0a\u663e\u793a\u7684\u5e94\u7528\u6570\u3002\nexport SPARK_HISTORY_OPTS=\"-Dspark.history.ui.port=4000\n-Dspark.history.retainedApplications=3\n-Dspark.history.fs.logDirectory=hdfs:\/\/wq1:9000\/sparklog\"\n<\/code><\/pre>\n<h4><span class=\"ez-toc-section\" id=\"spark%E5%BA%94%E7%94%A8%E6%89%A7%E8%A1%8C%E8%BF%87%E7%A8%8B\"><\/span>spark\u5e94\u7528\u6267\u884c\u8fc7\u7a0b<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>\u8fd0\u884c\u524d\u7684\u5185\u5b58\u60c5\u51b5<\/li>\n<\/ul>\n<pre><code class=\"language-bash line-numbers\">[root@wq1 bin]# free -h\n              total        used        free      shared  buff\/cache   available\nMem:           1.4G        1.1G        131M        1.6M        197M        178M\nSwap:          2.0G        1.5G        531M\n<\/code><\/pre>\n<ol>\n<li>spark\u4efb\u52a1\u8c03\u5ea6\u8fc7\u7a0b<br \/>\n<img src=\"https:\/\/img-blog.csdnimg.cn\/20201114202552123.png\" alt=\"\" \/><br \/>\n(1)master\uff1a\u7ba1\u7406\u96c6\u7fa4\u548c\u8282\u70b9\uff0c\u4e0d\u53c2\u4e0e\u8ba1\u7b97\u3002<br \/>\n(2)worker\uff1a\u8ba1\u7b97\u8282\u70b9\uff0c\u8fdb\u7a0b\u672c\u8eab\u4e0d\u53c2\u4e0e\u8ba1\u7b97\uff0c\u548cmaster\u6c47\u62a5\u3002<br \/>\n(3)Driver\uff1a\u8fd0\u884c\u7a0b\u5e8f\u7684main\u65b9\u6cd5\uff0c\u521b\u5efaspark context\u5bf9\u8c61\u3002<br \/>\n(4)spark context\uff1a\u63a7\u5236\u6574\u4e2aapplication\u7684\u751f\u547d\u5468\u671f\uff0c\u5305\u62ecDAGScheduler\u548ctask scheduler\u7b49\u7ec4\u4ef6\u3002<br \/>\n(5)client\uff1a\u7528\u6237\u63d0\u4ea4\u7a0b\u5e8f\u7684\u5165\u53e3\u3002<br \/>\n(6)DAG 
Scheduler:\u6839\u636ejob\u6784\u5efa\u57fa\u4e8estage\u7684DAG\uff0c\u5e76\u63d0\u4ea4stage\u7ed9TaskScheduler.<br \/>\n(7)TaskScheduler:\u5c06\u4efb\u52a1task\u5206\u53d1\u7ed9Executor<br \/>\n(8)SparkEnv:\u7ebf\u7a0b\u7ea7\u522b\u7684\u4e0a\u4e0b\u6587\uff0c\u5b58\u50a8\u8fd0\u884c\u65f6\u7684\u91cd\u8981\u7ec4\u4ef6\u7684\u5f15\u7528<\/li>\n<\/ol>\n<ul>\n<li>Spark-submit\u542f\u52a8\u8fdb\u7a0b\uff0c\u521d\u59cb\u5316\u521b\u5efaSparkContext<\/li>\n<li>SparkContext\u6784\u5efaDAGScheduler\u548cTaskScheduler<\/li>\n<li>\u5ba2\u6237\u7aef\u8fde\u63a5master\u7533\u8bf7\u6ce8\u518capplication<\/li>\n<li>master\u63a5\u6536application\u6ce8\u518c\u7533\u8bf7\uff0c\u6839\u636e\u8d44\u6e90\u8c03\u5ea6\u7b97\u6cd5\uff08FIFO\u3001FAIR\uff09\u5728worker\u8282\u70b9\u4e0a\u542f\u52a8\u591a\u4e2aexecutor<\/li>\n<li>\u901a\u77e5worker\u542f\u52a8executor<\/li>\n<li>\u6240\u6709\u542f\u52a8\u597d\u7684executor\uff0c\u53cd\u5411\u6ce8\u518c\u5230TaskScheduler<\/li>\n<li>\u6b64\u65f6\u5404\u65b9\u9762\u8d44\u6e90\u90fd\u51c6\u5907\u597d\u4e86\uff0c\u7ed3\u675fSparkContext\u521d\u59cb\u5316<\/li>\n<li>SparkContext\u5f00\u59cb\u6267\u884c\u5904\u7406\u4e1a\u52a1\u903b\u8f91\uff0c\u6bcf\u6267\u884c\u5230\u4e00\u4e2aaction\u7b97\u5b50\uff0c\u5373\u521b\u5efa\u4e00\u4e2ajob\uff0c\u5e76\u4e14\u628ajob\u63d0\u4ea4\u7ed9DAGScheduler<\/li>\n<li>DAGScheduler\u5c06job\u5212\u5206\u6210\u591a\u4e2astage\uff0c\u5212\u5206\u4f9d\u636e\uff1a\u5bbd\u4f9d\u8d56\u3002\u6bcf\u4e2astage\u5bf9\u5e94\u4e00\u4e2aTaskSet\u3002\u5e76\u63d0\u4ea4\u7ed9TaskScheduler<\/li>\n<li>TaskScheduler\u5c06task\u4efb\u52a1\uff0c\u5206\u53d1\u5230Executor\u6267\u884c<\/li>\n<\/ul>\n<ol start=\"2\">\n<li>\u64cd\u4f5c\u8fc7\u7a0b<br \/>\nspark\u5b9e\u73b0\u5355\u8bcd\u8ba1\u6570\u7684\u4ee3\u7801<br \/>\n\u7b80\u5199\u7248<\/li>\n<\/ol>\n<pre><code class=\"language-scala line-numbers\"> val conf = new SparkConf().setMaster(\"local[2]\").setAppName(\"demo\")\n    val sc = new SparkContext(conf)\n    val rdd1: 
RDD[String] = sc.textFile(\"C:\\\\Users\\\\Administrator\\\\Desktop\\\\mapdata\\\\words.txt\")\nrdd1.flatMap(_.split(\"\")).map((_,1)).reduceByKey((_+_)).foreach(println)\n<\/code><\/pre>\n<ul>\n<li>\u7ec6\u5316\u7248<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\">object WCSpark {\n  def main(args: Array[String]): Unit = {\n    \/\/\u521b\u5efa\u914d\u7f6e\u5bf9\u8c61\n    val conf = new SparkConf()\n    \/\/\u8bbe\u7f6eApp\u7684\u540d\u79f0   \u6709\u5565\u7528\uff1f \u65b9\u4fbf\u5728\u76d1\u63a7\u9875\u9762\u627e\u5230  MR-\u300bYarn 8088\n    conf.setAppName(\"WCSpark\")\n    \/\/\u8bbe\u7f6eSpark\u7684\u8fd0\u884c\u6a21\u5f0f  local\u672c\u5730\u8fd0\u884c  \u7528\u4e8e\u6d4b\u8bd5\u73af\u5883\n    conf.setMaster(\"local\")\n\n    \/\/\u521b\u5efaSpark\u4e0a\u4e0b\u6587 \u4ed6\u662f\u901a\u5f80\u96c6\u7fa4\u7684\u552f\u4e00\u901a\u9053\n    val sc = new SparkContext(conf)\n\n    \/**\n      * \u5904\u7406\u6570\u636e   \u5728SparkCore\u4e2d\u4e00\u5207\u5f97\u8ba1\u7b97\u90fd\u662f\u57fa\u4e8eRDD\n      * R\uff08Resilient\uff09D\uff08Distributed \uff09D\uff08Dataset\uff09\n      * RDD \u5f39\u6027\u5206\u5e03\u5f0f\u6570\u636e\u96c6\n      *\/\n    val lineRDD = sc.textFile(\"d:\/test.txt\")\n    \/\/\u57fa\u4e8elineRDD\u4e2d\u7684\u6570\u636e \u8fdb\u884c\u5206\u8bcd\n    val wordRDD = lineRDD.flatMap { _.split(\" \") }\n    \/\/\u6bcf\u4e00\u4e2a\u5355\u8bcd\u8ba1\u6570\u4e3a1  pairRDD  K:word V:1\n    val pairRDD = wordRDD.map { (_,1) }\n    \/\/\u76f8\u540c\u7684\u5355\u8bcd\u8fdb\u884c\u5206\u7ec4\uff0c\u5bf9\u7ec4\u5185\u7684\u6570\u636e\u8fdb\u884c\u7d2f\u52a0\n    \/\/restRDD K:word V:count\n    val restRDD = pairRDD.reduceByKey((v1,v2)=&gt;v1+v2)\n    \/**\n      * \u6839\u636e\u5355\u8bcd\u51fa\u73b0\u7684\u6b21\u6570\u6765\u6392\u5e8f\n      * sortByKey \u6839\u636ekey\u6765\u6392\u5e8f\n      * sortBy\n      *\/\n    \/\/    restRDD\n    \/\/      .map(_.swap)\n    \/\/      .sortByKey(false)\n    \/\/      .map(_.swap)\n    \/\/   
   .foreach(println)\n\n    \/\/\u91ca\u653e\u8d44\u6e90\n    sc.stop()\n      \/\/\u6700\u5168\u7684\u5199\u6cd5\n       rdd1.flatMap(line=&gt;{\n      line.split(\"\")\n    })\n    val value = words.map(word =&gt; {\n      new Tuple2(word, 1)\n    })\n    val value1 = value.reduceByKey((v1, v2) =&gt; {\n      v1 + v2\n    })\n    value1.foreach(one=&gt;{\n      print(one)\n    })\n  }\n}\n<\/code><\/pre>\n<blockquote><p>\n  \u6839\u636e\u7f51\u7ad9\u65e5\u5fd7\u5206\u6790\u51fa\u8fd1\u51e0\u5929\u6709\u591a\u5c11IP\u8bbf\u95ee\u672c\u7f51\u7ad9\n<\/p><\/blockquote>\n<p><img src=\"https:\/\/img-blog.csdnimg.cn\/20201107100448100.png\" alt=\"\u63d0\u53d6\u603bIP\u6570\u6d41\u7a0b\" title=\"\u63d0\u53d6\u603bIP\u6570\u6d41\u7a0b\" \/><\/p>\n<pre><code class=\"language-scala line-numbers\">  def main(args: Array[String]): Unit = {\n    val conf = new SparkConf()\n      .setAppName(\"demo1\")\n      .setMaster(\"local[2]\")\n    val sc = new SparkContext(conf)\n    sc.setCheckpointDir(\"C:\\\\Users\\\\Administrator\\\\Desktop\\\\mapdata\\\\\")\n    val rdd: RDD[String] = sc.textFile(\"C:\\\\Users\\\\Administrator\\\\Desktop\\\\mapdata\\\\access1.log\")\n    val rdd1: RDD[(String, String, Int, String)] = rdd.map(line =&gt; {\n      val strs = line.split(\"\\\\s+\")\n      val ip = strs(0)\n      val str = DateUtils.dateFormat(strs(3).tail)\n      var rCode = 0\n      try {\n        rCode = strs(8).toInt\n      } catch {\n        case e: Exception =&gt;\n      }\n      val osType = \"\"\n      (ip, str, rCode, osType)\n    })\n    rdd1.map((x=&gt;{\n      new Tuple2(x._1,1)\n    })).distinct().map(x=&gt;{\n      new Tuple2(1,1)\n    }).reduceByKey(_+_).foreach(println)\n  }\n<\/code><\/pre>\n<blockquote><p>\n  \u63d0\u53d6\u7f51\u7ad9\u8bbf\u95ee\u65e5\u5fd7\u4e2d\u7684\u8bbf\u95eeIP\uff0c\u8bbf\u95ee\u65e5\u671f\uff0c\u8bbf\u95ee\u54cd\u5e94\u7801\u4e3a404\u7684<br \/>\n  
\u7edf\u8ba1\u8bbf\u95eeIP\u4e2d\u6570\u5b571\u52309\u51fa\u73b0\u7684\u6b21\u6570\n<\/p><\/blockquote>\n<pre><code class=\"language-scala line-numbers\"> def main(args: Array[String]): Unit = {\n    val conf = new SparkConf()\n      .setAppName(\"demo1\")\n      .setMaster(\"local[2]\")\n    val sc = new SparkContext(conf)\n    val rdd: RDD[String] = sc.textFile(\"C:\\\\Users\\\\Administrator\\\\Desktop\\\\mapdata\\\\access.log\")\n    val rdd1: RDD[(String, String, Int, String)] = rdd.map(line =&gt; {\n      val strs = line.split(\"\\\\s+\")\n      val ip = strs(0)\n      val str = DateUtils.dateFormat(strs(3).tail)\n      var rCode = 0\n      try {\n        rCode = strs(8).toInt\n      } catch {\n        case e: Exception =&gt;\n      }\n      val osType = \"\"\n      (ip, str, rCode, osType)\n    }).filter(trufalse=&gt;{\n      trufalse._3==404\n    })\n\n    val rdd2: RDD[String] = rdd1.flatMap(x =&gt; {\n      val res1: String = x._1\n      val res2: String = res1.replace(\".\", \"\")\n      val res3: Array[String] = res2.split(\"\")\n      res3\n    })\n\n    val rdd3: RDD[String] = rdd1.flatMap(_._1.replace(\".\", \"\").split(\"\"))\n\n    rdd3.map((_, 1))\n      .reduceByKey(_ + _)\n    .foreach(println)\n\n  }\n<\/code><\/pre>\n<h4><span class=\"ez-toc-section\" id=\"Spark_on_Yarn%E6%A8%A1%E5%BC%8F%E4%B8%8B%E5%86%85%E5%AD%98%E5%88%86%E9%85%8D\"><\/span>Spark on Yarn\u6a21\u5f0f\u4e0b\u5185\u5b58\u5206\u914d<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>\u6267\u884c\u7b2c\u4e00\u4e2aspark\u7a0b\u5e8f<br \/>\n\u8fd9\u662f<code>\/opt\/spark-2.4.7\/examples\/jars\/spark-examples_2.11-2.4.7.jar<\/code>\u5b89\u88c5spark\u65f6\u5c31\u6709\u7684\u793a\u4f8b\u67b6\u5305<br \/>\n<code>\u8be5\u7b97\u6cd5\u662f\u5229\u7528\u8499\u7279\u00b7\u5361\u7f57\u7b97\u6cd5\u6c42PI<\/code><\/li>\n<\/ul>\n<pre><code class=\"language-bash line-numbers\">export HADOOP_CONF_DIR=XXX\n.\/bin\/spark-submit \\\n  --class org.apache.spark.examples.SparkPi \\\n  --master 
yarn \\\n  --deploy-mode cluster \\  # can be client for client mode\n  --executor-memory 20G \\\n  --num-executors 50 \\\n  \/path\/to\/examples.jar \\\n  1000\n<\/code><\/pre>\n<p>\u6211\u4eec\u9700\u8981\u9884\u7559\u4e00\u4e9b\u8d44\u6e90\u7ed9ApplicationMaster\uff0cAM\u5927\u7ea6\u9700\u89811024MB\u7684\u5185\u5b58\u548c\u4e00\u4e2aExecutor<br \/>\nHDFS\u5f53\u8fbe\u5230\u5168\u5199\u5165\u541e\u5410\u91cf\u65f6\uff0c\u9700\u8981\u6bcf\u4e2aexecutor\u6267\u884c\u7ea65\u4e2a\u4efb\u52a1\u3002 \u56e0\u6b64\uff0c\u6700\u597d\u63a7\u5236\u6bcf\u4e2aexecutor\u4e2dcore\u7684\u6570\u76ee\u4f4e\u4e8e\u90a3\u4e2a\u6570\u5b57<br \/>\n\u5982\u679c\u6211\u4eec\u7533\u8bf7\u4e86\u6bcf\u4e2aexecutor\u7684\u5185\u5b58\u4e3a20G\u65f6\uff0c\u5bf9\u6211\u4eec\u800c\u8a00\uff0cAM\u5c06\u5b9e\u9645\u5f97\u523020G+ memoryOverhead = 20 + 7% * 20GB = \uff5e22G\u5185\u5b58<\/p>\n<p>\u6211\u4eec\u4e0d\u80fd\u60f3\u5f53\u7136\u7684\u8ba4\u4e3a\u7ed9excutor\u5206\u914d\u8d8a\u591a\u7684\u5185\u5b58\u8d8a\u597d<\/p>\n<pre><code class=\"language-txt line-numbers\">\u6267\u884c\u62e5\u6709\u592a\u591a\u5185\u5b58\u7684executor\u4f1a\u4ea7\u751f\u8fc7\u591a\u7684\u5783\u573e\u56de\u6536\u5ef6\u8fdf\n\u6267\u884c\u8fc7\u5c0f\u7684executor\uff08\u4e3e\u4f8b\u800c\u8a00\uff0c\u4e00\u4e2a\u53ea\u6709\u4e00\u6838\u548c\u4ec5\u4ec5\u8db3\u591f\u5185\u5b58\u8dd1\u4e00\u4e2atask\u7684executor\uff09\uff0c\n\u5c06\u4f1a\u4e22\u5931\u5728\u5355\u4e2aJVM\u4e2d\u8fd0\u884c\u591a\u4efb\u52a1\u7684\u597d\u5904\u3002\n<\/code><\/pre>\n<ul>\n<li>\u6a21\u62df\u5b9e\u9645\u60c5\u51b5<\/li>\n<\/ul>\n<p>\u5047\u8bbe\u96c6\u7fa4\u662f\u5982\u4e0b\u60c5\u51b5<\/p>\n<pre><code class=\"language-txt line-numbers\">**\u96c6\u7fa4\u914d\u7f6e:**\n10\u4e2a\u8282\u70b9\n\u6bcf\u4e2a\u8282\u70b916\u6838\n\u6bcf\u4e2a\u8282\u70b964G\u5185\u5b58\n<\/code><\/pre>\n<p>\u90a3\u4e48\u6700\u4f18\u7684\u914d\u7f6e\u53ef\u4ee5\u662f\u8fd9\u79cd\u60c5\u51b5<\/p>\n<pre><code class=\"language-java 
line-numbers\">\u57fa\u4e8e\u4e0a\u8ff0\u7684\u5efa\u8bae\uff0c\u6211\u4eec\u7ed9\u6bcf\u4e2aexecutor\u5206\u914d5\u4e2acore =&gt; --executor-cores = 5 (\u4fdd\u8bc1\u826f\u597d\u7684HDFS\u541e\u5410)\n\u6bcf\u4e2a\u8282\u70b9\u7559\u4e00\u4e2acore\u7ed9Hadoop\/Yarn\u5b88\u62a4\u8fdb\u7a0b =&gt; \u6bcf\u4e2a\u8282\u70b9\u53ef\u7528\u7684core\u7684\u6570\u76ee = 16 - 1\n\u6240\u4ee5\uff0c\u96c6\u7fa4\u4e2d\u603b\u5171\u53ef\u7528\u7684core\u7684\u6570\u76ee\u662f 15 * 10 = 150\n\u53ef\u7528\u7684executor\u7684\u6570\u76ee = \uff08\u603b\u7684\u53ef\u7528\u7684core\u7684\u6570\u76ee \/ \u6bcf\u4e2aexecutor\u7684core\u7684\u6570\u76ee\uff09= 150 \/ 5 = 30\n\u7559\u4e00\u4e2aexecutor\u7ed9ApplicationMaster =&gt; --num-executors = 29\n\u6bcf\u4e2a\u8282\u70b9\u7684executor\u7684\u6570\u76ee = 30 \/ 10 = 3\n\u6bcf\u4e2aexecutor\u7684\u5185\u5b58 = 64GB \/ 3 = 21GB\n\u8ba1\u7b97\u5806\u5916\u5f00\u9500 = 7% * 21GB = 3GB\u3002\u56e0\u6b64\uff0c\u5b9e\u9645\u7684 --executor-memory = 21 - 3 = 18GB\n<\/code><\/pre>\n<ul>\n<li>\u907f\u514d\u5185\u5b58\u6ea2\u51fa<\/li>\n<\/ul>\n<p>\u5728\u5185\u5b58\u4e0d\u8db3\u7684\u65f6\u5019\uff0c\u4f7f\u7528rdd.persist(StorageLevel.MEMORY_AND_DISK_SER)\u4ee3\u66ffrdd.cache()\u3002<\/p>\n<blockquote><p>\n  spark\u4e2d\u4f1a\u5bfc\u81f4shuffle\u64cd\u4f5c\u7684\u6709\u4ee5\u4e0b\u51e0\u79cd\u7b97\u5b50\uff1a\n<\/p><\/blockquote>\n<p>1\u3001repartition\u7c7b\u7684\u64cd\u4f5c\uff1a\u6bd4\u5982repartition\u3001repartitionAndSortWithinPartitions\u3001coalesce\u7b49<br \/>\n2\u3001byKey\u7c7b\u7684\u64cd\u4f5c\uff1a\u6bd4\u5982reduceByKey\u3001groupByKey\u3001sortByKey\u7b49<br \/>\n3\u3001join\u7c7b\u7684\u64cd\u4f5c\uff1a\u6bd4\u5982join\u3001cogroup\u7b49<\/p>\n<p>\u91cd\u5206\u533a: 
\u4e00\u822c\u4f1ashuffle\uff0c\u56e0\u4e3a\u9700\u8981\u5728\u6574\u4e2a\u96c6\u7fa4\u4e2d\uff0c\u5bf9\u4e4b\u524d\u6240\u6709\u7684\u5206\u533a\u7684\u6570\u636e\u8fdb\u884c\u968f\u673a\uff0c\u5747\u5300\u7684\u6253\u4e71\uff0c\u7136\u540e\u628a\u6570\u636e\u653e\u5165\u4e0b\u6e38\u65b0\u7684\u6307\u5b9a\u6570\u91cf\u7684\u5206\u533a\u5185<br \/>\nbyKey\u7c7b\u7684\u64cd\u4f5c\uff1a\u56e0\u4e3a\u4f60\u8981\u5bf9\u4e00\u4e2akey\uff0c\u8fdb\u884c\u805a\u5408\u64cd\u4f5c\uff0c\u90a3\u4e48\u80af\u5b9a\u8981\u4fdd\u8bc1\u96c6\u7fa4\u4e2d\uff0c\u6240\u6709\u8282\u70b9\u4e0a\u7684\uff0c\u76f8\u540c\u7684key\uff0c\u4e00\u5b9a\u662f\u5230\u540c\u4e00\u4e2a\u8282\u70b9\u4e0a\u8fdb\u884c\u5904\u7406<br \/>\njoin\u7c7b\u7684\u64cd\u4f5c\uff1a\u4e24\u4e2ardd\u8fdb\u884cjoin\uff0c\u5c31\u5fc5\u987b\u5c06\u76f8\u540cjoin<br \/>\nkey\u7684\u6570\u636e\uff0cshuffle\u5230\u540c\u4e00\u4e2a\u8282\u70b9\u4e0a\uff0c\u7136\u540e\u8fdb\u884c\u76f8\u540ckey\u7684\u4e24\u4e2ardd\u6570\u636e\u7684\u7b1b\u5361\u5c14\u4e58\u79ef<\/p>\n<h4><span class=\"ez-toc-section\" id=\"%E5%B1%95%E7%A4%BA%E6%AF%8F%E5%A4%A9%E8%AE%BF%E9%97%AE%E7%9A%84IP%E6%95%B0\"><\/span>\u5c55\u793a\u6bcf\u5929\u8bbf\u95ee\u7684IP\u6570<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<blockquote><p>\n  \u9996\u5148\u8fd9\u7ec4\u6570\u636e\u7ed3\u6784\u5982\u4e0b\uff0c\u96be\u70b9\uff1a\u6bcf\u5929\u7684\u65e5\u671f\u8fd8\u5f97\u53bb\u9664\u65f6\u5206\u79d2\u540e\u4f5c\u4e3a\u4e00\u4e2akey\u503c\uff0cIP\u4f5c\u4e3a\u4e00\u4e2aKEY\u503c\n<\/p><\/blockquote>\n<pre><code class=\"language-scala line-numbers\">(113.200.107.37,2020-11-27 14:39:26,200,)\n(83.97.20.196,2020-11-27 14:39:28,0,)\n(159.89.15.153,2020-11-27 14:39:35,400,)\n<\/code><\/pre>\n<blockquote><p>\n  combineByKey(createCombiner, mergeValue, mergeCombiners, partitioner)\n<\/p><\/blockquote>\n<pre><code class=\"language-scala line-numbers\">def combineByKey[C](\n      createCombiner: V =&gt; C,\n      mergeValue: (C, V) =&gt; C,\n      mergeCombiners: (C, C) 
=&gt; C,\n      partitioner: Partitioner,\n      mapSideCombine: Boolean = true,\n      serializer: Serializer = null): RDD[(K, C)] = self.withScope {}\n<\/code><\/pre>\n<p>combineByKey\u7684\u4f5c\u7528\u662f\uff1aCombine values with the same key using a different result type.<\/p>\n<p>createCombiner\u51fd\u6570\u662f\u901a\u8fc7value\u6784\u9020\u5e76\u8fd4\u56de\u4e00\u4e2a\u65b0\u7684\u7c7b\u578b\u4e3aC\u7684\u503c\uff0c\u8fd9\u4e2a\u7c7b\u578b\u4e5f\u662fcombineByKey\u51fd\u6570\u8fd4\u56de\u503c\u4e2dvalue\u7684\u7c7b\u578b\uff08key\u7684\u7c7b\u578b\u4e0d\u53d8\uff09\u3002<\/p>\n<p>mergeValue\u51fd\u6570\u662f\u628a\u5177\u6709\u76f8\u540c\u7684key\u7684value\u5408\u5e76\u5230C\u4e2d\u3002\u8fd9\u65f6\u5019C\u76f8\u5f53\u4e8e\u4e00\u4e2a\u7d2f\u8ba1\u5668\u3002\uff08\u540c\u4e00\u4e2apartition\u5185\uff09<\/p>\n<p>mergeCombiners\u51fd\u6570\u628a\u4e24\u4e2aC\u5408\u5e76\u6210\u4e00\u4e2aC\u3002\uff08partitions\u4e4b\u95f4\uff09<\/p>\n<pre><code class=\"language-scala line-numbers\">scala&gt;  val textRDD = sc.parallelize(List((\"A\", \"aa\"), (\"B\",\"bb\"),(\"C\",\"cc\"),(\"C\",\"cc\"), (\"D\",\"dd\"), (\"D\",\"dd\")))\ntextRDD: org.apache.spark.rdd.RDD[(String, String)] = ParallelCollectionRDD[0] at parallelize at &lt;console&gt;:24\n\nscala&gt;     val combinedRDD = textRDD.combineByKey(\n     |       value =&gt; (1, value),\n     |       (c:(Int, String), value) =&gt; (c._1+1, c._2),\n     |       (c1:(Int, String), c2:(Int, String)) =&gt; (c1._1+c2._1, c1._2)\n     |     )\ncombinedRDD: org.apache.spark.rdd.RDD[(String, (Int, String))] = ShuffledRDD[1] at combineByKey at &lt;console&gt;:26\n\nscala&gt; \n\nscala&gt;     combinedRDD.collect.foreach(x=&gt;{\n     |       println(x._1+\",\"+x._2._1+\",\"+x._2._2)\n     |     })\n\nD,2,dd\nA,1,aa\nB,1,bb\nC,2,cc\n<\/code><\/pre>\n<blockquote><p>\n  
aggregate\n<\/p><\/blockquote>\n<p>aggregate\u7528\u4e8e\u805a\u5408RDD\u4e2d\u7684\u5143\u7d20\uff0c\u5148\u4f7f\u7528seqOp\u5c06RDD\u4e2d\u6bcf\u4e2a\u5206\u533a\u4e2d\u7684T\u7c7b\u578b\u5143\u7d20\u805a\u5408\u6210U\u7c7b\u578b\uff0c\u518d\u4f7f\u7528combOp\u5c06\u4e4b\u524d\u6bcf\u4e2a\u5206\u533a\u805a\u5408\u540e\u7684U\u7c7b\u578b\u805a\u5408\u6210U\u7c7b\u578b\uff0c\u7279\u522b\u6ce8\u610fseqOp\u548ccombOp\u90fd\u4f1a\u4f7f\u7528zeroValue\u7684\u503c\uff0czeroValue\u7684\u7c7b\u578b\u4e3aU\u3002\u8fd9\u4e2a\u65b9\u6cd5\u7684\u53c2\u6570\u548ccombineByKey\u51fd\u6570\u5dee\u4e0d\u591a\u3002\u6211\u4eec\u9700\u8981\u6ce8\u610f\u7684\u662f\uff0caggregate\u51fd\u6570\u662f\u5148\u8ba1\u7b97\u6bcf\u4e2apartition\u4e2d\u7684\u6570\u636e\uff0c\u518d\u8ba1\u7b97partition\u4e4b\u95f4\u7684\u6570\u636e\u3002<\/p>\n<pre><code class=\"language-scala line-numbers\">  def aggregate[U: ClassTag](zeroValue: U)(seqOp: (U, T) =&gt; U, combOp: (U, U) =&gt; U): U = withScope {\n    \/\/ Clone the zero value since we will also be serializing it as part of tasks\n    var jobResult = Utils.clone(zeroValue, sc.env.serializer.newInstance())\n    val cleanSeqOp = sc.clean(seqOp)\n    val cleanCombOp = sc.clean(combOp)\n    val aggregatePartition = (it: Iterator[T]) =&gt; it.aggregate(zeroValue)(cleanSeqOp, cleanCombOp)\n    val mergeResult = (index: Int, taskResult: U) =&gt; jobResult = combOp(jobResult, taskResult)\n    sc.runJob(this, aggregatePartition, mergeResult)\n    jobResult\n  }\n<\/code><\/pre>\n<pre><code class=\"language-scala line-numbers\">scala&gt; val textRDD = sc.parallelize(List(\"A\", \"B\", \"C\", \"D\", \"D\", \"E\"))\ntextRDD: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[3] at parallelize at &lt;console&gt;:24\n\nscala&gt; val resultRDD = textRDD.aggregate((0, \"\"))((acc, value)=&gt;{(acc._1+1, acc._2+\":\"+value)}, (acc1, acc2)=&gt; {(acc1._1+acc2._1, acc1._2+\":\"+acc2._2)})\nresultRDD: (Int, String) = 
(6,::D:E::D::A::B:C)\n<\/code><\/pre>\n<blockquote><p>\n  groupbykey\u548creducebykey\u7684\u533a\u522b\n<\/p><\/blockquote>\n<pre><code class=\"language-scala line-numbers\">    var testrdd: SparkConf = new SparkConf().setMaster(\"local[2]\").setAppName(\"test1\")\n    var sc: SparkContext = new SparkContext(testrdd)\n    val words = Array(\"one\", \"two\", \"two\", \"three\", \"three\", \"three\")\n\n    val wordPairsRDD = sc.parallelize(words).map(word =&gt; (word, 1))\n\n    val wordCountsWithReduce = wordPairsRDD.reduceByKey(_ + _).foreach(println)\n\n    val wordCountsWithGroup = wordPairsRDD.groupByKey().foreach(println)\n<\/code><\/pre>\n<p>\u4ece\u8fd0\u884c\u7ed3\u679c\u6765\u89c2\u5bdf<\/p>\n<pre><code class=\"language-scala line-numbers\">\/\/reducebykey \u8fd0\u884c\u7ed3\u679c\n(two,2)\n(one,1)\n(three,3)\n\/\/groupbykey \u8fd0\u884c\u7ed3\u679c\n\/\/\u5982\u679cgroupbykey\u7b97\u5b50\u518d\u8fdb\u884cmap\u7b97\u5b50\u7684\u8fd9\u4e48\u64cd\u4f5c\u540e\u90a3\u4e48\u7ed3\u679c\u548c\u4e0a\u9762\u5c31\u4e00\u6837.map(t =&gt; (t._1, t._2.sum))\n(two,CompactBuffer(1, 1))\n(one,CompactBuffer(1))\n(three,CompactBuffer(1, 1, 1))\n<\/code><\/pre>\n<p>\u4ece\u6e90\u7801\u6765\u770b<br \/>\n\u67e5\u770breducebykey\u7684\u6e90\u7801<\/p>\n<pre><code class=\"language-scala line-numbers\">\/**\n   * Merge the values for each key using an associative and commutative reduce function. This will\n   * also perform the merging locally on each mapper before sending results to a reducer, similarly\n   * to a \"combiner\" in MapReduce. 
Output will be hash-partitioned with the existing partitioner\/\n   * parallelism level.\n   *\/\n\/\/reduceByKey\u7528\u4e8e\u5bf9\u6bcf\u4e2akey\u5bf9\u5e94\u7684\u591a\u4e2avalue\u8fdb\u884cmerge\u64cd\u4f5c\uff0c\n\/\/\u6700\u91cd\u8981\u7684\u662f\u5b83\u80fd\u591f\u5728\u672c\u5730\u5148\u8fdb\u884cmerge\u64cd\u4f5c\uff0c\u5e76\u4e14merge\u64cd\u4f5c\u53ef\u4ee5\u901a\u8fc7\u51fd\u6570\u81ea\u5b9a\u4e49\u3002\ndef reduceByKey(func: (V, V) =&gt; V): RDD[(K, V)] = self.withScope {\n    reduceByKey(defaultPartitioner(self), func)\n  }\n\/**\n   * Merge the values for each key using an associative and commutative reduce function. This will\n   * also perform the merging locally on each mapper before sending results to a reducer, similarly\n   * to a \"combiner\" in MapReduce.\n   *\/\ndef reduceByKey(partitioner: Partitioner, func: (V, V) =&gt; V): RDD[(K, V)] = self.withScope {\n    combineByKeyWithClassTag[V]((v: V) =&gt; v, func, func, partitioner)\n  }\n @Experimental\n  def combineByKeyWithClassTag[C](\n      createCombiner: V =&gt; C,\n      mergeValue: (C, V) =&gt; C,\n      mergeCombiners: (C, C) =&gt; C,\n      partitioner: Partitioner,\n      mapSideCombine: Boolean = true,\n      serializer: Serializer = null)(implicit ct: ClassTag[C]): RDD[(K, C)]\n<\/code><\/pre>\n<p>\u67e5\u770bgroupbykey\u6e90\u7801<\/p>\n<pre><code class=\"language-scala line-numbers\">  \/**\n   * Group the values for each key in the RDD into a single sequence. Hash-partitions the\n   * resulting RDD with the existing partitioner\/parallelism level. The ordering of elements\n   * within each group is not guaranteed, and may even differ each time the resulting RDD is\n   * evaluated.\n   *\n   * @note This operation may be very expensive. 
If you are grouping in order to perform an\n   * aggregation (such as a sum or average) over each key, using `PairRDDFunctions.aggregateByKey`\n   * or `PairRDDFunctions.reduceByKey` will provide much better performance.\n   *\/\n\/\/groupByKey\u4e5f\u662f\u5bf9\u6bcf\u4e2akey\u8fdb\u884c\u64cd\u4f5c\uff0c\u4f46\u53ea\u751f\u6210\u4e00\u4e2asequence\u3002\n\/\/\u9700\u8981\u7279\u522b\u6ce8\u610f\u201cNote\u201d\u4e2d\u7684\u8bdd\uff0c\u5b83\u544a\u8bc9\u6211\u4eec\uff1a\u5982\u679c\u9700\u8981\u5bf9sequence\u8fdb\u884caggregation\u64cd\u4f5c\uff08\u6ce8\u610f\uff0cgroupByKey\u672c\u8eab\u4e0d\u80fd\u81ea\u5b9a\u4e49\u64cd\u4f5c\u51fd\u6570\uff09\uff0c\n\/\/\u90a3\u4e48\uff0c\u9009\u62e9reduceByKey\/aggregateByKey\u66f4\u597d\u3002\n\/\/\u8fd9\u662f\u56e0\u4e3agroupByKey\u4e0d\u80fd\u81ea\u5b9a\u4e49\u51fd\u6570\uff0c\u6211\u4eec\u9700\u8981\u5148\u7528groupByKey\u751f\u6210RDD\uff0c\u7136\u540e\u624d\u80fd\u5bf9\u6b64RDD\u901a\u8fc7map\u8fdb\u884c\u81ea\u5b9a\u4e49\u51fd\u6570\u64cd\u4f5c\u3002\ndef groupByKey(): RDD[(K, Iterable[V])] = self.withScope {\n    groupByKey(defaultPartitioner(self))\n  }\ndef groupByKey(partitioner: Partitioner): RDD[(K, Iterable[V])] = self.withScope {\n    \/\/ groupByKey shouldn't use map side combine because map side combine does not\n    \/\/ reduce the amount of data shuffled and requires all map side data be inserted\n    \/\/ into a hash table, leading to more objects in the old gen.\n    val createCombiner = (v: V) =&gt; CompactBuffer(v)\n    val mergeValue = (buf: CompactBuffer[V], v: V) =&gt; buf += v\n    val mergeCombiners = (c1: CompactBuffer[V], c2: CompactBuffer[V]) =&gt; c1 ++= c2\n    val bufs = combineByKeyWithClassTag[CompactBuffer[V]](\n      createCombiner, mergeValue, mergeCombiners, partitioner, mapSideCombine = false)\n    bufs.asInstanceOf[RDD[(K, Iterable[V])]]\n  
}\n<\/code><\/pre>\n<p>\u901a\u8fc7\u4e0a\u9762\u7684\u4e3e\u4f8b\u8bf4\u660e\uff0c\u6211\u4eec\u4e0d\u96be\u53d1\u73b0\uff0creducebykey\u7b97\u5b50\u5728\u7f16\u5199\u65f6\u5c31\u53ef\u4ee5\u81ea\u5b9a\u4e49\u51fd\u6570\uff0c\u800cgroupbykey\u8fd9\u4e2a\u7b97\u5b50\u9700\u8981map\u7b97\u5b50\u5904\u7406\u540e\u90a3\u4e48\u624d\u80fd\u8fbe\u5230\u4e00\u6837\u7684\u6548\u679c\u3002\u6211\u4eec\u518d\u67e5\u770b\u6e90\u7801\u53d1\u73b0\u8fd9\u4e24\u4e2a\u7b97\u5b50\u90fd\u662f\u9760combineByKeyWithClassTag\u5b9e\u73b0\u7684\uff0c\u800creducebykey\u7684 mapSideCombine\u9ed8\u8ba4 = true\uff0c\u4e5f\u5c31\u662f\u8bf4\u4f1a\u5728shuffle\u524d\u8fdb\u884c\u5408\u5e76\uff0c\u8fd9\u6837\u5c31\u4f1a\u51cf\u5c11\u78c1\u76d8\u7684IO\uff0c\u5b83\u53ef\u4ee5\u5728\u6bcf\u4e2a\u5206\u533a\u79fb\u52a8\u6570\u636e\u4e4b\u524d\u5c06\u8f93\u51fa\u6570\u636e\u4e0e\u4e00\u4e2a\u5171\u7528\u7684key\u7ed3\u5408\u3002groupbykey\u5219\u4e0d\u76f8\u540c\u3002<\/p>\n<h5><span class=\"ez-toc-section\" id=\"spark%E5%90%84%E7%A7%8D%E8%BF%90%E8%A1%8C%E6%A8%A1%E5%BC%8F\"><\/span>spark\u5404\u79cd\u8fd0\u884c\u6a21\u5f0f<span class=\"ez-toc-section-end\"><\/span><\/h5>\n<ul>\n<li>local\uff1a\u8c03\u8bd5\u7528<\/li>\n<li>standalone\uff1a\u5206\u5e03\u5f0f\u90e8\u7f72\u96c6\u7fa4\uff0c\u8d44\u6e90\u7ba1\u7406\u548c\u4efb\u52a1\u76d1\u7ba1\u90fd\u662fspark\u81ea\u5df1\u76d1\u63a7<\/li>\n<li>spark on yarn:\u5206\u5e03\u5f0f\u90e8\u7f72\u96c6\u7fa4\uff0c\u8d44\u6e90\u548c\u4efb\u52a1\u76d1\u63a7\u4ea4\u7ed9yarn\u7ba1\u7406<\/li>\n<li>spark\u9876\u5c42\u8c03\u5ea6\u5c42\u4f7f\u7528RDD\u7684\u4f9d\u8d56\u4e3a\u6bcf\u4e2ajob\u521b\u5efa\u4e00\u4e2a\u7531stages\u7ec4\u6210\u7684DAG\uff08\u6709\u5411\u65e0\u73af\u56fe\uff09<\/li>\n<\/ul>\n<h3><span class=\"ez-toc-section\" id=\"spark%E4%B8%8EMapreduce%E7%9A%84%E6%AF%94%E8%BE%83\"><\/span>spark\u4e0eMapreduce\u7684\u6bd4\u8f83<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<ul>\n<li>\u7ebf\u7a0b\u548c\u8fdb\u7a0b\u7684\u533a\u522b<br 
\/>\n<table>\n<thead>\n<tr>\n<th>\u533a\u522b<\/th>\n<th>\u8fdb\u7a0b<\/th>\n<th>\u7ebf\u7a0b<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u6839\u672c\u533a\u522b<\/td>\n<td>\u4f5c\u4e3a\u8d44\u6e90\u5206\u914d\u7684\u5355\u4f4d<\/td>\n<td>\u8c03\u5ea6\u548c\u6267\u884c\u7684\u5355\u4f4d<\/td>\n<\/tr>\n<tr>\n<td>\u5f00\u9500<\/td>\n<td>\u6bcf\u4e2a\u8fdb\u7a0b\u4e4b\u90fd\u6709\u72ec\u7acb\u7684\u4ee3\u7801\u548c\u6570\u636e\u7a7a\u95f4\uff0c\u8fdb\u7a0b\u95f4\u5207\u6362\u4f1a\u6709\u8f83\u5927\u7684\u5f00\u9500<\/td>\n<td>\u7ebf\u7a0b\u53ef\u4ee5\u770b\u505a\u8f7b\u91cf\u7ea7\u7684\u8fdb\u7a0b\uff0c\u540c\u4e00\u7c7b\u7ebf\u7a0b\u5171\u4eab\u4ee3\u7801\u548c\u6570\u636e\u7a7a\u95f4\uff0c\u6bcf\u4e2a\u7ebf\u7a0b\u6709\u72ec\u7acb\u7684\u8fd0\u884c\u6808\u548c\u7a0b\u5e8f\u8ba1\u6570\u5668\uff0c\u7ebf\u7a0b\u5207\u6362\u5f00\u9500\u5c0f<\/td>\n<\/tr>\n<tr>\n<td>\u6240\u5904\u73af\u5883<\/td>\n<td>\u5728\u64cd\u4f5c\u7cfb\u7edf\u4e2d\u80fd\u540c\u65f6\u8fd0\u884c\u591a\u4e2a\u4efb\u52a1\uff08\u7a0b\u5e8f\uff09<\/td>\n<td>\u5728\u540c\u4e00\u5e94\u7528\u7a0b\u5e8f\u4e2d\u6709\u591a\u4e2a\u987a\u5e8f\u6d41\u540c\u65f6\u6267\u884c<\/td>\n<\/tr>\n<tr>\n<td>\u5206\u914d\u5185\u5b58<\/td>\n<td>\u540c\u4e00\u8fdb\u7a0b\u7684\u7ebf\u7a0b\u5171\u4eab\u672c\u8fdb\u7a0b\u7684\u5730\u5740\u7a7a\u95f4\u548c\u8d44\u6e90<\/td>\n<td>\u800c\u8fdb\u7a0b\u4e4b\u95f4\u7684\u5730\u5740\u7a7a\u95f4\u548c\u8d44\u6e90\u662f\u76f8\u4e92\u72ec\u7acb\u7684<\/td>\n<\/tr>\n<tr>\n<td>\u5f71\u54cd\u5173\u7cfb<\/td>\n<td>\u4e00\u4e2a\u8fdb\u7a0b\u5d29\u6e83\u540e\uff0c\u5728\u4fdd\u62a4\u6a21\u5f0f\u4e0b\u4e0d\u4f1a\u5bf9\u5176\u4ed6\u8fdb\u7a0b\u4ea7\u751f\u5f71\u54cd<\/td>\n<td>\u4f46\u662f\u4e00\u4e2a\u7ebf\u7a0b\u5d29\u6e83\u6574\u4e2a\u8fdb\u7a0b\u90fd\u6b7b\u6389\u3002\u6240\u4ee5\u591a\u8fdb\u7a0b\u8981\u6bd4\u591a\u7ebf\u7a0b\u5065\u58ee<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/li>\n<\/ul>\n<ol>\n<li>MapReduce\u591a\u8fdb\u7a0b\u6a21\u578b<\/li>\n<\/ol>\n<ul>\n<li>\u6bcf\u4e2aTask\u8fd0\u88
4c\u5728\u4e00\u4e2a\u72ec\u7acb\u7684JVM\u8fdb\u7a0b\u4e2d\uff1b<\/li>\n<li>\u53ef\u5355\u72ec\u4e3a\u4e0d\u540c\u7c7b\u578b\u7684Task\u8bbe\u7f6e\u4e0d\u540c\u7684\u8d44\u6e90\u91cf\uff0c\u76ee\u524d\u652f\u6301\u5185\u5b58\u548cCPU\u4e24\u79cd\u8d44\u6e90\uff1b<\/li>\n<li>\u6bcf\u4e2aTask\u8fd0\u884c\u5b8c\u540e\uff0c\u5c06\u91ca\u653e\u6240\u5360\u7528\u7684\u8d44\u6e90\uff0c\u8fd9\u4e9b\u8d44\u6e90\u4e0d\u80fd\u88ab\u5176\u4ed6Task\u590d\u7528\uff0c\u5373\u4f7f\u662f\u540c\u4e00\u4e2a\u4f5c\u4e1a\u76f8\u540c\u7c7b\u578b\u7684Task\u3002\u4e5f\u5c31\u662f\u8bf4\uff0c\u6bcf\u4e2aTask\u90fd\u8981\u7ecf\u5386\u201c\u7533\u8bf7\u8d44\u6e90\u2014> \u8fd0\u884cTask \u2013> \u91ca\u653e\u8d44\u6e90\u201d\u7684\u8fc7\u7a0b\u3002<\/li>\n<\/ul>\n<ol start=\"2\">\n<li>Spark\u591a\u7ebf\u7a0b\u6a21\u578b<\/li>\n<\/ol>\n<ul>\n<li>\u6bcf\u4e2a\u8282\u70b9\u4e0a\u53ef\u4ee5\u8fd0\u884c\u4e00\u4e2a\u6216\u591a\u4e2aExecutor\u670d\u52a1\uff1b<\/li>\n<li>\u6bcf\u4e2aExecutor\u914d\u6709\u4e00\u5b9a\u6570\u91cf\u7684slot\uff0c\u8868\u793a\u8be5Executor\u4e2d\u53ef\u4ee5\u540c\u65f6\u8fd0\u884c\u591a\u5c11\u4e2aShuffleMapTask\u6216\u8005ReduceTask\uff1b<\/li>\n<li>\u6bcf\u4e2aExecutor\u5355\u72ec\u8fd0\u884c\u5728\u4e00\u4e2aJVM\u8fdb\u7a0b\u4e2d\uff0c\u6bcf\u4e2aTask\u5219\u662f\u8fd0\u884c\u5728Executor\u4e2d\u7684\u4e00\u4e2a\u7ebf\u7a0b\uff1b<\/li>\n<li>\u540c\u4e00\u4e2aExecutor\u5185\u90e8\u7684Task\u53ef\u5171\u4eab\u5185\u5b58\uff0c\u6bd4\u5982\u901a\u8fc7\u51fd\u6570SparkContext#broadcast\u5e7f\u64ad\u7684\u6587\u4ef6\u6216\u8005\u6570\u636e\u7ed3\u6784\u53ea\u4f1a\u5728\u6bcf\u4e2aExecutor\u4e2d\u52a0\u8f7d\u4e00\u6b21\uff0c\u800c\u4e0d\u4f1a\u50cfMapReduce\u90a3\u6837\uff0c\u6bcf\u4e2aTask\u52a0\u8f7d\u4e00\u6b21\uff1b<\/li>\n<li>Executor\u4e00\u65e6\u542f\u52a8\u540e\uff0c\u5c06\u4e00\u76f4\u8fd0\u884c\uff0c\u4e14\u5b83\u7684\u8d44\u6e90\u53ef\u4ee5\u4e00\u76f4\u88abTask\u590d\u7528\uff0c\u76f4\u5230Spark\u7a0b\u5e8f\u8fd0\u884c\u5b8c\u6210\u540e\u624d\u91ca\u653e\u9000\
u51fa\u3002<\/li>\n<\/ul>\n<hr \/>\n<h4><span class=\"ez-toc-section\" id=\"Mapreduce_shuffle%E5%92%8CSpark_shuffle%E7%9A%84%E5%8C%BA%E5%88%AB\"><\/span>Mapreduce shuffle\u548cSpark shuffle\u7684\u533a\u522b<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ul>\n<li>Mrshuffle:MapTask \u5230reduceTask\u4e4b\u95f4\u7684\u8fc7\u7a0b<\/li>\n<li>Sparkshuffle:job\u5230stage\u4e4b\u95f4\u7684\u8fc7\u7a0b<\/li>\n<\/ul>\n<h4><span class=\"ez-toc-section\" id=\"Spark_shuffle%E8%B0%83%E4%BC%98\"><\/span>Spark shuffle\u8c03\u4f18<span class=\"ez-toc-section-end\"><\/span><\/h4>\n<ol>\n<li>\u7a0b\u5e8f\u8c03\u4f18<\/li>\n<\/ol>\n<pre><code class=\"language-scala line-numbers\">\u2fb8\u5148\uff0c\u5c3d\u91cf\u51cf\u5c11shuffle\u6b21\u6570\uff1b\n\/\/\u4e24\u6b21shuffle\nrdd.map().repartition(1000).reduceByKey(_+_,3000)\n\/\/\u2f00\u6b21shuffle\nRdd.map().repartition(3000).reduceByKey(_+_)\n\u7136\u540e\u5fc5\u8981\u65f6\u4e3b\u52a8shuffle\uff0c\u901a\u5e38\u2f64\u4e8e\u6539\u53d8\u5e76\u2f8f\u5ea6\uff0c\u63d0\u2fbc\u540e\u7eed\u5206\u5e03\u5f0f\u8fd0\u2f8f\u901f\u5ea6\uff1b\nrdd.repartition(largerNumPartition).map()\n\u6700\u540e\uff0c\u4f7f\u2f64treeReduce&amp;treeAggregate\u66ff\u6362reduce&amp;aggregate\u3002\u6570\u636e\u91cf\u8f83\u2f24\u65f6\uff0creduce&amp;aggregate\u2f00\u6b21\u6027\u805a\n\u5408\uff0cshuffle\u91cf\u592a\u2f24\uff0c\u2f7dtreeReduce&amp;treeAggregate\u662f\u5206\u6279\u805a\u5408\uff0c\u66f4\u4e3a\u4fdd\u9669\n<\/code><\/pre>\n<ol start=\"2\">\n<li>\u53c2\u6570\u8c03\u4f18<\/li>\n<\/ol>\n<ul>\n<li>\u51cf\u5c11\u2f79\u7edc\u4f20\u8f93\u7684\u6b21\u6570<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\">spark.reducer.maxSizeInFlight:reduce task\u53bb\u78c1\u76d8\u62c9\u53d6\u6570\u636e\n\u9ed8\u8ba4\u503c\uff1a48m\n\u53c2\u6570\u8bf4\u660e\uff1a\u8be5\u53c2\u6570\u2f64\u4e8e\u8bbe\u7f6eshuffle read 
task\u7684buffer\u7f13\u51b2\u2f24\u2f29\uff0c\u2f7d\u8fd9\u4e2abuffer\u7f13\u51b2\u51b3\u5b9a\u4e86\u6bcf\u6b21\u80fd\u591f\u62c9\u53d6\u591a\u5c11\u6570\u636e\u3002\n\u8c03\u4f18\u5efa\u8bae\uff1a\u5982\u679c\u4f5c\u4e1a\u53ef\u2f64\u7684\u5185\u5b58\u8d44\u6e90\u8f83\u4e3a\u5145\u2f9c\u7684\u8bdd\uff0c\u53ef\u4ee5\u589e\u52a0\u8fd9\u4e2a\u53c2\u6570\u7684\u2f24\u2f29(\u2f50\u598296M)\uff0c\u4ece\u2f7d\u51cf\u5c11\u62c9\u53d6\u6570\u636e\u7684\u6b21\u6570\uff0c\u4e5f\u5c31\u53ef\u4ee5\u51cf\u5c11\u2f79\u7edc\u4f20\u8f93\u7684\u6b21\u6570\uff0c\u8fdb\u2f7d\u63d0\u5347\u6027\u80fd\u3002\u5728\u5b9e\u8df5\u4e2d\u53d1\u73b0\uff0c\u5408\u7406\u8c03\u8282\u8be5\u53c2\u6570\uff0c\u6027\u80fd\u4f1a\u67091\u52305%\u7684\u63d0\u5347\u3002\n<\/code><\/pre>\n<ul>\n<li>\u5bf9\u4e8e\u4f20\u8f93\u5927\u6570\u636e\u91cf\u8c03\u8282\u8be5\u53c2\u6570\u53ef\u4ee5\u2f24\u5e45\u5ea6\u63d0\u5347\u7a33\u5b9a\u6027<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\">Spark.shuffle.io.maxRetries\n\u9ed8\u8ba4\u503c\uff1a3\n\u53c2\u6570\u8bf4\u660e\uff1ashuffle read task\u4eceshuffle write task\u6240\u5728\u8282\u70b9\u62c9\u53d6\u5c5e\u4e8e\u2f83\u2f30\u7684\u6570\u636e\u65f6\uff0c\u5982\u679c\u56e0\u4e3a\u2f79\u7edc\u5f02\u5e38\u5bfc\u81f4\u62c9\u53d6\u5931\u8d25\uff0c\u65f6\u4f1a\u2f83\u52a8\u8fdb\u2f8f\u91cd\u8bd5\u7684\u3002\u8be5\u53c2\u6570\u5c31\u4ee3\u8868\u4e86\u53ef\u4ee5\u91cd\u8bd5\u7684\u6700\u2f24\u6b21\u6570\uff0c\u5982\u679c\u5728\u6307\u5b9a\u6b21\u6570\u5185\u62c9\u53d6\u5c5e\u4e8e\u8fd8\u662f\u6ca1\u6709\u6210\u529f\uff0c\u5c31\u53ef\u80fd\u4f1a\u5bfc\u81f4\u4f5c\u4e1a\u6267\u2f8f\u5931\u8d25\u3002\n\u8c03\u4f18\u5efa\u8bae\uff1a\u5bf9\u4e8e\u90a3\u4e9b\u5305\u542b\u4e86\u7279\u522b\u8017\u65f6\u7684shuffle\u64cd\u4f5c\u7684\u4f5c\u4e1a\uff0c\u5efa\u8bae\u589e\u52a0\u91cd\u8bd5\u6700\u2f24\u6b21\u6570(\u2f50\u59826\u6b21)\uff0c\u53ef\u4ee5\u907f\u514d\u7531\u4e8eJVM\u7684full 
gc\u6216\u8005\u2f79\u7edc\u4e0d\u7a33\u5b9a\u7b49\u56e0\u7d20\u5bfc\u81f4\u7684\u6570\u636e\u62c9\u53d6\u5931\u8d25\u3002\u5728\u5b9e\u8df5\u4e2d\u53d1\u73b0\uff0c\u5bf9\u4e8e\u8d85\u2f24\u6570\u636e\u91cf(\u6570\u2f17\u4ebf\u5230\u4e0a\u767e\u4ebf)\u7684shuffle\u8fc7\u7a0b\uff0c\u8c03\u8282\u8be5\u53c2\u6570\u53ef\u4ee5\u2f24\u5e45\u5ea6\u63d0\u5347\u7a33\u5b9a\u6027\u3002\n<\/code><\/pre>\n<ul>\n<li>\u589e\u52a0shuffle\u64cd\u4f5c\u7684\u7a33\u5b9a\u6027\u3002<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\">Spark.shuffle.io.retryWait\n\u9ed8\u8ba4\u503c\uff1a5s\n\u53c2\u6570\u8bf4\u660e\uff1ashuffle read task\u4eceshuffle write task\u6240\u5728\u8282\u70b9\u62c9\u53d6\u5c5e\u4e8e\u2f83\u2f30\u7684\u6570\u636e\u65f6\uff0c\u5982\u679c\u62c9\u53d6\u5931\u8d25\u4e86\u6bcf\u6b21\u91cd\u8bd5\u62c9\u53d6\u6570\u636e\u7684\u7b49\u5f85\u65f6\u95f4\u95f4\u9694\uff0c\u9ed8\u8ba4\u662f5s\uff1b\n\u8c03\u4f18\u5efa\u8bae\uff1a\u5efa\u8bae\u52a0\u2f24\u65f6\u95f4\u95f4\u9694\u65f6\u957f\uff0c\u2f50\u598260s\uff0c\u4ee5\u589e\u52a0shuffle\u64cd\u4f5c\u7684\u7a33\u5b9a\u6027\u3002\n<\/code><\/pre>\n<ul>\n<li>\u907f\u514d\u7531\u4e8e\u5185\u5b58\u4e0d\u2f9c\u5bfc\u81f4\u805a\u5408\u8fc7\u7a0b\u4e2d\u9891\u7e41\u8bfb\u5199\u78c1\u76d8<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\">spark.shuffle.memoryFraction\n\u9ed8\u8ba4\u503c\uff1a0.2\n\u53c2\u6570\u8bf4\u660e\uff1a\u8be5\u53c2\u6570\u4ee3\u8868\u4e86executor\u5185\u5b58\u4e2d\uff0c\u5206\u914d\u7ed9shuffle read task\u8fdb\u2f8f\u805a\u5408\u64cd\u4f5c\u7684\u5185\u5b58\u2f50\u4f8b\uff0c\u9ed8\u8ba4\u662f20%\uff1b\n\u8c03\u4f18\u5efa\u8bae\uff1a\u5982\u679c\u5185\u5b58\u5145\u2f9c\uff0c\u2f7d\u4e14\u5f88\u5c11\u4f7f\u2f64\u6301\u4e45\u5316\u64cd\u4f5c\uff0c\u5efa\u8bae\u8c03\u2fbc\u8fd9\u4e2a\u2f50\u4f8b\uff0c\u7ed9shuffle 
read\u7684\u805a\u5408\u64cd\u4f5c\u66f4\u591a\u5185\u5b58\uff0c\u4ee5\u907f\u514d\u7531\u4e8e\u5185\u5b58\u4e0d\u2f9c\u5bfc\u81f4\u805a\u5408\u8fc7\u7a0b\u4e2d\u9891\u7e41\u8bfb\u5199\u78c1\u76d8\u3002\u5728\u5b9e\u8df5\u4e2d\u53d1\u73b0\uff0c\u5408\u7406\u8c03\u8282\u8be5\u53c2\u6570\u53ef\u4ee5\u5c06\u6027\u80fd\u63d0\u534710%\u3002\n<\/code><\/pre>\n<ul>\n<li>\u63d0\u4f9b\u8f83\u597d\u7684\u78c1\u76d8\u8bfb\u5199\u6027\u80fd\u3002<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\">Spark.shuffle.manager\n\u9ed8\u8ba4\u503c\uff1asort\n\u53c2\u6570\u8bf4\u660e\uff1a\u8be5\u53c2\u6570\u2f64\u4e8e\u8bbe\u7f6eshuffleManager\u7684\u7c7b\u578b\u3002Spark1.5\u4ee5\u540e\u6709\u4e09\u4e2a\u53ef\u9009\u9879\uff1ahash\u3001sort\u548ctungsten-sort\u3002Tungsten-sort\u4e0esort\u7c7b\u4f3c\uff0c\u4f46\u662f\u4f7f\u2f64\u4e86tungsten\u8ba1\u5212\u4e2d\u7684\u5806\u5916\u5185\u5b58\u7ba1\u7406\u673a\u5236\uff0c\u5185\u5b58\u4f7f\u2f64\u6548\u7387\u63d0\u2fbc\u3002\n\u8c03\u4f18\u5efa\u8bae\uff1a\u7531\u4e8esort shuffleManager\u9ed8\u8ba4\u4f1a\u5bf9\u6570\u636e\u8fdb\u2f8f\u6392\u5e8f\uff0c\u56e0\u6b64\u5982\u679c\u4f60\u7684\u4e1a\u52a1\u903b\u8f91\u4e2d\u9700\u8981\u8be5\u6392\u5e8f\u673a\u5236\u7684\u8bdd\uff0c\u5219\u4f7f\u2f64\u9ed8\u8ba4\u7684sort ShuffleManager\u5c31\u53ef\u4ee5\uff1b\u4f46\u662f\u5982\u679c\u4f60\u7684\u4e1a\u52a1\u903b\u8f91\u4e0d\u9700\u8981\u5bf9\u6570\u636e\u8fdb\u2f8f\u6392\u5e8f\uff0c\u90a3\u4e48\u5efa\u8bae\u53c2\u8003\u540e\u2faf\u7684\u2f0f\u4e2a\u53c2\u6570\u8c03\u4f18\uff0c\u901a\u8fc7bypass\u673a\u5236\u6216\u4f18\u5316\u7684hash 
ShuffleManager\u6765\u907f\u514d\u6392\u5e8f\u64cd\u4f5c\uff0c\u540c\u65f6\u63d0\u4f9b\u8f83\u597d\u7684\u78c1\u76d8\u8bfb\u5199\u6027\u80fd\u3002\u8fd9\u2fa5\u8981\u6ce8\u610f\u7684\u662f\uff0ctungsten-sort\u8981\u614e\u2f64\uff0c\u56e0\u4e3a\u4e4b\u524d\u53d1\u73b0\u4e86\u2f00\u4e9b\u76f8\u5e94\u7684bug\u3002\nSpark.shuffle.sort.bypassMergeThreshold\n<\/code><\/pre>\n<ul>\n<li>\u51cf\u5c11\u4e86\u6392\u5e8f\u7684\u6027\u80fd\u5f00\u9500<\/li>\n<\/ul>\n<pre><code class=\"language-scala line-numbers\">\u9ed8\u8ba4\u503c\uff1a200\n\u53c2\u6570\u8bf4\u660e\uff1a\u5f53shuffleManager\u4e3asortshuffleManager\u65f6\uff0c\u5982\u679cshuffle read task\u7684\u6570\u91cf\u2f29\u4e8e\u8fd9\u4e2a\u9608\u503c\uff0c\u5219shuffle write\u8fc7\u7a0b\u4e2d\u4e0d\u4f1a\u8fdb\u2f8f\u6392\u5e8f\u64cd\u4f5c\uff0c\u2f7d\u662f\u76f4\u63a5\u6309\u7167\u672a\u7ecf\u4f18\u5316hashShuffleManager\u7684\u2f45\u5f0f\u53bb\u5199\u6570\u636e\uff0c\u4f46\u662f\u6700\u540e\u4f1a\u5c06\u6bcf\u4e2atask\u4ea7\u2f63\u7684\u6240\u6709\u4e34\u65f6\u78c1\u76d8\u2f42\u4ef6\u90fd\u5408\u5e76\u6210\u2f00\u4e2a\u2f42\u4ef6\uff0c\u5e76\u4f1a\u521b\u5efa\u5355\u72ec\u7684\u7d22\u5f15\u2f42\u4ef6\u3002\n\u8c03\u4f18\u5efa\u8bae\uff1a\u5f53\u4f60\u4f7f\u2f64sortShuffleManager\u65f6\uff0c\u5982\u679c\u7684\u786e\u4e0d\u9700\u8981\u6392\u5e8f\u64cd\u4f5c\uff0c\u90a3\u4e48\u5efa\u8bae\u5c06\u8fd9\u4e2a\u53c2\u6570\u8c03\u2f24\u2f00\u4e9b\uff0c\u2f24\u4e8eshuffle read task\u7684\u6570\u91cf\uff0c\u90a3\u4e48\u6b64\u65f6\u5c31\u4f1a\u2f83\u52a8\u542f\u2f64bypass\u673a\u5236\uff0cmap-side\u5c31\u4e0d\u4f1a\u8fdb\u2f8f\u6392\u5e8f\uff0c\u51cf\u5c11\u4e86\u6392\u5e8f\u7684\u6027\u80fd\u5f00\u9500\u3002\u4f46\u662f\u8fd9\u79cd\u2f45\u5f0f\u4e0b\uff0c\u4f9d\u7136\u4f1a\u4ea7\u2f63\u2f24\u91cf\u7684\u78c1\u76d8\u2f42\u4ef6\uff0c\u56e0\u6b64shuffle write\u6027\u80fd\u6709\u5f85\u63d0\u2fbc\u3002\n<\/code><\/pre>\n<ul>\n<li>\u6781\u2f24\u5730\u51cf\u5c11\u78c1\u76d8IO\u5f00\u9500<\/li>\n<\/ul>\n<pre><code class=\"language-scala 
line-numbers\">Spark.shuffle.consolidateFiles\n\u9ed8\u8ba4\u503c\uff1afalse\n\u53c2\u6570\u8bf4\u660e\uff1a\u5982\u679c\u4f7f\u2f64hashShuffleManager\uff0c\u8be5\u53c2\u6570\u6709\u6548\u3002\u5982\u679c\u8bbe\u7f6e\u4e3atrue\uff0c\u90a3\u4e48\u5c31\u4f1a\u5f00\u542fconsolidate\u673a\u5236\uff0c\u4f1a\u2f24\u5e45\u5ea6\u5408\u5e76shuffle write\u7684\u8f93\u51fa\u2f42\u4ef6\uff0c\u5bf9\u4e8eshuffle read task\u6570\u91cf\u7279\u522b\u591a\u7684\u60c5\u51b5\u4e0b\uff0c\u8fd9\u79cd\u2f45\u6cd5\u53ef\u4ee5\u6781\u2f24\u5730\u51cf\u5c11\u78c1\u76d8IO\u5f00\u9500\uff0c\u63d0\u5347\u6027\u80fd\u3002\n\u8c03\u4f18\u5efa\u8bae\uff1a\u5982\u679c\u7684\u786e\u4e0d\u9700\u8981sortHashShuffle\u7684\u6392\u5e8f\u673a\u5236\uff0c\u90a3\u4e48\u9664\u4e86\u4f7f\u2f64bypass\u673a\u5236\uff0c\u8fd8\u53ef\u4ee5\u5c1d\u8bd5\n\u5c06spark.shuffle.manager\u53c2\u6570\u2f3f\u52a8\u8c03\u8282\u4e3ahash\uff0c\u4f7f\u2f64hashShuffleManager\uff0c\u540c\u65f6\u5f00\u542fconsolidate\u673a\u5236\u3002\u5728\u5b9e\u8df5\u4e2d\u5c1d\u8bd5\u8fc7\uff0c\u53d1\u73b0\u5176\u6027\u80fd\u2f50\u5f00\u542f\u4e86bypass\u673a\u5236\u7684sortshuffleManager\u8981\u2fbc\u51fa10%\u523030%\u3002\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>Scala\u7684\u4e00\u4e9b\u77e5\u8bc6 1\u3001Scala\u548cJava\u7684\u533a\u522b \u5bf9\u6bd4\u7684\u5185\u5bb9 Scala java \u53d8\u91cf\u58f0\u660e \u53ea\u9700\u8981\u58f0\u660e\u662f 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[7],"tags":[],"_links":{"self":[{"href":"http:\/\/www.specialwu.com\/index.php?rest_route=\/wp\/v2\/posts\/844"}],"collection":[{"href":"http:\/\/www.specialwu.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.specialwu.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.specialwu.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.specialwu.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=844"}],"version-history":[{"count":49,"href":"http:\/\/www.specialwu.com\/index.php?rest_route=\/wp\/v2\/posts\/844\/revisions"}],"predecessor-version":[{"id":1959,"href":"http:\/\/www.specialwu.com\/index.php?rest_route=\/wp\/v2\/posts\/844\/revisions\/1959"}],"wp:attachment":[{"href":"http:\/\/www.specialwu.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=844"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.specialwu.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=844"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.specialwu.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=844"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}