danwei.txt 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. import org.apache.spark.sql.SparkSession
  /**
   * Spark job that normalises the unit column of a two-field, comma-separated file.
   *
   * Each input line is split on `,`. When exactly two fields are present, the
   * first field is treated as the quantity and the second as free text that
   * should mention a unit. The job emits `<unit>/<quantity>` for the first
   * known unit found in the second field, writes the result as text files and
   * prints the number of processed records.
   */
  object Danwei {

    /** Location of the input file read by [[main]]. */
    private val InputPath = "./src/main/scala/data/danwei2.txt"

    /** Directory the normalised records are written to. */
    private val OutputPath = "./src/main/scala/data/outPut1"

    /**
     * Known units, in matching priority order: the first entry contained in the
     * unit text wins. The order is significant because several entries are
     * substrings of others ("mg" must precede "g", "mm"/"cm" must precede "m",
     * "套装" must precede "套"), so do not sort or deduplicate by hand without
     * re-checking those pairs.
     */
    private val KnownUnits: Seq[String] = Seq(
      "贴", "枚", "袋", "粒", "片", "丸", "套装", "支", "ml", "mg", "g", "条",
      "张", "L", "瓶", "只", "包", "个", "给", "扎", "吸", "mm", "人份", "卷",
      "把", "球", "根", "双", "件", "抽", "cm", "m", "单盘", "块", "克", "幅",
      "公分", "头", "对", "G", "s", "副", "台", "帖", "米", "套", "毫升"
    )

    /**
     * Units that are emitted on their own, without the `/<quantity>` suffix.
     * NOTE(review): "套装" is the only unit handled this way; it is unclear
     * whether that is intentional or an omission - confirm with the data owner.
     */
    private val UnitsWithoutQuantity: Set[String] = Set("套装")

    /**
     * Builds the normalised value for one well-formed record.
     *
     * @param quantity first field of the record, copied verbatim into the result
     * @param unitText second field of the record, searched for a known unit
     * @return `Some("<unit>/<quantity>")` (or `Some("<unit>")` for units listed in
     *         `UnitsWithoutQuantity`) for the first matching unit, `None` when
     *         no known unit occurs in `unitText`
     */
    private def normalise(quantity: String, unitText: String): Option[String] =
      KnownUnits
        .find(unit => unitText.contains(unit))
        .map(unit => if (UnitsWithoutQuantity.contains(unit)) unit else s"$unit/$quantity")

    /**
     * Entry point: reads the input file, normalises every line and saves the result.
     *
     * @param args command-line arguments (unused)
     */
    def main(args: Array[String]): Unit = {
      System.setProperty("hadoop.home.dir", "E:\\Scala\\hadoop-2.6.5")
      val ss = SparkSession.builder().appName("Test").master("local[*]").getOrCreate()
      try {
        val sc = ss.sparkContext
        // Counts every record that passes through the mapping below.
        val acc = sc.longAccumulator("name")

        val rdd3 = sc.textFile(InputPath, 1).map { line =>
          val fields = line.split(",")
          if (fields.length != 2) {
            // Malformed record: report it and pass the original line through
            // unchanged, so the output stays textual instead of containing an
            // array's default toString.
            println("错误" + fields.toBuffer)
            acc.add(1)
            println(acc.value)
            line
          } else {
            acc.add(1)
            normalise(fields(0), fields(1)).getOrElse {
              // No known unit: report the unit text and keep only the quantity.
              println(fields(1))
              fields(0)
            }
          }
        }.cache()

        // Two actions run on rdd3. Caching it keeps the second action from
        // re-reading the file, re-printing the diagnostics and incrementing the
        // accumulator a second time. count() materialises the RDD without
        // shipping every record to the driver.
        rdd3.count()
        println(acc.value)
        rdd3.saveAsTextFile(OutputPath)
      } finally {
        // Always release the local Spark resources, even when the job fails.
        ss.stop()
      }
    }
  }