Explorar el Código

获取订单数据中某单位药物的最小单位的总值

deng hace 5 años
padre
commit
b43aa846a2
Se ha modificado 1 fichero con 58 adiciones y 0 borrados
  1. 58 0
      Sum.txt

+ 58 - 0
Sum.txt

@@ -0,0 +1,58 @@
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+
+import scala.util.matching.Regex
+
+/** Spark job: rewrites each order line as the total of the drug's smallest unit. */
+object sum {
+  // First run of decimal digits in a fragment.
+  private val firstNumber = new Regex("\\d{1,}")
+
+  /** First number in `text`; None if absent or too large for an Int (the original threw). */
+  private def firstInt(text: String): Option[Int] =
+    firstNumber.findFirstIn(text).flatMap(digits => scala.util.Try(digits.toInt).toOption)
+
+  /** Product of the first numbers of both fragments, as Long so it cannot overflow. */
+  private def product(left: String, right: String): Option[Long] =
+    for (a <- firstInt(left); b <- firstInt(right)) yield a.toLong * b
+
+  /**
+   * Converts one line to its total, or returns the line unchanged when no total can be derived.
+   *
+   *  - no `*`: the first number in the line
+   *  - `a*b`: the first number in `b`
+   *  - `a*b*c`: number(b) * number(c); lines containing "贴" use number(c) only
+   *  - `a*b*c*d`: number(c) * number(d)
+   *  - any other field count: unchanged
+   */
+  private def minUnitTotal(line: String): String = {
+    // Note: split drops trailing empty fields, so "a*" yields a single field.
+    val fields = line.split("\\*")
+    val total: Option[Long] =
+      if (!line.contains("*")) firstInt(line).map(_.toLong)
+      else fields.length match {
+        case 2 => firstInt(fields(1)).map(_.toLong)
+        // A missing number now keeps the line instead of emitting the old -1 sentinel.
+        case 3 if line.contains("贴") => firstInt(fields(2)).map(_.toLong)
+        case 3 => product(fields(1), fields(2))
+        case 4 => product(fields(2), fields(3))
+        case _ => None
+      }
+    total.fold(line)(_.toString)
+  }
+
+  /** Reads the order file, converts every line, then saves and prints the result. */
+  def main(args: Array[String]): Unit = {
+    // NOTE(review): hard-coded, machine-specific Hadoop home; confirm it is still needed.
+    System.setProperty("hadoop.home.dir","E:\\Scala\\hadoop-2.6.5")
+    val ss = SparkSession.builder().appName("sum").master("local[*]").getOrCreate()
+    try {
+      // cache(): two actions follow; without it the input is read and converted twice.
+      val totals = ss.sparkContext.textFile("./src/main/scala/data/file.txt",1).map(minUnitTotal).cache()
+      totals.saveAsTextFile("./src/main/scala/data/outPut")
+      totals.foreach(println)
+    } finally {
+      ss.stop()
+    }
+  }
+}