import org.apache.spark.sql.SparkSession
import scala.util.matching.Regex

object sum {
  def main(args: Array[String]): Unit = {
    // Point Spark at a local Hadoop installation so it can run on Windows.
    System.setProperty("hadoop.home.dir", "E:\\Scala\\hadoop-2.6.5")
    val ss = SparkSession.builder().appName("sum").master("local[*]").getOrCreate()
    val sc = ss.sparkContext

    val rdd1 = sc.textFile("./src/main/scala/data/file.txt", 1)
    val numberPattern = new Regex("\\d+")

    // Map each line to the number it contributes; lines that cannot be parsed
    // are passed through unchanged, so the resulting RDD holds mixed values.
    val rdd2 = rdd1.map { f =>
      val splits = f.split("\\*")
      if (!f.contains("*")) {
        // No "*": take the first number in the line, or keep the raw line if there is none.
        numberPattern.findFirstIn(f) match {
          case Some(n) => n.toInt
          case None    => f
        }
      } else if (splits.length == 2) {
        // Two fields: take the number in the second field.
        numberPattern.findFirstIn(splits(1)) match {
          case Some(n) => n.toInt
          case None    => f
        }
      } else if (splits.length == 3) {
        val resource2 = numberPattern.findFirstIn(splits(1)).getOrElse("-1").toInt
        val resource3 = numberPattern.findFirstIn(splits(2)).getOrElse("-1").toInt
        if (f.contains("贴")) {
          // Lines marked with "贴" keep only the last number.
          resource3
        } else if (resource2 == -1 || resource3 == -1) {
          f
        } else {
          // Three fields: multiply the numbers in the second and third fields.
          resource2 * resource3
        }
      } else if (splits.length == 4) {
        // Four fields: multiply the numbers in the third and fourth fields.
        val resource3 = numberPattern.findFirstIn(splits(2)).getOrElse("-1").toInt
        val resource4 = numberPattern.findFirstIn(splits(3)).getOrElse("-1").toInt
        resource3 * resource4
      } else {
        f
      }
    }

    // Write the results, then also print them to the console
    // (the RDD is recomputed for the second action).
    rdd2.saveAsTextFile("./src/main/scala/data/outPut")
    rdd2.foreach(println)

    ss.stop()
  }
}
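For reference, the per-line rules the `map` applies can be exercised without Spark. The sketch below is a condensed restatement of those rules against a few made-up sample strings; the actual contents of `file.txt` are not shown in the original, so the inputs, the `SumParseDemo` name, and the fallback of returning the raw line when a field has no number are assumptions for illustration only.

```scala
import scala.util.matching.Regex

object SumParseDemo {
  private val digits: Regex = "\\d+".r

  // Value a single line contributes; unparseable lines fall back to the raw text.
  def parse(line: String): Any = {
    val parts = line.split("\\*")
    if (!line.contains("*"))
      digits.findFirstIn(line).map(_.toInt).getOrElse(line)
    else parts.length match {
      case 2                        => digits.findFirstIn(parts(1)).map(_.toInt).getOrElse(line)
      case 3 if line.contains("贴") => digits.findFirstIn(parts(2)).map(_.toInt).getOrElse(line)
      case 3 =>
        (digits.findFirstIn(parts(1)), digits.findFirstIn(parts(2))) match {
          case (Some(a), Some(b)) => a.toInt * b.toInt
          case _                  => line
        }
      case 4 =>
        (digits.findFirstIn(parts(2)), digits.findFirstIn(parts(3))) match {
          case (Some(a), Some(b)) => a.toInt * b.toInt
          case _                  => line
        }
      case _ => line
    }
  }

  def main(args: Array[String]): Unit =
    // Hypothetical inputs; prints: item 12 -> 12, box*30 -> 30, pack*4*5 -> 20, 贴*4*5 -> 5
    Seq("item 12", "box*30", "pack*4*5", "贴*4*5").foreach(l => println(s"$l -> ${parse(l)}"))
}
```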