Configuring Scala in IDEA to Write Spark Programs
pom.xml configuration
<properties>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <spark.version>2.2.0</spark.version>
    <scala.version>2.11.8</scala.version>
    <hadoop.version>2.8.4</hadoop.version>
    <encoding>UTF-8</encoding>
</properties>
<dependencies>
    <!-- Scala dependency -->
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
    </dependency>
    <!-- Spark dependency -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <!-- hadoop-client API -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>
<build>
    <plugins>
        <!-- When a Maven project contains both Java and Scala code, configure the
             maven-scala-plugin so both kinds of sources are compiled and packaged together -->
        <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <version>2.15.2</version>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
        <!-- Plugin needed for Maven to build the jar -->
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>2.4</version>
            <configuration>
                <!-- Setting this to false removes the "-jar-with-dependencies" suffix
                     from MySpark-1.0-SNAPSHOT-jar-with-dependencies.jar -->
                <!--<appendAssemblyId>false</appendAssemblyId>-->
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <archive>
                    <manifest>
                        <mainClass>com.bjsxt.scalaspark.sql.windows.OverFunctionOnHive</mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <!-- "single" is the goal meant for lifecycle binding;
                             the older "assembly" goal is deprecated -->
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
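With this build configuration in place, running mvn package compiles the Java and Scala sources together and produces the fat jar (the -jar-with-dependencies artifact) under target/; that jar can then be run on a cluster with spark-submit.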
Specifying runtime arguments: the word count program below reads its input path from args(0) and its output path from args(1). When running inside IDEA, supply them under Run > Edit Configurations > Program arguments; when submitting the packaged jar with spark-submit, pass them after the jar path.
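For example, the Program arguments field could hold two hypothetical paths, input first and output second (any paths accessible to the program work):

data/words.txt data/wordcount-output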
Scala word count program
import org.apache.spark.{SparkConf, SparkContext}

object SparkWordCount {
  def main(args: Array[String]): Unit = {
    // 1. Configure the application: setAppName sets the program name;
    //    setMaster("local[2]") runs locally with 2 threads ("local[*]" uses all cores)
    val conf = new SparkConf().setAppName("SparkWordCount").setMaster("local[2]")
    // 2. Create the SparkContext, the entry point of a Spark program
    val sc = new SparkContext(conf)
    // 3. Load the input from args(0), count the words, sort by count descending,
    //    and save the result to args(1)
    sc.textFile(args(0))
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, false)
      .saveAsTextFile(args(1))
    sc.stop()
  }
}
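For quick iteration inside IDEA it can be handy to print the counts on the driver instead of writing them out, since saveAsTextFile fails if the output directory already exists. The following is a minimal local-test sketch, assuming a hypothetical input file data/words.txt:

import org.apache.spark.{SparkConf, SparkContext}

object SparkWordCountLocal {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SparkWordCountLocal").setMaster("local[2]")
    val sc = new SparkContext(conf)
    sc.textFile("data/words.txt")   // hypothetical local input file
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, false)
      .collect()                    // bring the (small) result to the driver
      .foreach(println)
    sc.stop()
  }
}

collect() is only reasonable here because a word count result is small; for large outputs, keep saveAsTextFile as in the original program.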
