Configuring Scala in IDEA to Write Spark Programs
pom.xml configuration
<properties>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <spark.version>2.2.0</spark.version>
    <scala.version>2.11.8</scala.version>
    <hadoop.version>2.8.4</hadoop.version>
    <encoding>UTF-8</encoding>
</properties>
<dependencies>
    <!-- Scala standard library -->
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
    </dependency>
    <!-- Spark core; the _2.11 suffix must match the Scala binary version (2.11.x here) -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <!-- Hadoop client API -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>
<build>
    <plugins>
        <!-- When a Maven project contains both Java and Scala code, maven-scala-plugin
             compiles and packages both together -->
        <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <version>2.15.2</version>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
        <!-- maven-assembly-plugin builds the runnable jar with all dependencies included -->
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>2.4</version>
            <configuration>
                <!-- Setting this to false drops the "-jar-with-dependencies" suffix from
                     MySpark-1.0-SNAPSHOT-jar-with-dependencies.jar -->
                <!--<appendAssemblyId>false</appendAssemblyId>-->
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <archive>
                    <manifest>
                        <mainClass>com.bjsxt.scalaspark.sql.windows.OverFunctionOnHive</mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>assembly</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
Runtime arguments: the program below reads its input path from args(0) and writes its output to args(1); when running from IDEA, supply both paths in the run configuration's Program arguments field.
Scala word count program
import org.apache.spark.{SparkConf, SparkContext}

object SparkWordCount {
  def main(args: Array[String]): Unit = {
    // 1. Configure the job: setAppName sets the application name; setMaster("local[2]")
    //    runs locally with 2 threads (use "local[*]" for all available cores)
    val conf = new SparkConf().setAppName("SparkWordCount").setMaster("local[2]")
    // 2. Create the SparkContext, the entry point of a Spark application
    val sc = new SparkContext(conf)
    // 3. Load the input from args(0), count the words, sort by count descending,
    //    and save the result to args(1)
    sc.textFile(args(0)).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
      .sortBy(_._2, false)
      .saveAsTextFile(args(1))
    sc.stop()
  }
}
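
For a quick check inside IDEA without setting program arguments or an output directory, a console-printing variant of the same job can be handy. The sketch below is only an illustration: the input path "data/words.txt" and the object name SparkWordCountLocal are placeholders, not part of the original project.

import org.apache.spark.{SparkConf, SparkContext}

object SparkWordCountLocal {
  def main(args: Array[String]): Unit = {
    // Run locally with 2 threads; no cluster or spark-submit required
    val conf = new SparkConf().setAppName("SparkWordCountLocal").setMaster("local[2]")
    val sc = new SparkContext(conf)
    sc.textFile("data/words.txt")      // placeholder input path, replace with a real file
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, false)
      .take(10)                        // bring only the top 10 (word, count) pairs to the driver
      .foreach(println)
    sc.stop()
  }
}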