
Configuring Scala in IDEA to Write Spark Programs

pom.xml configuration

<properties>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <spark.version>2.2.0</spark.version>
    <scala.version>2.11.8</scala.version>
    <hadoop.version>2.8.4</hadoop.version>
    <encoding>UTF-8</encoding>
</properties>

<dependencies>
    <!-- Scala library dependency -->
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
    </dependency>
    <!-- Spark core dependency -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <!-- Hadoop client API dependency -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <!-- When a Maven project contains both Java and Scala code, the maven-scala-plugin
             compiles both so they can be packaged together -->
        <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <version>2.15.2</version>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>

        <!-- Plugin Maven needs to build the jar -->
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>2.4</version>
            <configuration>
                <!-- Setting this to false strips the "-jar-with-dependencies" suffix from
                     MySpark-1.0-SNAPSHOT-jar-with-dependencies.jar -->
                <!--<appendAssemblyId>false</appendAssemblyId>-->
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <archive>
                    <manifest>
                        <mainClass>com.bjsxt.scalaspark.sql.windows.OverFunctionOnHive</mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>assembly</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
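After mvn package, the assembled jar can be submitted to a cluster with spark-submit. The command below is only a sketch: the master URL and the jar path under target/ are assumptions, and the main class is the one declared in the manifest above (it can also be overridden with --class).

    # Build the fat jar, then submit it (master and path are assumptions)
    mvn clean package
    spark-submit \
      --class com.bjsxt.scalaspark.sql.windows.OverFunctionOnHive \
      --master yarn \
      target/MySpark-1.0-SNAPSHOT-jar-with-dependencies.jar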

Specifying the runtime arguments: the word-count program below reads its input path from args(0) and writes its output to args(1). In IDEA, set these under Run > Edit Configurations > Program arguments.
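For example, the Program arguments field could hold the input file and the output directory separated by a space (both paths here are hypothetical, and the output directory must not exist yet):

    D:/data/words.txt D:/data/wordcount_out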

Scala word-count program

import org.apache.spark.{SparkConf, SparkContext}

object SparkWordCount {
  def main(args: Array[String]): Unit = {
    // 1. Configure the job: setAppName sets the application name;
    //    setMaster("local[2]") runs locally with 2 threads ("local[*]" uses all available cores)
    val conf = new SparkConf().setAppName("SparkWordCount").setMaster("local[2]")
    // 2. Create the SparkContext, the entry point of a Spark application
    val sc = new SparkContext(conf)
    // 3. Load the input from args(0), count the words, sort by count in descending order,
    //    and save the result to args(1)
    sc.textFile(args(0))
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, false)
      .saveAsTextFile(args(1))
    sc.stop()
  }
}
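For a quick sanity check of the same pipeline without HDFS paths or program arguments, the job can be run on an in-memory collection and the result printed on the driver. This is a minimal sketch, not part of the original program; the object name and the sample lines are made up.

    import org.apache.spark.{SparkConf, SparkContext}

    object SparkWordCountLocalTest {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setAppName("SparkWordCountLocalTest").setMaster("local[2]")
        val sc = new SparkContext(conf)

        // In-memory input instead of textFile(args(0)); these lines are made-up sample data
        val lines = sc.parallelize(Seq("hello spark", "hello scala", "hello spark"))

        // Same pipeline as above, but collected to the driver instead of saved to a file
        val counts = lines
          .flatMap(_.split(" "))
          .map((_, 1))
          .reduceByKey(_ + _)
          .sortBy(_._2, ascending = false)
          .collect()

        // Prints pairs such as (hello,3), (spark,2), (scala,1)
        counts.foreach(println)

        sc.stop()
      }
    }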