分类:hadoop| 发布时间:2018-04-23 00:05:00
MRUnit 是一个测试库,它便于将已知的输入传递给 mapper 或者检查 reducer 的输出是否符合预期。 MRUnit 与标准的执行框架(如 JUnit)一起使用,因此可以将 MapReduce 作业的测试作为正常开发环境的一部分运行。
本项目使用 mvn 进行构建和测试,假设你已经在机器上安装好 maven 了。
首先使用以下命令生成项目骨架:
% mvn archetype:generate
Choose a number or apply filter (format: [groupId:]artifactId, case sensitive contains): 1173: 1172
Choose org.apache.maven.archetypes:maven-archetype-quickstart version:
1: 1.0-alpha-1
2: 1.0-alpha-2
3: 1.0-alpha-3
4: 1.0-alpha-4
5: 1.0
6: 1.1
7: 1.3
Choose a number: 7: 1
Define value for property 'groupId': wordcount-mrunit
Define value for property 'artifactId': wordcount-mrunit
Define value for property 'version' 1.0-SNAPSHOT: :
Define value for property 'package' wordcount-mrunit: :
Confirm properties configuration:
groupId: wordcount-mrunit
artifactId: wordcount-mrunit
version: 1.0-SNAPSHOT
package: wordcount-mrunit
Y: y
删除自动生成的代码:
% rm -rf src/main/java/wordcount-mrunit/
% rm -rf src/test/java/wordcount-mrunit/
添加以下内容到 src/main/java/WordCountMapper.java 中:
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Word-count mapper: breaks every input line into words and emits a
 * {@code (word, 1)} pair for each non-empty word.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /** Count of one that is written alongside every emitted word. */
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Splits the line on runs of non-word characters and writes one
     * {@code (word, ONE)} pair per resulting token.
     *
     * @param key     input key of the record; not read by this method
     * @param value   the line of text to tokenize
     * @param context sink that receives the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        final String[] tokens = value.toString().split("\\W+");
        for (int i = 0; i < tokens.length; i++) {
            final String token = tokens[i];
            if (token.isEmpty()) {
                // A line that starts with a separator produces an empty first token; skip it.
                continue;
            }
            context.write(new Text(token), ONE);
        }
    }
}
修改 pom.xml,添加 Hadoop 依赖并指定编译器版本:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Build descriptor for the word-count example; at this stage only the mapper needs to compile. -->
<modelVersion>4.0.0</modelVersion>
<groupId>wordcount-mrunit</groupId>
<artifactId>wordcount-mrunit</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>wordcount with mrunit example</name>
<url>http://maven.apache.org</url>
<dependencies>
<!-- Hadoop core: supplies the org.apache.hadoop.io and org.apache.hadoop.mapreduce classes imported by WordCountMapper. -->
<!-- NOTE(review): the later pom.xml listing in this article uses 1.0.3; confirm which version is intended. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.0.0</version>
</dependency>
<!-- JUnit, available on the test classpath only. -->
<!-- NOTE(review): 3.8.1 predates the org.junit annotations (@Before/@Test) used by the tests later in this article; confirm it is upgraded before tests are added. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compile sources at the Java 8 language level and emit Java 8 bytecode. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
编译:
% mvn compile
添加以下内容到 src/test/java/WordCountMapperTest.java 文件中:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.*;
/**
 * MRUnit tests for {@link WordCountMapper}: each test feeds one line of text
 * to the mapper and checks that a {@code (word, 1)} pair is emitted for every
 * word, in the order the words appear on the line.
 */
public class WordCountMapperTest {
    /** Driver that runs the mapper in isolation; recreated before every test. */
    private MapDriver<LongWritable, Text, Text, IntWritable> mapDriver;

    /** Builds a fresh driver wired to a new mapper so tests do not share expectations. */
    @Before
    public void setUp() {
        mapDriver = new MapDriver<>();
        mapDriver.setMapper(new WordCountMapper());
    }

    /**
     * A repeated word must be emitted once per occurrence, not pre-aggregated.
     *
     * @throws Exception if the driver fails while running the mapper
     */
    @Test
    public void testMapperWithCatsAndDogs() throws Exception {
        mapDriver.withInput(new LongWritable(1), new Text("cat cat dog"));
        mapDriver.withOutput(new Text("cat"), new IntWritable(1));
        mapDriver.withOutput(new Text("cat"), new IntWritable(1));
        mapDriver.withOutput(new Text("dog"), new IntWritable(1));
        mapDriver.runTest();
    }

    /**
     * Same check as above with a different vocabulary.
     *
     * @throws Exception if the driver fails while running the mapper
     */
    @Test
    public void testMapperWithHorsesAndZebras() throws Exception {
        mapDriver.withInput(new LongWritable(1), new Text("horse horse zebra"));
        mapDriver.withOutput(new Text("horse"), new IntWritable(1));
        mapDriver.withOutput(new Text("horse"), new IntWritable(1));
        mapDriver.withOutput(new Text("zebra"), new IntWritable(1));
        mapDriver.runTest();
    }
}
再次修改 pom.xml,添加 MRUnit 和 hamcrest 依赖,并将 JUnit 升级到 4.x:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Build descriptor for the word-count example, now including the libraries needed to run the MRUnit tests. -->
<modelVersion>4.0.0</modelVersion>
<groupId>wordcount-mrunit</groupId>
<artifactId>wordcount-mrunit</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>wordcount with mrunit example</name>
<url>http://maven.apache.org</url>
<dependencies>
<!-- Hadoop core: supplies the org.apache.hadoop.io and org.apache.hadoop.mapreduce classes used by the mapper and reducer. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.0.3</version>
</dependency>
<!-- MRUnit: supplies the MapDriver, ReduceDriver and MapReduceDriver classes imported by the tests. -->
<!-- The hadoop1 classifier presumably selects the build matching the Hadoop 1.x API above; confirm against the MRUnit documentation. -->
<dependency>
<groupId>org.apache.mrunit</groupId>
<artifactId>mrunit</artifactId>
<version>1.1.0</version>
<classifier>hadoop1</classifier>
<scope>test</scope>
</dependency>
<!-- JUnit 4: the tests import org.junit.* and rely on the @Before and @Test annotations. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
<scope>test</scope>
</dependency>
<!-- Hamcrest matchers, test scope only. NOTE(review): not referenced directly by the test sources shown; presumably needed at runtime by JUnit/MRUnit; confirm. -->
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-all</artifactId>
<version>1.3</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compile sources at the Java 8 language level and emit Java 8 bytecode. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
使用以下命令运行测试:
% mvn test
[INFO] Scanning for projects...
[INFO]
[INFO] ------------------------------------------------------------------------
[INFO] Building wordcount with mrunit example 1.0-SNAPSHOT
[INFO] ------------------------------------------------------------------------
[INFO]
[INFO] --- maven-resources-plugin:2.3:resources (default-resources) @ wordcount-mrunit ---
[WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
[INFO] skip non existing resourceDirectory /home/ubuntu/src/wordcount-mrunit/src/main/resources
[INFO]
[INFO] --- maven-compiler-plugin:3.1:compile (default-compile) @ wordcount-mrunit ---
[INFO] Nothing to compile - all classes are up to date
[INFO]
[INFO] --- maven-resources-plugin:2.3:testResources (default-testResources) @ wordcount-mrunit ---
[WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
[INFO] skip non existing resourceDirectory /home/ubuntu/src/wordcount-mrunit/src/test/resources
[INFO]
[INFO] --- maven-compiler-plugin:3.1:testCompile (default-testCompile) @ wordcount-mrunit ---
[INFO] Nothing to compile - all classes are up to date
[INFO]
[INFO] --- maven-surefire-plugin:2.10:test (default-test) @ wordcount-mrunit ---
[INFO] Surefire report directory: /home/ubuntu/src/wordcount-mrunit/target/surefire-reports
-------------------------------------------------------
T E S T S
-------------------------------------------------------
Running WordCountMapperTest
Tests run: 2, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.286 sec
Results :
Tests run: 2, Failures: 0, Errors: 0, Skipped: 0
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 1.200s
[INFO] Finished at: Mon Apr 23 13:39:52 CST 2018
[INFO] Final Memory: 10M/481M
[INFO] ------------------------------------------------------------------------
添加以下内容到 src/main/java/WordCountReducer.java 中:
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int total = 0;
for (IntWritable value : values) {
total += value.get();
}
context.write(key, new IntWritable(total));
}
}
添加以下内容到 src/test/java/WordCountReducerTest.java 文件中:
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.*;
/**
 * MRUnit tests for {@link WordCountReducer}: each test hands the reducer one
 * word with two counts of 1 and checks that a single total of 2 is emitted.
 */
public class WordCountReducerTest {
    /** Driver that runs the reducer in isolation; recreated before every test. */
    private ReduceDriver<Text, IntWritable, Text, IntWritable> reduceDriver;

    /** Builds a fresh driver wired to a new reducer so tests do not share expectations. */
    @Before
    public void setUp() {
        reduceDriver = new ReduceDriver<>();
        reduceDriver.setReducer(new WordCountReducer());
    }

    /** @throws Exception if the driver fails while running the reducer */
    @Test
    public void testReducerWithCats() throws Exception {
        assertTwoOnesSumToTwo("cat");
    }

    /** @throws Exception if the driver fails while running the reducer */
    @Test
    public void testReducerWithDogs() throws Exception {
        assertTwoOnesSumToTwo("dog");
    }

    /** @throws Exception if the driver fails while running the reducer */
    @Test
    public void testReducerWithHorses() throws Exception {
        assertTwoOnesSumToTwo("horse");
    }

    /**
     * Feeds {@code (word, [1, 1])} to the reducer and expects {@code (word, 2)}.
     *
     * @param word the key used for both the input and the expected output
     * @throws Exception if the driver fails while running the reducer
     */
    private void assertTwoOnesSumToTwo(String word) throws Exception {
        List<IntWritable> values = new ArrayList<>();
        values.add(new IntWritable(1));
        values.add(new IntWritable(1));
        reduceDriver.withInput(new Text(word), values);
        reduceDriver.withOutput(new Text(word), new IntWritable(2));
        reduceDriver.runTest();
    }
}
使用以下命令运行测试:
% mvn test
[INFO] Scanning for projects...
[INFO]
[INFO] ------------------------------------------------------------------------
[INFO] Building wordcount with mrunit example 1.0-SNAPSHOT
[INFO] ------------------------------------------------------------------------
[INFO]
[INFO] --- maven-resources-plugin:2.3:resources (default-resources) @ wordcount-mrunit ---
[WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
[INFO] skip non existing resourceDirectory /home/ubuntu/src/wordcount-mrunit/src/main/resources
[INFO]
[INFO] --- maven-compiler-plugin:3.1:compile (default-compile) @ wordcount-mrunit ---
[INFO] Nothing to compile - all classes are up to date
[INFO]
[INFO] --- maven-resources-plugin:2.3:testResources (default-testResources) @ wordcount-mrunit ---
[WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
[INFO] skip non existing resourceDirectory /home/ubuntu/src/wordcount-mrunit/src/test/resources
[INFO]
[INFO] --- maven-compiler-plugin:3.1:testCompile (default-testCompile) @ wordcount-mrunit ---
[INFO] Changes detected - recompiling the module!
[WARNING] File encoding has not been set, using platform encoding UTF-8, i.e. build is platform dependent!
[INFO] Compiling 2 source files to /home/ubuntu/src/wordcount-mrunit/target/test-classes
[INFO]
[INFO] --- maven-surefire-plugin:2.10:test (default-test) @ wordcount-mrunit ---
[INFO] Surefire report directory: /home/ubuntu/src/wordcount-mrunit/target/surefire-reports
-------------------------------------------------------
T E S T S
-------------------------------------------------------
Running WordCountMapperTest
Tests run: 2, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.281 sec
Running WordCountReducerTest
Tests run: 3, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.04 sec
Results :
Tests run: 5, Failures: 0, Errors: 0, Skipped: 0
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 1.994s
[INFO] Finished at: Mon Apr 23 13:50:41 CST 2018
[INFO] Final Memory: 17M/350M
[INFO] ------------------------------------------------------------------------
可以使用 MapReduceDriver 将 map 和 reduce 结合起来一起测试,添加以下代码到 src/test/java/WordCountMapReduceTest.java 中:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.junit.*;
public class WordCountMapReduceTest {
` MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, IntWritable> mapReduceDriver;
@Before
public void setUp() throws Exception, InterruptedException {
WordCountMapper mapper = new WordCountMapper();
WordCountReducer reducer = new WordCountReducer();
mapReduceDriver = new MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, IntWritable>();
mapReduceDriver.setMapper(mapper);
mapReduceDriver.setReducer(reducer);
}
@Test
public void testMapReduceWithCatsAndDogs() throws Exception, InterruptedException {
mapReduceDriver.withInput(new LongWritable(1), new Text("cat cat dog"));
mapReduceDriver.addOutput(new Text("cat"), new IntWritable(2));
mapReduceDriver.addOutput(new Text("dog"), new IntWritable(1));
mapReduceDriver.runTest();
}
@Test
public void testMapReduceWithHorsesAndZebras() throws Exception, InterruptedException {
mapReduceDriver.withInput(new LongWritable(1), new Text("horse zebra horse"));
mapReduceDriver.addOutput(new Text("horse"), new IntWritable(2));
mapReduceDriver.addOutput(new Text("zebra"), new IntWritable(1));
mapReduceDriver.runTest();
}
}
使用以下命令运行测试:
% mvn test
[INFO] Scanning for projects...
[INFO]
[INFO] ------------------------------------------------------------------------
[INFO] Building wordcount with mrunit example 1.0-SNAPSHOT
[INFO] ------------------------------------------------------------------------
[INFO]
[INFO] --- maven-resources-plugin:2.3:resources (default-resources) @ wordcount-mrunit ---
[WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
[INFO] skip non existing resourceDirectory /home/ubuntu/src/wordcount-mrunit/src/main/resources
[INFO]
[INFO] --- maven-compiler-plugin:3.1:compile (default-compile) @ wordcount-mrunit ---
[INFO] Nothing to compile - all classes are up to date
[INFO]
[INFO] --- maven-resources-plugin:2.3:testResources (default-testResources) @ wordcount-mrunit ---
[WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
[INFO] skip non existing resourceDirectory /home/ubuntu/src/wordcount-mrunit/src/test/resources
[INFO]
[INFO] --- maven-compiler-plugin:3.1:testCompile (default-testCompile) @ wordcount-mrunit ---
[INFO] Changes detected - recompiling the module!
[WARNING] File encoding has not been set, using platform encoding UTF-8, i.e. build is platform dependent!
[INFO] Compiling 3 source files to /home/ubuntu/src/wordcount-mrunit/target/test-classes
[INFO]
[INFO] --- maven-surefire-plugin:2.10:test (default-test) @ wordcount-mrunit ---
[INFO] Surefire report directory: /home/ubuntu/src/wordcount-mrunit/target/surefire-reports
-------------------------------------------------------
T E S T S
-------------------------------------------------------
Running WordCountMapperTest
Tests run: 2, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.296 sec
Running WordCountReducerTest
Tests run: 3, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.044 sec
Running WordCountMapReduceTest
Tests run: 2, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.026 sec
Results :
Tests run: 7, Failures: 0, Errors: 0, Skipped: 0
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 1.847s
[INFO] Finished at: Mon Apr 23 14:04:31 CST 2018
[INFO] Final Memory: 17M/367M
[INFO] ------------------------------------------------------------------------