Skip to content

Commit b859565

Browse files
committed
Initial commit.
0 parents  commit b859565

File tree

5 files changed

+229
-0
lines changed

5 files changed

+229
-0
lines changed

pom.xml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.cloudera</groupId>
  <artifactId>gapdeduce</artifactId>
  <version>1.0-SNAPSHOT</version>
  <name>Gap Deduce</name>
  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.0.0-cdh4.1.0</version>
    </dependency>

    <!-- For unit testing only: test scope keeps MRUnit off the classpath of the job itself -->
    <dependency>
      <groupId>org.apache.mrunit</groupId>
      <artifactId>mrunit</artifactId>
      <version>0.8.0-incubating</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Tells maven to use the Java 6 JDK instead of its default -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.3.1</version>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
        </configuration>
      </plugin>
    </plugins>
  </build>

  <!-- Cloudera-hosted repositories; the CDH build of hadoop-client is resolved from here -->
  <repositories>
    <repository>
      <id>maven-hadoop</id>
      <name>Hadoop Releases</name>
      <url>https://repository.cloudera.com/content/repositories/releases/</url>
    </repository>
    <repository>
      <id>cloudera-repos</id>
      <name>Cloudera Repos</name>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
  </repositories>
</project>

src/main/java/Deducer.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
import java.io.IOException;
20+
import java.util.*;
21+
22+
import org.apache.hadoop.io.*;
23+
import org.apache.hadoop.mapred.*;
24+
import org.apache.hadoop.util.*;
25+
26+
public class Deducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
27+
28+
public void reduce(Text key, Iterator<Text> values,
29+
OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
30+
Set<String> attackers = new TreeSet<String>();
31+
while (values.hasNext()) {
32+
String valStr = values.next().toString();
33+
attackers.add(valStr);
34+
}
35+
output.collect(key, new Text(attackers.toString()));
36+
}
37+
}
38+

src/main/java/GapDeduceRunner.java

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
import java.io.IOException;
20+
21+
import org.apache.hadoop.fs.Path;
22+
import org.apache.hadoop.conf.*;
23+
import org.apache.hadoop.io.*;
24+
import org.apache.hadoop.mapred.*;
25+
import org.apache.hadoop.util.*;
26+
27+
public class GapDeduceRunner {
28+
public static void main(String[] args) throws IOException {
29+
JobConf conf = new JobConf(GapDeduceRunner.class);
30+
conf.setJobName("gapdeduce");
31+
32+
conf.setMapOutputKeyClass(Text.class);
33+
conf.setMapOutputValueClass(Text.class);
34+
35+
conf.setOutputKeyClass(Text.class);
36+
conf.setOutputValueClass(Text.class);
37+
38+
conf.setMapperClass(Gapper.class);
39+
conf.setReducerClass(Deducer.class);
40+
41+
// KeyValueTextInputFormat treats each line as an input record,
42+
// and splits the line by the tab character to separate it into key and value
43+
conf.setInputFormat(KeyValueTextInputFormat.class);
44+
conf.setOutputFormat(TextOutputFormat.class);
45+
46+
FileInputFormat.setInputPaths(conf, new Path(args[0]));
47+
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
48+
49+
JobClient.runJob(conf);
50+
}
51+
}

src/main/java/Gapper.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
import java.io.IOException;
20+
21+
import org.apache.hadoop.io.*;
22+
import org.apache.hadoop.mapred.*;
23+
import org.apache.hadoop.util.*;
24+
25+
public class Gapper extends MapReduceBase implements Mapper<Text, Text, Text, Text> {
26+
public void map(Text attacker, Text victim, OutputCollector<Text, Text> output,
27+
Reporter reporter) throws IOException {
28+
output.collect(victim, attacker);
29+
}
30+
}
31+

src/test/java/TestGapDeduce.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
import java.util.ArrayList;
20+
import java.util.List;
21+
22+
import org.apache.hadoop.io.Text;
23+
import org.apache.hadoop.mrunit.MapDriver;
24+
import org.apache.hadoop.mrunit.MapReduceDriver;
25+
import org.apache.hadoop.mrunit.ReduceDriver;
26+
import org.junit.Before;
27+
import org.junit.Test;
28+
29+
public class TestGapDeduce {
30+
31+
private MapDriver<Text, Text, Text, Text> mapDriver;
32+
private ReduceDriver<Text, Text, Text, Text> reduceDriver;
33+
34+
@Before
35+
public void setUp() {
36+
Gapper mapper = new Gapper();
37+
Deducer reducer = new Deducer();
38+
mapDriver = MapDriver.newMapDriver(mapper);;
39+
reduceDriver = ReduceDriver.newReduceDriver(reducer);
40+
}
41+
42+
@Test
43+
public void testMapper() {
44+
mapDriver.withInput(new Text("sanford"), new Text("sage"));
45+
mapDriver.withOutput(new Text("sage"), new Text("sanford"));
46+
mapDriver.runTest();
47+
}
48+
49+
@Test
50+
public void testReducer() {
51+
List<Text> values = new ArrayList<Text>();
52+
values.add(new Text("sage"));
53+
values.add(new Text("ian"));
54+
values.add(new Text("sage"));
55+
reduceDriver.withInput(new Text("sanford"), values);
56+
reduceDriver.withOutput(new Text("sanford"),
57+
new Text("[ian, sage]"));
58+
reduceDriver.runTest();
59+
}
60+
}

0 commit comments

Comments
 (0)