1、实验3 MapReduce编程初级实践 1. 实验目的 1. 通过实验掌握基本的MapReduce编程方法;2. 掌握用MapReduce解决一些常见的数据处理问题,包括数据去重、数据排序和数据挖掘等。2. 实验平台 已经配置完成的Hadoop伪分布式环境。3. 实验内容和要求 1. 编程实现文件合并和去重操作 对于两个输入文件,即文件A和文件B,请编写MapReduce程序,对两个文件进行合并,并剔除其中重复的内容,得到一个新的输出文件C。下面是输入文件和输出文件的一个样例供参考。实验最终结果(合并的文件)截图:[截图文字为OCR残留,内容无法辨认]

2、u set/w_ hd ri:/ Lu L d Ihu st .9000/u s20150101y20)5010)X2O15O1My|20150102y201501®X2O15O1Q3耳20150104z20150104y20150105y20150105z匸in 501%X代码如下:package com.Merge;import java.i o.I OExceptio n;import org.apache.hadoop.c onf.Con figurati on;import org.apache.hadoop.fs.Path;import org.apache.hadoop

3、.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.i nput.;import org.apache.hadoop.mapreduce.lib.output.;public class Merge public static class Map exte nds Mapper<Object, Te

4、xt, Text, Text private static Text text = new Text();public void map(Object key. Text value, Con text con text) throws IOExceptio n. In terruptedExcepti on text = value;context.write(text, new Text(""); public static class Reduce exte nds Reducer<Text, Text, Text, Text> public void r

5、educe(Text key, Iterable<Text> values, Con text con text) throws IOExceptio n, In terruptedExcepti on con text.write(key, new Text(""); public static void main(String args) throws Exception Con figurati on conf = new Con figurati on();conf.set("fs.defaultFS", "hdfs:/lo

6、calhost:9000");Strin g otherArgs = new Stri ng "in put", "output" ;if (otherArgs.length != 2) System.err.println("Usage: Merge and duplicate removal <in> <out>"); System.exit(2);Job job = Job.getInstance(conf, "Merge and duplicate removal"); j

7、ob.setJarByClass(Merge.class);job.setMapperClass(Map.class);job.setReducerClass(Reduce.class);job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class);(job, new Path(otherArgs0);(job, new Path(otherArgs1);System.exit(job.waitForCompletion(true) ? 0 : 1);2. 编写程序实现对输入文件的排序现在有多个输入文件, 每个文件

8、中的每行内容均为一个整数。要求读取所有文件中的整数,进行升序排序后,输出到一个新的文件中,输出的数据格式为每行两个整数,第一个数字为第二个整数的排序位次,第二个整数为原待排列的整数。下面是输入文件和输出文件的 一个样例供参考。实验结果截图:代码如下:package com.MergeSort;import java.i o.I OExceptio n;import org.apache.hadoop.c onf.Con figurati on;import org.apache.hadoop.fs.Path;import org.apache.hadoop.i o.ln tWritable;i

9、mport org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.i nput.;import org.apache.hadoop.mapreduce.lib.output.;public class MergeSort public static class Map ext

10、e ndsMapper<Object, Text, IntWritable, IntWritable> private static IntWritable data = new IntWritable();public void map(Object key, Text value, Con text con text) throws IOExceptio n, In terruptedExcepti on String line = value.toString(); data.set(I nteger.parse In t(l in e);con text.write(dat

11、a, new In tWritable(1); public static class Reduce exte ndsReducerV ntWritable, I ntWritable, I ntWritable, I ntWritable private static IntWritable linenum = new IntWritable(l);public void reduce(I ntWritable key, Iterable<I ntWritablevalues,Con text con text) throws IOExcepti on, In terruptedExc

12、epti on for (I ntWritable val : values) con text.write(li nenum, key);linenum = new In tWritable(li nenu m.get() + 1);public static void main(String args) throws Exception Con figurati on conf = new Con figurati on();conf.set("fs.defaultFS", "hdfs:/localhost:9000");Stri ng otherA

13、rgs = new Stri ng "in put2", "output2" ; /*直接设置输入参数*/if (otherArgs.length != 2) System.err.pri ntln ("Usage: mergesort <in> <out>");System.exit(2);Job job = Job.getI nsta nce(conf, "mergesort"); job.setJarByClass(MergeSort.class); job.setMapperClas

14、s(Map.class);job.setReducerClass(Reduce.class);job.setOutputKeyClass(l ntWritable.class);job.setOutputValueClass(l ntWritable.class);(job, new Path(otherArgs0);(job, new Path(otherArgs1);System.exit(job.waitForCompletion(true) ? 0 : 1);3. 对给定的表格进行信息挖掘 下面给出一个child-parent的表格,要求挖掘其中的父子辈关系,给出祖孙辈关系的表格。实验

15、最后结果截图如下:[截图标题栏:hdfs://localhost:9000/… STjoin.java hdfs://localhost:9000/…] grand_child grand_parent / Mark Jesse / Mark Alice / Philip Jesse / Philip Alice / Jone Jesse / Jone Alice / Steven Jesse / Steven Alice / Steven Frank / Steven Mary / Jone Frank / Jone Mary 代码如下:package com.jo in;import java.i o.IO Excepti on;import java.util.*;import org.

16、apache.hadoop.c onf.Con figurati on;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.;import org.apache.hadoop.map

17、reduce.lib.output.;public class STjoin public static int time = 0;public static class Map extends Mapper<Object, Text, Text, Text> public void map(Object key, Text value, Context context) throws IOException, InterruptedException String child_name = new String();String parent_name = new String(

18、);String relation_type = new String(); String line = value.toString();int i = 0;while (line.charAt(i) != ' ') i+;String values = line.substring(0, i), line.substring(i + 1) ;if (pareTo("child") != 0) child_name = values0; parent_name = values1; relation_type = "1"context.

19、write(new Text(values1), new Text(relation_type +"+"+ child_name + "+" + parent_name); relation_type = "2"context.write(new Text(values0), new Text(relation_type +"+"+ child_name + "+" + parent_name); public static class Reduce extends Reducer<Tex

20、t, Text, Text, Text> public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException if (time = 0) context.write(new Text("grand_child"), newText("grand_parent");time+;int grand_child_num = 0;String grand_child = new String10

21、;int grand_parent_num = 0;String grand_parent = new String10;Iterator ite = values.iterator();while (ite.hasNext() String record = ite.next().toString();int len = record.length();int i = 2;if (len = 0)continue;char relation_type = record.charAt(0); String child_name = new String();Stri ng pare nt_n

22、ame = new Stri ng();while (record.charAt(i) != '+') child_ name = child_ name + record.charAt(i); i+;i = i + 1;while (i < len) pare nt_n ame = pare nt_n ame + record.charAt(i); i+;if (relatio n_type = '1') gra nd_childgra nd_child_ num = child_ name;gra nd_child_ nu m+; else gran

23、d_pare ntgra nd_pare nt_num = pare nt_n ame; gra nd_pare nt_nu m+; -if (gra nd_pare nt_num != 0 && gra nd_child_ num != 0) for (int m = 0; m < gra nd_child_ num; m+) for (int n = 0; n < gra nd_pare nt_num; n+) context.write(new Text(grand_childm), new Text( gran d_pare ntn ); -public s

24、tatic void main(String args) throws Exception Con figurati on conf = new Con figurati on();conf.set("fs.defaultFS", "hdfs:/localhost:9000");String otherArgs = new String "input3", "output3" ;if (otherArgs.length != 2) System.err.pri ntl n("Usage: Si ngle

25、Table Join <i n> <out>"); System.exit(2);Job job = Job.getI nsta nce(c onf, "Sin gle table join ”); job.setJarByClass(STj oin. class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class);(job,

26、new Path(otherArgs0);(job, new Path(otherArgs1); System.exit(job.waitForCompletion(true) ? 0 : 1);4. 实验报告 云计算实验报告 题目:MapReduce编程初级实践 姓名:包生友 日期:2016/12/20 实验环境:机房的虚拟机上配置好的环境。解决问题的思路:根据老师给的代码进行操作。实验内容与完成情况:已完成,与同学商量后仍有部分代码尚未知道其作用所在。出现的问题:执行之后,出现未找到main函数的情况;再次执行会报错,提示文件已经存在。解决方案(列出遇到的问题和解决办法,列出没有解决的问题):问题:1. 执行之后,出现未找到main函数的情况;2. 再次执行会报错,提示文件已经存在。解决办法:删除输出文件即可(程序执行时输出文件不能存在)。5. 实验总结 通过本次实验,我掌握了基本的MapReduce编程方法,掌握了用MapReduce解决一些常见的数据处理问题,包括数据去重、数据排序和数据挖掘等。短暂的云计算课程实验到此结束,但我知道对云计算的学习是没有尽头的。


