Hee: MapReduce: Split a txt file into multiple files based on a pattern in a file -

Wednesday, 15 August 2012

MapReduce: Split a txt file into multiple files based on a pattern in a file -

I have a tab-separated .txt file like this:

05-12-2011 02:00:00 xyzz

05-12-2011 02:01:00 xyzz

05-12-2011 02:02:00 xyzz

05-12-2011 02:03:00 xyzz

05-12-2011 02:04:00 abcd

05-12-2011 02:05:00 abcd

05-12-2011 02:06:00 abcd

05-12-2011 02:07:00 xyzz

05-12-2011 02:08:00 abcd

I want to write this data to different files, such that the lines matching the pattern "xyzz" go to one file and the lines matching "abcd" go to another file.

file1.txt should contain:

05-12-2011 02:01:00 xyzz

05-12-2011 02:02:00 xyzz

05-12-2011 02:03:00 xyzz

05-12-2011 02:07:00 xyzz

and file2.txt should contain:

05-12-2011 02:04:00 abcd

05-12-2011 02:05:00 abcd

05-12-2011 02:06:00 abcd

Here's the code I want to share.

public class wordcount2 {    public static class tokenizermapper2        extends mapper<object, text, text, intwritable>{          private final static intwritable 1 = new intwritable(1);         private text word = new text();      public void map(object key, text value, context context                     ) throws ioexception, interruptedexception {       stringtokenizer itr = new stringtokenizer(value.tostring());       while (itr.hasmoretokens()) {         word.set(itr.nexttoken());         context.write(word, one);       }     }   }    public static class intsumreducer2        extends reducer<text,intwritable,text,intwritable> {     private intwritable result = new intwritable();      public void reduce(text key, iterable<intwritable> values,                         context context                        ) throws ioexception, interruptedexception {     /*  int sum = 0;       (intwritable val : values) {         sum += val.get();       }       result.set(sum);       context.write(key, result);*/     }   }    public static void main(string[] args) throws exception {      configuration conf = new configuration();     string line;      string arguements[];     string[] otherargs = new genericoptionsparser(conf, args).getremainingargs();      // calculating total number of attributes in file     filereader infile = new filereader(args[0]);     bufferedreader bufread = new bufferedreader(infile);     line = bufread.readline();     arguements = line.split(","); //for spliting fields separated comma     conf.setint("argno", arguements.length); // saving attribute value      job job = new job(conf, "word count");     job.setjarbyclass(wordcount.class);     job.setmapperclass(tokenizermapper.class);     job.setcombinerclass(intsumreducer.class);     job.setreducerclass(intsumreducer.class);     job.setoutputkeyclass(text.class);     job.setoutputvalueclass(intwritable.class);     fileinputformat.addinputpath(job, new path(otherargs[0]));     
fileoutputformat.setoutputpath(job, new path(otherargs[1]));     system.exit(job.waitforcompletion(true) ? 0 : 1);   } }

mapreduce

Hee

Wednesday, 15 August 2012

MapReduce: Split a txt file into multiple files based on a pattern in a file -

No comments:

Post a Comment