Tuesday, 15 June 2010

Hadoop JAVA MR job -



Hadoop JAVA MR job -

Hi, I am new to Hadoop MapReduce. I tried to write a simple MR job to count the shortest path from a node to a destination node. Basically the logic is:

If the input text file has the following paths given: abcd abd acd bed bd bacd

then the output should be: abd bd

which gives the shortest path between nodes a and d, and the shortest path between nodes b and d.

The output I am actually getting is: [abcd abd acd bed bd bacd]

I have written the following MR job for the same. It is not giving the desired answer. I am running the MR job in standalone mode.

Please let me know what is wrong with the code and a solution for it. Thanks a lot for your time.

public class shpath { public static class map extends mapreducebase implements mapper<longwritable, text, text, text> { public void map(longwritable key, text value, outputcollector<text, text> output, reporter reporter) throws ioexception { string[] line = value.tostring().split("\t"); list<string> l = new arraylist<string>(); for(string lin :line){ l.add(lin); } list <string>startend = new arraylist<string>(); for(string s : l){ string g = s.substring(0,1)+s.substring((s.length())-1); if(!startend.contains(g)) { startend.add(g); } } list <string> uniquestringlist = new arraylist<string>(); java.util.map finalmap = new hashmap(); for(string s1 : startend){ for(string s : l) { if(s.startswith(s1.substring(0,1)) && (s.endswith(s1.substring((s1.length())-1)))){ uniquestringlist.add(s); } } string smallestkey = null; int minsize = integer.max_value; string smallest = null; for(string s2 : uniquestringlist){ if(s2.length() < minsize) { minsize = s2.length(); smallest = s2; smallestkey = s1; } finalmap.put(s1,smallest); } uniquestringlist.clear(); }output.collect(new text(),new text(finalmap.values().tostring())); } } public static class cut down extends mapreducebase implements reducer<text, text, text, text> { public void reduce(text key, iterator<text> value, outputcollector<text, text> output, reporter reporter) throws ioexception { while (value.hasnext()){ output.collect(new text(key),new text(value.next())); } } } public static void main(string[] args) throws exception { jobconf conf = new jobconf(shpath.class); conf.setjobname("shpath"); conf.setoutputkeyclass(text.class); conf.setoutputvalueclass(text.class); conf.setmapperclass(map.class); conf.setcombinerclass(reduce.class); conf.setreducerclass(reduce.class); conf.setinputformat(org.apache.hadoop.mapred.textinputformat.class); conf.setoutputformat(org.apache.hadoop.mapred.textoutputformat.class); org.apache.hadoop.mapred.fileinputformat.setinputpaths(conf, new path(args[0])); 
org.apache.hadoop.mapred.fileoutputformat.setoutputpath(conf, new path(args[1])); jobclient.runjob(conf); } }

I am not sure, but it may have something to do with this:

public void map(longwritable key, text value, outputcollector<text, text> output, reporter reporter) throws ioexception { map<string , hashmap<integer, string> > outmap = new hashmap<string, hashmap<integer, string> >(); hashmap<integer, string> tempmap = new hashmap<integer, string>(); tempmap.put(integer.max_value, ""); outmap.put("ad", tempmap); outmap.put("bd", tempmap); string[] line = value.tostring().split("\t"); (string path : line) { string temppath = new string( new char[]{path.charat(0) , path.charat(path.length() - 1)}); if(outmap.containskey(temppath)) { hashmap<integer, string> tempoutmap = outmap.get(temppath); (iterator itr = tempoutmap.keyset().iterator(); itr.hasnext(); ) { integer count = (integer) itr.next(); if(count > temppath.length()){ tempmap.remove(count); tempmap.put(temppath.length(), temppath); } } } } (string str : outmap.keyset()) { output.collect(new text(str), new text(outmap.get(str).values().tostring())); } } public void reduce(text key, iterator<text> value, outputcollector<text, text> output, reporter reporter) throws ioexception { string outstring; int smallest = integer.max_value; while (value.hasnext()){ string str = value.next(); if(str.length() < smallest) { outstring = str; smallest = str.length(); } } output.collect(new text(key),new text(outstring)); }

hadoop

No comments:

Post a Comment