package jp.ac.dendai.cdl.mori.wikie.mapred;

import java.io.*;

import jp.ac.dendai.cdl.mori.wikie.parser.*;
import jp.ac.dendai.cdl.mori.wikie.util.*;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

import au.id.jericho.lib.html.*;

/**
 * Mapper that emits, for every input record, the page id together with the
 * length of the text extracted from the rendered page.
 *
 * <p>Output key: the page id. Output value: the decimal string form of the
 * extracted text length (in Java {@code char}s).
 */
public class SizeMapper extends WMapper {

    /**
     * Builds a page handler from {@code value}, renders the page text, strips
     * the markup with Jericho's text extractor and collects
     * {@code (id, extractedTextLength)}.
     *
     * <p>A record that cannot be parsed or rendered is skipped: the exception
     * is printed and nothing is collected for it. A failure while writing the
     * output is not swallowed and propagates to the caller.
     *
     * @param key      input key; not used by this mapper
     * @param value    input record handed to {@code createPageHandler}
     * @param output   collector receiving the {@code (id, size)} pair
     * @param reporter Hadoop reporter; not used by this mapper
     * @throws IOException if {@code output.collect} fails
     */
    @Override
    public void map(LongWritable key, Text value,
                    OutputCollector<Text, Text> output,
                    Reporter reporter) throws IOException {
        Text outKey;
        Text outValue;
        try {
            WPageElementHandler page = createPageHandler(value);
            String id = page.getId();
            String text = WNormalizer.deleteNonPrintingChar(page.getText());
            // NOTE(review): presumably renders wiki markup to HTML; `model`
            // is expected to be provided by WMapper -- confirm.
            text = model.render(text);
            // A Source is itself a Segment spanning the whole document, so
            // extract from it directly. Building a Segment with an end index
            // of text.length()-1 dropped the last character (the end index is
            // exclusive) and was rejected outright for empty text.
            Source source = new Source(text);
            int size = source.getTextExtractor().toString().length();
            outKey = new Text(id);
            outValue = new Text(String.valueOf(size));
        }
        catch (Exception e) {
            // Best effort per record: one malformed page must not fail the job.
            e.printStackTrace();
            return;
        }
        // Kept outside the try block so that an output failure is reported
        // instead of being silently swallowed with the parse errors.
        output.collect(outKey, outValue);
    }
}
