package eu.dnetlib.iis.collapsers;

import eu.dnetlib.iis.collapsers.schemas.DocumentTextEnvelope;
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer that collapses every {@link DocumentTextEnvelope} received for one
 * key into a single {@link DocumentText} record.
 *
 * <p>The texts are concatenated, separated by newlines, in the order in which
 * their origins appear in the comma-separated {@code origins} job
 * configuration property. When several envelopes share the same origin, the
 * last one iterated wins. The concatenated result is trimmed before it is
 * written out.
 *
 * @author Dominika Tkaczyk
 */
public class DocumentTextReducer extends Reducer<AvroKey<String>, AvroValue<DocumentTextEnvelope>, AvroKey<DocumentText>, NullWritable> {

    /** Name of the job configuration property holding the comma-separated origin list. */
    private static final String ORIGINS_PROPERTY = "origins";

    /** Configured origins; the position of an origin defines the position of its text in the output. */
    private List<String> origins;

    /**
     * Reads the ordered list of origins from the job configuration.
     *
     * @param context task context giving access to the job configuration
     * @throws IllegalStateException if the {@code origins} property is not set
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        String configuredOrigins = context.getConfiguration().get(ORIGINS_PROPERTY);
        if (configuredOrigins == null) {
            // Fail with an explanatory message instead of a bare NullPointerException.
            throw new IllegalStateException(
                    "Required job configuration property '" + ORIGINS_PROPERTY + "' is not set");
        }
        origins = Arrays.asList(configuredOrigins.split(","));
    }

    /**
     * Merges the texts of all envelopes sharing the given key and writes one
     * {@link DocumentText} whose id is the key and whose text is the ordered,
     * newline-separated, trimmed concatenation of the non-null texts.
     *
     * @param key document identifier shared by all {@code values}
     * @param values envelopes carrying a text fragment together with its origin
     * @param context task context used to emit the merged record
     * @throws IllegalStateException if an envelope's origin is missing or is
     *         not one of the configured origins
     */
    @Override
    public void reduce(AvroKey<String> key, Iterable<AvroValue<DocumentTextEnvelope>> values, Context context)
            throws IOException, InterruptedException {

        String[] textsByOrigin = new String[origins.size()];
        for (AvroValue<DocumentTextEnvelope> value : values) {
            DocumentTextEnvelope envelope = value.datum();

            // Compare on the String form: Avro may hand back a Utf8 instance,
            // which never equals() a java.lang.String in the configured list.
            CharSequence origin = envelope.getOrigin();
            int slot = (origin == null) ? -1 : origins.indexOf(origin.toString());
            if (slot < 0) {
                // Previously surfaced as ArrayIndexOutOfBoundsException: -1.
                throw new IllegalStateException("Origin '" + origin + "' of document '" + key.datum()
                        + "' is not among the configured origins " + origins);
            }

            // Snapshot the text: the framework may reuse the value object (and
            // Avro its mutable character buffers) on the next iteration, so a
            // retained CharSequence reference could be overwritten later.
            CharSequence text = envelope.getData().getText();
            textsByOrigin[slot] = (text == null) ? null : text.toString();
        }

        StringBuilder merged = new StringBuilder();
        for (String text : textsByOrigin) {
            if (text != null) {
                merged.append(text);
                merged.append("\n");
            }
        }

        DocumentText documentText = DocumentText.newBuilder()
                .setId(key.datum())
                .setText(merged.toString().trim())
                .build();
        context.write(new AvroKey<DocumentText>(documentText), NullWritable.get());
    }

}
 