package eu.dnetlib.iis.collapsers;

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * @author Dominika Tkaczyk
 */
public class DefaultCollapserMapper extends Mapper<AvroKey<IndexedRecord>, NullWritable, AvroKey<String>, AvroValue<IndexedRecord>> {
    
    private String idField;
    
	@Override
	protected void setup(Context context) throws IOException,
			InterruptedException {      
        idField = context.getConfiguration().get("id_field");
    }

	@Override
	protected void map(AvroKey<IndexedRecord> key, NullWritable ignore, Context context) throws IOException, InterruptedException {
        Schema schema = key.datum().getSchema();
        int dataPos = schema.getField("data").pos();
        int idPos = schema.getField("data").schema().getField(idField).pos();
        
        String id = (String) ((IndexedRecord) key.datum().get(dataPos)).get(idPos);
        
        context.write(
	        		new AvroKey<String>(id), 
	        		new AvroValue<IndexedRecord>(key.datum()));
	}
    
}
