package eu.dnetlib.iis.collapsers;

import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer that collapses all records grouped under one key into a single output record.
 *
 * <p>Every incoming record is expected to expose an {@code origin} field and a {@code data}
 * field. The job configuration property {@code origins} holds a comma-separated list of origin
 * names ordered from the most to the least preferred. For each key the record whose origin
 * appears earliest in that list is selected and the content of its {@code data} field is
 * written out as the output key.
 *
 * <p>Records whose origin is {@code null} or not present in the configured list are treated as
 * the least preferred ones. When several records share the same priority, the one encountered
 * first is kept.
 *
 * @author Dominika Tkaczyk
 */
public class DefaultCollapserReducer extends Reducer<AvroKey<String>, AvroValue<IndexedRecord>, AvroKey<IndexedRecord>, NullWritable> {

    /** Job configuration property holding the comma-separated origin priority list. */
    private static final String ORIGINS_PROPERTY = "origins";

    /** Name of the record field identifying where the record comes from. */
    private static final String ORIGIN_FIELD = "origin";

    /** Name of the record field holding the payload emitted by this reducer. */
    private static final String DATA_FIELD = "data";

    /** Origin names ordered from the most to the least preferred. */
    private List<String> origins;

    /**
     * Reads the origin priority list from the job configuration.
     *
     * @param context task context giving access to the job configuration
     * @throws IllegalStateException when the {@code origins} property is not set
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        String rawOrigins = context.getConfiguration().get(ORIGINS_PROPERTY);
        if (rawOrigins == null) {
            throw new IllegalStateException(
                    "Missing required job configuration property '" + ORIGINS_PROPERTY + "'");
        }
        // Tolerate whitespace around the separators (e.g. "a, b"), which would otherwise
        // prevent the affected entries from ever matching a record origin.
        origins = Arrays.asList(rawOrigins.trim().split("\\s*,\\s*"));
    }

    /**
     * Picks the record with the most preferred origin among {@code values} and writes the
     * content of its {@code data} field to the output. Nothing is written when {@code values}
     * is empty.
     *
     * @param key grouping key shared by all the values; not used for the selection
     * @param values records to collapse
     * @param context task context receiving the collapsed record
     */
    @Override
    public void reduce(AvroKey<String> key, Iterable<AvroValue<IndexedRecord>> values, Context context)
            throws IOException, InterruptedException {

        IndexedRecord bestRecord = null;
        int bestRank = Integer.MAX_VALUE;

        for (AvroValue<IndexedRecord> value : values) {
            IndexedRecord candidate = value.datum();
            int candidateRank = rankOf(candidate);
            if (bestRecord == null || candidateRank < bestRank) {
                // Keep an independent copy: Hadoop may reuse the value object handed out by
                // the iterator, so holding on to the datum itself is not safe.
                bestRecord = GenericData.get().deepCopy(candidate.getSchema(), candidate);
                bestRank = candidateRank;
            }
        }

        if (bestRecord == null) {
            // No values were supplied, so there is nothing to collapse.
            return;
        }

        int dataPos = bestRecord.getSchema().getField(DATA_FIELD).pos();
        context.write(new AvroKey<IndexedRecord>((IndexedRecord) bestRecord.get(dataPos)), NullWritable.get());
    }

    /**
     * Returns the priority rank of a record: the index of its origin in the configured list
     * (lower is better), or {@link Integer#MAX_VALUE} when the origin is {@code null} or not
     * listed.
     */
    private int rankOf(IndexedRecord record) {
        Object origin = record.get(record.getSchema().getField(ORIGIN_FIELD).pos());
        // Avro may hand strings over as any CharSequence (e.g. Utf8), hence toString()
        // instead of a cast to String.
        int index = origin == null ? -1 : origins.indexOf(origin.toString());
        return index >= 0 ? index : Integer.MAX_VALUE;
    }

}
 