package eu.dnetlib.data.mapreduce.hbase.broker.enrich;

import java.io.IOException;
import java.util.HashSet;
import java.util.Map;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.UpdateMerger;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.TypeProtos.Type;
import org.apache.commons.collections.MapUtils;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Mapper that re-keys the merged body of a result row by the identifier found in the row's
 * "mergedIn" column family.
 *
 * <p>For every input row the mapper:
 * <ol>
 * <li>reads the column family returned by {@code DedupUtils.getDedupCF_mergedIn(Type.result)}; rows where it is
 * empty are counted and skipped;</li>
 * <li>takes the single qualifier of that family as the output key (presumably the id of the dedup root record
 * the row was merged into - to be confirmed against the dedup workflow);</li>
 * <li>reads the {@code result} column family and merges it into one {@link Oaf} through
 * {@link UpdateMerger#mergeBodyUpdates}; rows with an empty family or a {@code null} merge result are counted
 * and skipped;</li>
 * <li>emits the pair (root id, serialized Oaf).</li>
 * </ol>
 *
 * Created by claudio on 08/07/16.
 */
public class EnrichmentMapper extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {

	/** Reused output value holder, (re)filled for each emitted record. */
	private ImmutableBytesWritable outValue;

	/** Reused output key holder, (re)filled for each emitted record. */
	private ImmutableBytesWritable outKey;

	/** Name of the "mergedIn" column family for results; also used as counter name for skipped rows. */
	private String mergedInCF;

	/** Byte encoding of {@link #mergedInCF}, computed once per task instead of once per record. */
	private byte[] mergedInCFBytes;

	/** Byte encoding of the {@code result} column family name, computed once per task. */
	private byte[] resultCFBytes;

	/**
	 * Allocates the reusable output holders and resolves the column family names read by {@code map}.
	 *
	 * @param context
	 *            the task context (not used here)
	 */
	@Override
	protected void setup(final Context context) {
		outKey = new ImmutableBytesWritable();
		outValue = new ImmutableBytesWritable();

		mergedInCF = DedupUtils.getDedupCF_mergedIn(Type.result);
		mergedInCFBytes = Bytes.toBytes(mergedInCF);
		resultCFBytes = Bytes.toBytes(Type.result.name());
	}

	/**
	 * Emits the merged body of the current row keyed by the id found in its "mergedIn" family.
	 *
	 * @param key
	 *            the row key (not used: the output key comes from the mergedIn qualifier)
	 * @param value
	 *            the HBase row
	 * @param context
	 *            the task context, used for counters and output
	 * @throws IOException
	 *             propagated from the context write
	 * @throws InterruptedException
	 *             propagated from the context write
	 * @throws IllegalArgumentException
	 *             when the mergedIn family holds more than one distinct qualifier
	 */
	@Override
	protected void map(final ImmutableBytesWritable key, final Result value, final Context context) throws IOException, InterruptedException {

		final Map<byte[], byte[]> mergedIn = value.getFamilyMap(mergedInCFBytes);

		if (MapUtils.isEmpty(mergedIn)) {
			context.getCounter("empty", mergedInCF).increment(1);
			return;
		}

		final String rootId = getRootId(mergedIn);

		final Map<byte[], byte[]> map = value.getFamilyMap(resultCFBytes);
		if (MapUtils.isEmpty(map)) {
			context.getCounter("body",  "empty map").increment(1);
			return;
		}

		final Oaf body = UpdateMerger.mergeBodyUpdates(context, map);

		if (body == null) {
			context.getCounter("body",  "empty oaf").increment(1);
			return;
		}

		outKey.set(Bytes.toBytes(rootId));
		outValue.set(body.toByteArray());

		context.write(outKey, outValue);
	}

	/**
	 * Extracts the single qualifier of the given mergedIn family as a string.
	 *
	 * @param mergedIn
	 *            the non-empty qualifier/value map of the mergedIn column family
	 * @return the only qualifier, decoded as UTF-8
	 * @throws IllegalArgumentException
	 *             when the family holds more than one distinct qualifier; the message lists the ids found
	 */
	private String getRootId(final Map<byte[], byte[]> mergedIn) {
		final HashSet<String> ids = Sets.newHashSet(Iterables.transform(mergedIn.keySet(), new Function<byte[], String>() {

			@Override
			public String apply(final byte[] input) {
				// Bytes.toString always decodes as UTF-8, matching the Bytes.toBytes used to build the output key;
				// new String(byte[]) would depend on the JVM default charset and could corrupt non-ASCII ids.
				return Bytes.toString(input);
			}
		}));

		try {
			return Iterables.getOnlyElement(ids);
		} catch (IllegalArgumentException e) {
			// keep the exception type but carry the offending ids in the message, so they show up in the task failure report
			throw new IllegalArgumentException("expected exactly one id in " + mergedInCF + ", found: " + ids, e);
		}
	}

}
