package eu.dnetlib.data.mapreduce.hbase.dedup.experiment;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
import eu.dnetlib.data.proto.PersonProtos;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;

/**
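 * Collects statistics on anchor persons via job counters: the distribution of the number of merged persons and of coauthors per anchor. Emits no output records.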
 *
 * @author claudio
 *
 */
public class AnchorStatsMapper extends TableMapper<NullWritable, NullWritable> {

	/** Name of the column family read by this mapper, encoded explicitly so it does not depend on the platform default charset. */
	private static final byte[] PERSON_FAMILY = "person".getBytes(Charset.forName("UTF-8"));

	/**
	 * Decodes the person stored in the row and, when it is flagged as an anchor, records how many merged persons and coauthors it has
	 * in the "person merged" and "person coauthors" counter groups. Nothing is written to the job output.
	 *
	 * Rows without a body increment the ("person", "missing body") counter; non-anchor persons increment ("person", "not anchor").
	 * Failures while decoding are counted under the "error" group, keyed by exception class, and the row is skipped.
	 *
	 * @param keyIn
	 *            the row key (unused)
	 * @param value
	 *            the row to inspect
	 * @param context
	 *            the task context, used only for its counters
	 */
	@Override
	protected void map(final ImmutableBytesWritable keyIn, final Result value, final Context context) throws IOException, InterruptedException {

		final byte[] body = value.getValue(PERSON_FAMILY, DedupUtils.BODY_B);

		if (body == null) {
			context.getCounter("person", "missing body").increment(1);
			return;
		}

		try {
			final OafDecoder decoder = OafDecoder.decode(body);

			final PersonProtos.Person p = decoder.getEntity().getPerson();

			if (!p.getAnchor()) {
				context.getCounter("person", "not anchor").increment(1);
				return;
			}

			trackPersonInfo(p.getMergedpersonCount(), context, "person merged");
			trackPersonInfo(p.getCoauthorCount(), context, "person coauthors");

		} catch (final Exception e) {
			// Best effort: a row that cannot be processed is counted and skipped rather than failing the task.
			// Only Exception is caught, so JVM Errors (e.g. OutOfMemoryError) still propagate.
			System.out.println("GOT EX " + e);
			context.getCounter("error", e.getClass().toString()).increment(1);
		}
	}

	/**
	 * Increments, in the given counter group, the counter of the bucket the count falls into. Counts that are zero or negative are
	 * not tracked.
	 *
	 * @param count
	 *            the value to classify
	 * @param context
	 *            the task context providing the counters
	 * @param counterName
	 *            the counter group to update
	 */
	private void trackPersonInfo(final int count, final Context context, final String counterName) {
		final String bucket = bucketLabel(count);
		if (bucket != null) {
			context.getCounter(counterName, bucket).increment(1);
		}
	}

	/**
	 * Maps a count to its bucket label: values 1 to 9 are labelled with the value itself, larger values with the half-open interval
	 * [lower, upper) that contains them, so that every boundary value lands in the bucket whose label includes it.
	 *
	 * @param count
	 *            the value to classify
	 * @return the bucket label, or null when the count is not positive
	 */
	private static String bucketLabel(final int count) {
		if (count <= 0) {
			return null;
		}
		if (count < 10) {
			return Integer.toString(count);
		}
		if (count < 20) {
			return "[10, 20)";
		}
		if (count < 30) {
			return "[20, 30)";
		}
		if (count < 40) {
			return "[30, 40)";
		}
		if (count < 50) {
			return "[40, 50)";
		}
		if (count < 70) {
			return "[50, 70)";
		}
		if (count < 100) {
			return "[70, 100)";
		}
		return "[100, *)";
	}

}
