package eu.dnetlib.data.mapreduce.hbase.broker.enrich;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.data.mapreduce.hbase.broker.AbstractEventFactory;
import eu.dnetlib.data.mapreduce.hbase.broker.OAVersionEventFactory;
import eu.dnetlib.data.mapreduce.hbase.broker.PIDEventFactory;
import eu.dnetlib.data.mapreduce.hbase.broker.PublicationDateEventFactory;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.RandomUtils;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import static eu.dnetlib.data.mapreduce.util.OafHbaseUtils.getKey;
import static eu.dnetlib.data.mapreduce.util.OafHbaseUtils.getPropertyValues;

/**
 * Reducer that receives groups of serialised {@link Oaf} records sharing the same key and, for every ordered pair of
 * records in a group having different entity ids, invokes the broker event factories (PID, OA version, abstract and
 * publication date).
 *
 * Records are filtered by the datasource they were collected from: a record is processed only when its datasource has
 * a whitelisted typology or its datasource id is explicitly whitelisted, and the datasource id is not blacklisted.
 *
 * Created by claudio on 08/07/16.
 */
public class EnrichmentReducer extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable, Text, Text> {

	/** Maximum number of values deserialised for a single key; values beyond this limit are ignored. */
	private static final int LIMIT = 1000;

	/** Maps a datasource id (the part after the '|' separator) to its typology; only whitelisted typologies are stored. */
	private Map<String, String> dsTypeMap = Maps.newHashMap();

	/** Datasource ids that are processed even when their typology is not in the typology whitelist. */
	private Set<String> dsWhitelist = Sets.newHashSet();

	/** Datasource ids that are never processed. */
	private Set<String> dsBlacklist = Sets.newHashSet();

	// This is for EuropePMC. They expose OA abstracts, but we want to identify real OA publications.
	private Set<String> untrustedOaDsList = Sets.newHashSet();

	// White list for datasource typologies.
	private Set<String> dsTypeWhitelist = Sets.newHashSet();

	/**
	 * Reads the white/black lists from the job configuration and loads the datasource typology map from HBase.
	 *
	 * @param context
	 *            the reducer context, used to access the job configuration
	 * @throws IOException
	 *             if the datasource table cannot be scanned
	 * @throws InterruptedException
	 *             propagated from {@link Reducer#setup}
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		super.setup(context);

		System.out.println("LIMIT: " + LIMIT);

		dsWhitelist.addAll(getPropertyValues(context, "broker.datasource.id.whitelist"));
		dsBlacklist.addAll(getPropertyValues(context, "broker.datasource.id.blacklist"));
		dsTypeWhitelist.addAll(getPropertyValues(context, "broker.datasource.type.whitelist"));
		untrustedOaDsList.addAll(getPropertyValues(context, "broker.datasource.untrusted.oa.list"));

		dsTypeMap = getDsTypeMap(context, dsTypeWhitelist);

		System.out.println("datasource whitelist: " + dsWhitelist);
		System.out.println("datasource blacklist: " + dsBlacklist);
		System.out.println("datasource OA list: " + untrustedOaDsList);

		System.out.println("datasource type whitelist: " + dsTypeWhitelist);
	}

	/**
	 * Scans the rows whose key starts with "10" in the table named by the "hbase.mapred.inputtable" property and builds
	 * the mapping between datasource ids and datasource typologies, keeping only the whitelisted typologies.
	 *
	 * @param context
	 *            the reducer context, used to access the job configuration
	 * @param dsTypeWhitelist
	 *            the datasource typologies to keep
	 * @return a map from datasource id (the part of the entity id following the first '|') to its typology class id
	 * @throws IOException
	 *             if the table cannot be opened, scanned or closed, or a datasource body cannot be parsed
	 */
	private Map<String, String> getDsTypeMap(final Context context, final Set<String> dsTypeWhitelist) throws IOException {
		System.out.println("loading datasource typology mapping");

		final Map<String, String> dsTypeMap = Maps.newHashMap();

		// Column coordinates are loop invariant: compute them once.
		final byte[] family = Bytes.toBytes("datasource");
		final byte[] qualifier = Bytes.toBytes("body");

		final Scan scan = new Scan();
		final FilterList fl = new FilterList(Operator.MUST_PASS_ALL);
		fl.addFilter(new PrefixFilter(Bytes.toBytes("10")));
		scan.setFilter(fl);
		scan.addFamily(family);

		final String tableName = context.getConfiguration().get("hbase.mapred.inputtable");

		System.out.println(String.format("table name: '%s'", tableName));

		final HTable table = new HTable(context.getConfiguration(), tableName);
		try {
			final ResultScanner res = table.getScanner(scan);
			try {
				for (Result r : res) {
					final byte[] b = r.getValue(family, qualifier);
					if (b != null) {
						final Oaf oaf = Oaf.parseFrom(b);
						final String dsId = StringUtils.substringAfter(oaf.getEntity().getId(), "|");
						final String dsType = oaf.getEntity().getDatasource().getMetadata().getDatasourcetype().getClassid();

						if (dsTypeWhitelist.contains(dsType)) {
							System.out.println(String.format("dsId '%s', dsType '%s'", dsId, dsType));
							dsTypeMap.put(dsId, dsType);
						}
					}
				}
			} finally {
				// Release the scanner even when parsing or scanning fails.
				res.close();
			}
		} finally {
			// The table handle was previously never closed; release it in all cases.
			table.close();
		}

		System.out.println("datasource type map size: " + dsTypeMap.size());
		return dsTypeMap;
	}

	/**
	 * Deserialises at most {@link #LIMIT} values of the group and generates the events for them.
	 *
	 * @param key
	 *            the group key (not used here)
	 * @param values
	 *            the serialised {@link Oaf} records of the group
	 * @param context
	 *            the reducer context, handed over to the event factories
	 * @throws IllegalArgumentException
	 *             if one of the values is not a valid serialised {@link Oaf}
	 */
	@Override
	protected void reduce(final ImmutableBytesWritable key, final Iterable<ImmutableBytesWritable> values, final Context context) throws IOException,
			InterruptedException {

		// Lists.newArrayList copies the lazily transformed view into a concrete list, so that it can be iterated many times.
		final List<Oaf> oafList = Lists.newArrayList(Iterables.transform(Iterables.limit(values, LIMIT), oafDeserialiser()));

		generateEvents(oafList, context);
	}

	/**
	 * Compares every admitted record of the group against every other record with a different entity id, delegating to
	 * the event factories. Records excluded by the datasource filters only increment a counter in the "events skipped"
	 * group.
	 *
	 * @param oafList
	 *            the records of the group
	 * @param context
	 *            the reducer context, used for the counters and handed over to the event factories
	 */
	private void generateEvents(final List<Oaf> oafList, final Context context) throws IOException, InterruptedException {

		for (Oaf current : oafList) {

			final String currentId = current.getEntity().getId();

			final String currentDsId = StringUtils.substringAfter(getKey(current.getEntity().getCollectedfromList()), "|");
			final String currentDsType = dsTypeMap.get(currentDsId);

			// dsTypeMap holds only whitelisted typologies, hence a blank type means "typology not whitelisted or unknown".
			if (StringUtils.isBlank(currentDsType) && !dsWhitelist.contains(currentDsId)) {
				context.getCounter("events skipped", "datasource type excluded").increment(1);
				continue;
			}

			// The blacklist wins over both whitelists.
			if (dsBlacklist.contains(currentDsId)) {
				context.getCounter("events skipped", "datasource blacklisted").increment(1);
				continue;
			}

			for (Oaf other : oafList) {

				final String otherId = other.getEntity().getId();
				if (!currentId.equals(otherId)) {
					// NOTE(review): the meaning of the random float argument is defined by the factories - confirm there.
					PIDEventFactory.process(context, current, other, RandomUtils.nextFloat());
					OAVersionEventFactory.process(context, current, other, RandomUtils.nextFloat(), untrustedOaDsList);
					AbstractEventFactory.process(context, current, other, RandomUtils.nextFloat());
					PublicationDateEventFactory.process(context, current, other, RandomUtils.nextFloat());
				}
			}
		}
	}

	/**
	 * Builds the function used to turn the serialised values into {@link Oaf} objects.
	 *
	 * @return a function parsing the bytes of an {@link ImmutableBytesWritable} into an {@link Oaf}; it throws
	 *         {@link IllegalArgumentException}, wrapping the original cause, when the bytes cannot be parsed
	 */
	private Function<ImmutableBytesWritable, Oaf> oafDeserialiser() {
		return new Function<ImmutableBytesWritable, Oaf>() {
			@Override
			public Oaf apply(final ImmutableBytesWritable input) {
				try {
					return Oaf.parseFrom(input.copyBytes());
				} catch (InvalidProtocolBufferException e) {
					throw new IllegalArgumentException(e);
				}
			}
		};
	}

}
