package eu.dnetlib.data.mapreduce.hbase.broker.enrich;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.data.broker.model.openaire.OpenAireEventPayload;
import eu.dnetlib.data.mapreduce.hbase.broker.Topic;
import eu.dnetlib.data.mapreduce.hbase.broker.mapping.HighlightFactory;
import eu.dnetlib.data.mapreduce.hbase.broker.mapping.OpenAireEventPayloadFactory;
import eu.dnetlib.data.mapreduce.hbase.broker.model.EventMessage;
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
import eu.dnetlib.data.proto.ResultProtos.Result.Metadata;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import static eu.dnetlib.data.mapreduce.hbase.broker.mapping.EventFactory.asEvent;
import static eu.dnetlib.data.mapreduce.util.OafHbaseUtils.getKey;
import static eu.dnetlib.data.mapreduce.util.OafHbaseUtils.getPropertyValues;

/**
 * Reducer that compares the {@link Oaf} records grouped under the same key and emits broker "enrichment" events:
 * for every ordered pair (current, other) of records with different ids, an event is produced for each piece of
 * information (PID, open access instance, abstract, publication date) that other provides and current lacks.
 *
 * Events are written as the string form of an {@link EventMessage}, always under the same empty {@link Text} key.
 *
 * Created by claudio on 08/07/16.
 */
public class EnrichmentReducer extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable, Text, Text> {

	/** Maximum number of values deserialised for a single key; values beyond this are ignored. */
	private static final int LIMIT = 1000;

	/** PID types (matched case-insensitively against the pid qualifier classid) considered for PID enrichment. */
	private Set<String> pidType;

	/** Reusable output key; it is set once to the empty string and never changed. */
	private Text tKey;

	/** Reusable output value, overwritten for every emitted event. */
	private Text tValue;

	/** Maps datasource id -> datasource type classid, restricted to the whitelisted typologies. */
	private Map<String, String> dsTypeMap = Maps.newHashMap();

	/** Datasource ids that receive events even when their typology is not in the typology whitelist. */
	private Set<String> dsWhitelist = Sets.newHashSet();

	/** Datasource ids that never receive events. */
	private Set<String> dsBlacklist = Sets.newHashSet();

	// This is for EuropePMC. They expose OA abstracts, but we want to identify real OA publications.
	private Set<String> untrustedOaDsList = Sets.newHashSet();

	// White list for datasource typologies.
	private Set<String> dsTypeWhitelist = Sets.newHashSet();

	/**
	 * Initialises the reusable writables, reads the datasource white/black lists from the job properties and loads
	 * the datasource typology map from HBase.
	 *
	 * @param context the reducer context
	 * @throws IOException when the datasource typology map cannot be loaded
	 * @throws InterruptedException propagated from the superclass setup
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		super.setup(context);

		System.out.println("LIMIT: " + LIMIT);

		tKey = new Text("");
		tValue = new Text();

		pidType = Sets.newHashSet("doi", "pmc", "pmid", "urn", "arxiv");

		dsWhitelist.addAll(getPropertyValues(context, "broker.datasource.id.whitelist"));
		dsBlacklist.addAll(getPropertyValues(context, "broker.datasource.id.blacklist"));
		dsTypeWhitelist.addAll(getPropertyValues(context, "broker.datasource.type.whitelist"));
		untrustedOaDsList.addAll(getPropertyValues(context, "broker.datasource.untrusted.oa.list"));

		dsTypeMap = getDsTypeMap(context, dsTypeWhitelist);

		System.out.println("datasource whitelist: " + dsWhitelist);
		System.out.println("datasource blacklist: " + dsBlacklist);
		System.out.println("datasource OA list: " + untrustedOaDsList);

		System.out.println("datasource type whitelist: " + dsTypeWhitelist);
	}

	/**
	 * Scans the table named by the "hbase.mapred.inputtable" property (rows prefixed by "10", column family
	 * "datasource") and builds the map datasource id -> datasource type classid, keeping only the datasources
	 * whose type is contained in the given whitelist. Rows without a "body" qualifier are skipped.
	 *
	 * @param context the reducer context providing the configuration
	 * @param dsTypeWhitelist the accepted datasource type classids
	 * @return the datasource typology map, possibly empty
	 * @throws IOException when the table cannot be opened, scanned or closed, or a body cannot be parsed
	 */
	private Map<String, String> getDsTypeMap(final Context context, final Set<String> dsTypeWhitelist) throws IOException {
		System.out.println("loading datasource typology mapping");

		final Map<String, String> dsTypeMap = Maps.newHashMap();

		final Scan scan = new Scan();
		final FilterList fl = new FilterList(Operator.MUST_PASS_ALL);
		fl.addFilter(new PrefixFilter(Bytes.toBytes("10")));
		scan.setFilter(fl);
		scan.addFamily(Bytes.toBytes("datasource"));

		final String tableName = context.getConfiguration().get("hbase.mapred.inputtable");

		System.out.println(String.format("table name: '%s'", tableName));

		// Both the table and the scanner hold client/server side resources: release them even when the scan fails.
		final HTable table = new HTable(context.getConfiguration(), tableName);
		try {
			final ResultScanner res = table.getScanner(scan);
			try {
				for (Result r : res) {
					final byte[] b = r.getValue(Bytes.toBytes("datasource"), Bytes.toBytes("body"));
					if (b != null) {
						final Oaf oaf = Oaf.parseFrom(b);
						final String dsId = StringUtils.substringAfter(oaf.getEntity().getId(), "|");
						final String dsType = oaf.getEntity().getDatasource().getMetadata().getDatasourcetype().getClassid();

						if (dsTypeWhitelist.contains(dsType)) {
							System.out.println(String.format("dsId '%s', dsType '%s'", dsId, dsType));
							dsTypeMap.put(dsId, dsType);
						}
					}
				}
			} finally {
				res.close();
			}
		} finally {
			table.close();
		}

		System.out.println("datasource type map size: " + dsTypeMap.size());
		return dsTypeMap;
	}

	/**
	 * Deserialises at most {@link #LIMIT} values of the group and generates the enrichment events among them.
	 *
	 * @param key the grouping key (not used)
	 * @param values the serialised Oaf records sharing the key
	 * @param context the reducer context
	 * @throws IllegalArgumentException when a value is not a valid serialised Oaf
	 */
	@Override
	protected void reduce(final ImmutableBytesWritable key, final Iterable<ImmutableBytesWritable> values, final Context context) throws IOException,
			InterruptedException {

		final List<Oaf> oafList = Lists.newArrayList(Iterables.transform(Iterables.limit(values, LIMIT), oafDeserialiser()));

		generateEvents(oafList, context);
	}

	/**
	 * Compares every record (current) against all the other records of the group and emits the enrichment events
	 * addressed to current. A current record is skipped, incrementing an "events skipped" counter, when its
	 * datasource has no whitelisted typology and is not whitelisted by id, or when its datasource is blacklisted.
	 *
	 * @param oafList the records of the group
	 * @param context the reducer context
	 */
	private void generateEvents(final List<Oaf> oafList, final Context context) throws IOException, InterruptedException {

		for (Oaf current : oafList) {

			final String currentId = current.getEntity().getId();

			final String currentDsId = StringUtils.substringAfter(getKey(current.getEntity().getCollectedfromList()), "|");
			final String currentDsType = dsTypeMap.get(currentDsId);

			if (StringUtils.isBlank(currentDsType) && !dsWhitelist.contains(currentDsId)) {
				context.getCounter("events skipped", "datasource type excluded").increment(1);
				continue;
			}
			if (dsBlacklist.contains(currentDsId)) {
				context.getCounter("events skipped", "datasource blacklisted").increment(1);
				continue;
			}

			for (Oaf other : oafList) {

				// a record is never compared against itself (or against another copy with the same id)
				if (currentId.equals(other.getEntity().getId())) {
					continue;
				}

				//PIDS
				emitPidEvents(current, other, context);

				final String otherDsId = StringUtils.substringAfter(getKey(other.getEntity().getCollectedfromList()), "|");

				//OPEN ACCESS STATUS
				if (openAccessCheck(current, currentDsId, other, otherDsId)) {
					emitOpenAccessEvent(current, other, context);
				}

				//ABSTRACT
				if (!hasAbstract(current) && hasAbstract(other)) {
					emitAbstractEvent(current, other, context);
				}

				//PUBLICATION DATE
				if (!hasPubDate(current) && hasPubDate(other)) {
					emitPublicationDateEvent(current, other, context);
				}
			}
		}
	}

	/**
	 * Emits one PID event for each known pid type that other provides and current lacks; the event payload is
	 * current extended with the pids of that type found in other.
	 */
	private void emitPidEvents(final Oaf current, final Oaf other, final Context context) throws IOException, InterruptedException {
		for (final String type : pidType) {
			if (!hasPid(current, type) && hasPid(other, type)) {
				final Oaf.Builder prototype = Oaf.newBuilder(current);
				final Iterable<StructuredProperty> pids =
						Iterables.filter(other.getEntity().getPidList(), new Predicate<StructuredProperty>() {
							@Override
							public boolean apply(final StructuredProperty pid) {
								return pid.getQualifier().getClassid().equalsIgnoreCase(type);
							}
						});
				prototype.getEntityBuilder().addAllPid(pids);
				final Oaf oaf = prototype.build();

				final EventMessage event = asEvent(oaf.getEntity(), Topic.PID, other.getEntity());
				final OpenAireEventPayload payload = OpenAireEventPayloadFactory.fromOAF(oaf.getEntity());
				event.setPayload(HighlightFactory.highlightEnrichPid(payload, Lists.newArrayList(pids)).toJSON());

				emit(event, context);
				context.getCounter("event", Topic.PID.getValue()).increment(1);
			}
		}
	}

	/**
	 * Emits an OA_STATUS event whose payload is current extended with the instances of other whose licence
	 * classid is "OPEN" (case-insensitive).
	 */
	private void emitOpenAccessEvent(final Oaf current, final Oaf other, final Context context) throws IOException, InterruptedException {
		final Oaf.Builder prototype = Oaf.newBuilder(current);
		final Iterable<Instance> openInstances = Iterables.filter(other.getEntity().getResult().getInstanceList(), new Predicate<Instance>() {
			@Override
			public boolean apply(final Instance i) {
				return "OPEN".equalsIgnoreCase(i.getLicence().getClassid());
			}
		});
		prototype.getEntityBuilder().getResultBuilder().addAllInstance(openInstances);

		final Oaf oaf = prototype.build();

		final EventMessage event = asEvent(oaf.getEntity(), Topic.OA_STATUS, other.getEntity());
		final OpenAireEventPayload payload = OpenAireEventPayloadFactory.fromOAF(oaf.getEntity());
		event.setPayload(HighlightFactory.highlightEnrichOa(payload, Lists.newArrayList(openInstances)).toJSON());

		emit(event, context);
		context.getCounter("event", Topic.OA_STATUS.getValue()).increment(1);
	}

	/**
	 * Emits an ABSTRACT event whose payload is current extended with all the descriptions of other.
	 */
	private void emitAbstractEvent(final Oaf current, final Oaf other, final Context context) throws IOException, InterruptedException {
		final Oaf.Builder prototype = Oaf.newBuilder(current);
		final List<StringField> descriptionList = other.getEntity().getResult().getMetadata().getDescriptionList();
		prototype.getEntityBuilder().getResultBuilder().getMetadataBuilder().addAllDescription(descriptionList);

		final Oaf oaf = prototype.build();

		final EventMessage event = asEvent(oaf.getEntity(), Topic.ABSTRACT, other.getEntity());
		final OpenAireEventPayload payload = OpenAireEventPayloadFactory.fromOAF(oaf.getEntity());
		event.setPayload(HighlightFactory.highlightEnrichAbstract(payload, descriptionList).toJSON());

		emit(event, context);
		context.getCounter("event", Topic.ABSTRACT.getValue()).increment(1);
	}

	/**
	 * Emits a PUBLICATION_DATE event whose payload is current with the date of acceptance taken from other.
	 */
	private void emitPublicationDateEvent(final Oaf current, final Oaf other, final Context context) throws IOException, InterruptedException {
		final Oaf.Builder prototype = Oaf.newBuilder(current);

		final StringField date = other.getEntity().getResult().getMetadata().getDateofacceptance();
		prototype.getEntityBuilder().getResultBuilder().getMetadataBuilder().setDateofacceptance(date);

		final Oaf oaf = prototype.build();

		final EventMessage event = asEvent(oaf.getEntity(), Topic.PUBLICATION_DATE, other.getEntity());
		final OpenAireEventPayload payload = OpenAireEventPayloadFactory.fromOAF(oaf.getEntity());
		event.setPayload(HighlightFactory.highlightEnrichPublicationDate(payload, date).toJSON());

		emit(event, context);
		context.getCounter("event", Topic.PUBLICATION_DATE.getValue()).increment(1);
	}

	/**
	 * Check the OpenAccess status for publications current and other. Their resp. datasource id can drive the decision.
	 * For certain datasources the OA status of their publications cannot be trusted, thus they have to be enriched even
	 * if their status is OPEN, and they must be excluded when they are the 'other' datasource.
	 *
	 * In other words, other's OA status is interesting for current when both conditions hold:
	 * (1) the current datasource is untrusted OR current is NOT OPEN;
	 * (2) the other datasource is trusted AND other is OPEN.
	 *
	 * @param current the record that may be enriched
	 * @param currentDsId the datasource id of current
	 * @param other the record that may provide the OA status
	 * @param otherDsId the datasource id of other
	 * @return true when other's OA status has to be notified to current DS id.
	 */
	private boolean openAccessCheck(final Oaf current, final String currentDsId, final Oaf other, final String otherDsId) {
		return (untrustedOaDsList.contains(currentDsId) || !hasAccess(current, "OPEN", false)) &&
				(!untrustedOaDsList.contains(otherDsId) && hasAccess(other, "OPEN", false));
	}

	/**
	 * Writes the string form of the event to the output; the output key is always the empty string set in setup.
	 *
	 * @param e the event to write
	 * @param context the reducer context
	 */
	private void emit(final EventMessage e, final Context context) throws IOException, InterruptedException {
		tValue.set(e.toString());
		context.write(tKey, tValue);
	}

	/**
	 * @param current the record to check
	 * @return true when the date of acceptance of the record is not blank
	 */
	private boolean hasPubDate(final Oaf current) {
		final Metadata m = current.getEntity().getResult().getMetadata();
		return StringUtils.isNotBlank(m.getDateofacceptance().getValue());
	}

	/**
	 * @param oaf the record to check
	 * @return true when the record carries at least one non blank description; false when it has no description
	 */
	private boolean hasAbstract(final Oaf oaf) {
		// 'any' rather than 'all': 'all' is vacuously true on an empty list, which made records without any
		// description look like they already had an abstract.
		return Iterables.any(oaf.getEntity().getResult().getMetadata().getDescriptionList(), new Predicate<StringField>() {
			@Override
			public boolean apply(final StringField s) {
				return StringUtils.isNotBlank(s.getValue());
			}
		});
	}

	/**
	 * Checks the licence of the instances of the children of the given record.
	 *
	 * NOTE(review): the check is evaluated with 'all' over the children list, hence it is true for a record
	 * without children - confirm this is intended.
	 *
	 * @param oaf the record to check
	 * @param access the licence classid to look for, compared case-insensitively
	 * @param strict when true every instance of a child must match, otherwise one matching instance is enough
	 * @return true when every child of the record satisfies the check
	 */
	private boolean hasAccess(final Oaf oaf, final String access, final boolean strict) {
		return Iterables.all(oaf.getEntity().getChildrenList(), new Predicate<OafEntity>() {
			@Override
			public boolean apply(final OafEntity entity) {
				final Predicate<Instance> p = new Predicate<Instance>() {
					@Override
					public boolean apply(final Instance i) {
						return access.equalsIgnoreCase(i.getLicence().getClassid());
					}
				};
				return strict ? Iterables.all(entity.getResult().getInstanceList(), p) :  Iterables.any(entity.getResult().getInstanceList(), p);
			}
		});
	}

	/**
	 * @param oaf the record to check
	 * @param type the pid type, compared case-insensitively against the pid qualifier classid
	 * @return true when the record has at least one pid of the given type
	 */
	private boolean hasPid(final Oaf oaf, final String type) {
		return Iterables.any(oaf.getEntity().getPidList(), new Predicate<StructuredProperty>() {
			@Override
			public boolean apply(final StructuredProperty pid) {
				return pid.getQualifier().getClassid().equalsIgnoreCase(type);
			}
		});
	}

	/**
	 * @return a function parsing a serialised Oaf out of a copy of the writable's bytes; parse failures are
	 *         rethrown as IllegalArgumentException, keeping the original exception as cause
	 */
	private Function<ImmutableBytesWritable, Oaf> oafDeserialiser() {
		return new Function<ImmutableBytesWritable, Oaf>() {
			@Override
			public Oaf apply(final ImmutableBytesWritable input) {
				try {
					return Oaf.parseFrom(input.copyBytes());
				} catch (InvalidProtocolBufferException e) {
					throw new IllegalArgumentException(e);
				}
			}
		};
	}

}
