package eu.dnetlib.data.transform;

import java.io.StringReader;
import java.util.List;
import java.util.Locale;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.common.SolrInputDocument;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.protobuf.Descriptors.EnumValueDescriptor;
import com.google.protobuf.Descriptors.FieldDescriptor;
import com.google.protobuf.GeneratedMessage;

/**
 * The Class ProtoDocumentMapper.
 */
public class ProtoDocumentMapper {

	/** The fields. */
	private Document fields;

	/**
	 * Instantiates a new proto document mapper.
	 *
	 * @param fields
	 *            the fields
	 * @throws DocumentException
	 *             the document exception
	 */
	public ProtoDocumentMapper(final String fields) throws DocumentException {
		this.fields = parse(fields);

		if (StringUtils.isBlank(this.fields.valueOf("//FIELD[@name = 'objIdentifier']/@name")))
			throw new IllegalArgumentException("field objIdentifier is mandatory");
	}

	/**
	 * Map.
	 *
	 * @param proto
	 *            the proto
	 * @param version
	 *            the version
	 * @param dsId
	 *            the ds id
	 * @return the solr input document
	 * @throws DocumentException
	 *             the document exception
	 */
	public SolrInputDocument map(final GeneratedMessage proto, final String version, final String dsId) throws DocumentException {

		final SolrInputDocument doc = new SolrInputDocument();

		for (final Object o : fields.selectNodes("//FIELD")) {
			final Element e = (Element) o;

			final String name = e.attribute("name").getValue().toLowerCase().trim();
			final String path = e.attribute("path").getValue();

			doc.setField(name, processMultiPath(proto, Lists.newLinkedList(Splitter.on("|").trimResults().split(path))));
		}

		doc.setField("__dsid", dsId);
		doc.setField("__dsversion", version);
		doc.setField("objidentifier", patchId((String) doc.getFieldValue("objidentifier")));
		doc.setField("__indexrecordidentifier", doc.getFieldValue("objidentifier"));
		doc.setField("__result", Base64.encodeBase64String(proto.toByteArray()));

		return doc;
	}

	private List<Object> processMultiPath(final GeneratedMessage proto, final List<String> paths) {
		final List<Object> response = Lists.newArrayList();
		for (final String pathElements : paths) {
			response.addAll(processPath(proto, Lists.newLinkedList(Splitter.on("/").trimResults().split(pathElements))));
		}
		return response;
	}

	/**
	 * Process path.
	 *
	 * @param proto
	 *            the proto
	 * @param pathElements
	 *            the list
	 * @return the list
	 */
	private List<Object> processPath(final GeneratedMessage proto, final List<String> pathElements) {

		final List<Object> response = Lists.newArrayList();

		if (pathElements.isEmpty()) throw new RuntimeException("ProtoBuf navigation path is empty");

		final FieldDescriptor fd = proto.getDescriptorForType().findFieldByName(pathElements.get(0));
		if (fd != null) {
			if (fd.isRepeated()) {
				final int count = proto.getRepeatedFieldCount(fd);
				for (int i = 0; i < count; i++) {
					final Object field = proto.getRepeatedField(fd, i);
					response.addAll(generateFields(fd, field, pathElements));
				}
			} else {
				final Object field = proto.getField(fd);
				response.addAll(generateFields(fd, field, pathElements));
			}
		} else throw new RuntimeException("Invalid protobuf path (field not found): " + StringUtils.join(pathElements, ">"));

		return response;
	}

	/**
	 * Generate fields.
	 *
	 * @param fd
	 *            the fd
	 * @param field
	 *            the field
	 * @param list
	 *            the list
	 * @return the list
	 */
	private List<Object> generateFields(final FieldDescriptor fd, final Object field, final List<String> list) {
		if (field instanceof GeneratedMessage) {
			if (list.size() > 1) return processPath((GeneratedMessage) field, list.subList(1, list.size()));
			else throw new RuntimeException("No primitive type found");
		} else {
			if (list.size() == 1) {
				final List<Object> res = Lists.newArrayList();
				switch (fd.getType()) {
				case ENUM:
					res.add(((EnumValueDescriptor) field).getName());
					break;
				default:
					res.add(field);
					break;
				}
				return res;
			}
			else throw new RuntimeException("Found a primitive type before the path end");
		}
	}

	/**
	 * Patch the objidentifier: when it comes from HBase, i.e. contains the separator '|' returns the string that follows.
	 *
	 * @param objidentifier
	 *            the objidentifier
	 * @return the string
	 */
	private String patchId(final String objidentifier) {
		return objidentifier.contains("|") ? StringUtils.substringAfter(objidentifier, "|") : objidentifier;
	}

	/**
	 * Parses the.
	 *
	 * @param s
	 *            the s
	 * @return the document
	 * @throws DocumentException
	 *             the document exception
	 */
	private Document parse(final String s) throws DocumentException {
		return new SAXReader().read(new StringReader(s));
	}

}
