package eu.dnetlib.msro.workflows.nodes.download;

import java.io.ByteArrayInputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import com.google.common.base.Function;
import com.google.gson.Gson;

import eu.dnetlib.data.download.rmi.DownloadItem;

/**
 * Function that parses an XML record and returns the JSON form of a {@link DownloadItem} built from it.
 * <p>
 * Two XPath expressions drive the extraction: {@code xpath} selects the nodes whose values are stored (as a JSON array of strings)
 * in both the item's url and original url, and {@code xpathMetadataID} is evaluated as a string and stored as the item's metadata id.
 * <p>
 * Both expressions can be replaced through the setters; no synchronization is performed on them.
 */
public class UrlExtractor implements Function<String, String> {

	private static final Log log = LogFactory.getLog(UrlExtractor.class);

	/** Serializer reused across calls instead of building a new one for every record. */
	private static final Gson GSON = new Gson();

	/** Parser features switched off so that external entities and external DTDs of the parsed record are not resolved. */
	private static final String[] EXTERNAL_ACCESS_FEATURES = { "http://xml.org/sax/features/external-general-entities",
			"http://xml.org/sax/features/external-parameter-entities", "http://apache.org/xml/features/nonvalidating/load-external-dtd" };

	/** The XPath expression selecting the URL nodes. */
	private String xpath;

	/** The XPath expression whose string value is used as the metadata identifier. */
	private String xpathMetadataID;

	/**
	 * Creates a new extractor.
	 * 
	 * @param xpath
	 *            the XPath expression selecting the URL nodes
	 * @param xpathMetadataID
	 *            the XPath expression yielding the metadata identifier
	 */
	public UrlExtractor(final String xpath, final String xpathMetadataID) {
		this.xpath = xpath;
		this.xpathMetadataID = xpathMetadataID;
	}

	/**
	 * Parses the given XML record and builds the JSON representation of the corresponding {@link DownloadItem}.
	 * 
	 * @param input
	 *            the XML record, as a string
	 * @return the value of {@link DownloadItem#toJSON()}, or {@code null} when the record cannot be parsed or an XPath expression cannot
	 *         be compiled/evaluated (the failure is logged)
	 * @see com.google.common.base.Function#apply(java.lang.Object)
	 */
	@Override
	public String apply(final String input) {
		try {
			// The record is already decoded text: parse it from a Reader so the result does not depend on the platform default charset.
			// A Reader bypasses the parser's byte-level BOM detection, hence a leading U+FEFF is dropped here.
			final String xml = !input.isEmpty() && input.charAt(0) == '\uFEFF' ? input.substring(1) : input;
			final Document doc = newDocumentBuilder().parse(new InputSource(new StringReader(xml)));

			final XPath myXpath = XPathFactory.newInstance().newXPath();

			final XPathExpression urlExpression = myXpath.compile(xpath);
			// The same serialized list is stored in both fields, so it is computed only once.
			final String urls = getNodes((NodeList) urlExpression.evaluate(doc, XPathConstants.NODESET));

			final DownloadItem di = new DownloadItem();
			di.setUrl(urls);
			di.setOriginalUrl(urls);
			di.setIdItemMetadata(myXpath.compile(xpathMetadataID).evaluate(doc));
			return di.toJSON();
		} catch (Exception e) {
			log.error("OPSSS... Something bad happen on evaluating ", e);
			return null;
		}
	}

	/**
	 * Creates a DOM parser that does not resolve external entities or external DTDs.
	 * 
	 * @return a new document builder
	 * @throws ParserConfigurationException
	 *             if the builder cannot be created
	 */
	private static DocumentBuilder newDocumentBuilder() throws ParserConfigurationException {
		final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		for (final String feature : EXTERNAL_ACCESS_FEATURES) {
			try {
				factory.setFeature(feature, false);
			} catch (ParserConfigurationException e) {
				// Best effort: a parser that does not know the feature must not make every record fail.
				log.warn("XML parser does not support disabling feature " + feature, e);
			}
		}
		return factory.newDocumentBuilder();
	}

	/**
	 * Serializes the values of the given nodes as a JSON array of strings.
	 * 
	 * @param nodes
	 *            the nodes selected by the URL XPath, may be {@code null}
	 * @return the JSON array; {@code []} when {@code nodes} is {@code null} or empty
	 */
	private String getNodes(final NodeList nodes) {
		final List<String> extractedUrls = new ArrayList<String>();
		if (nodes != null) {
			for (int i = 0; i < nodes.getLength(); i++) {
				final Node node = nodes.item(i);
				String value = node.getNodeValue();
				if (value == null) {
					// getNodeValue() is null for element nodes: use their text content so that an XPath selecting elements
					// (rather than attributes or text nodes) still yields the URL.
					value = node.getTextContent();
				}
				extractedUrls.add(value);
			}
		}
		return GSON.toJson(extractedUrls);
	}

	/**
	 * Gets the xpath.
	 * 
	 * @return the XPath expression selecting the URL nodes
	 */
	public String getXpath() {
		return xpath;
	}

	/**
	 * Sets the xpath.
	 * 
	 * @param xpath
	 *            the XPath expression selecting the URL nodes
	 */
	public void setXpath(final String xpath) {
		this.xpath = xpath;
	}

	/**
	 * Gets the metadata identifier xpath.
	 * 
	 * @return the XPath expression yielding the metadata identifier
	 */
	public String getXpathMetadataID() {
		return xpathMetadataID;
	}

	/**
	 * Sets the metadata identifier xpath.
	 * 
	 * @param xpathMetadataID
	 *            the XPath expression yielding the metadata identifier
	 */
	public void setXpathMetadataID(final String xpathMetadataID) {
		this.xpathMetadataID = xpathMetadataID;
	}

}
