package eu.dnetlib.data.collector.plugins;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.Iterator;
import java.util.Set;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;

import com.google.common.base.Function;
import com.google.common.collect.Iterators;

import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;

/**
 * Collector plugin that reads a delimited text file (CSV, TSV, ...) from the URL given in the interface descriptor and exposes each
 * data row as a small XML document.
 * <p>
 * The following entries of {@code descriptor.getParams()} are read:
 * <ul>
 * <li>{@code separator}: the column delimiter; only its first character is used. A blank/missing value or the two-character string
 * {@code \t} selects the TAB character.</li>
 * <li>{@code quote}: the quote character; only its first character is used. A blank/missing value disables quoting.</li>
 * <li>{@code identifier}: the name of the header whose column is flagged with {@code isID="true"} in the produced XML.</li>
 * </ul>
 * The first line of the file is used as the header row. Each produced record has the form
 * {@code <csvRecord><column name="HEADER" [isID="true"]>VALUE</column>...</csvRecord>}.
 */
public class HttpCSVCollectorPlugin extends AbstractCollectorPlugin {

	private static final Log log = LogFactory.getLog(HttpCSVCollectorPlugin.class);

	/**
	 * Iterable over the rows of the remote file described by an {@link InterfaceDescriptor}. Every call to {@link #iterator()} opens
	 * the URL again and parses the file from the beginning.
	 */
	class HTTPCSVIterator implements Iterable<String> {

		/** The descriptor providing the base URL and the parsing parameters. */
		private final InterfaceDescriptor descriptor;

		/**
		 * Instantiates a new HTTPCSV iterator.
		 * 
		 * @param descriptor
		 *            the descriptor holding the base URL and the {@code separator}, {@code quote} and {@code identifier} params
		 */
		public HTTPCSVIterator(final InterfaceDescriptor descriptor) {
			this.descriptor = descriptor;
		}

		/**
		 * Opens the remote file and returns an iterator that serializes each parsed record to XML.
		 * <p>
		 * The parser (and therefore the underlying stream) is intentionally left open on success because the returned iterator is
		 * backed by it; this method does not close it.
		 * 
		 * @return an iterator over the XML serialization of the records; never {@code null}
		 * @throws IllegalStateException
		 *             if the URL cannot be opened or the header row cannot be parsed; the original failure is set as the cause
		 */
		@SuppressWarnings("resource")
		@Override
		public Iterator<String> iterator() {

			Reader reader = null;
			try {
				final String identifier = descriptor.getParams().get("identifier");
				// Build the format before touching the network so that bad parameters cannot leave a stream open.
				final CSVFormat format = buildFormat(descriptor.getParams().get("separator"), descriptor.getParams().get("quote"));

				final URL url = new URL(descriptor.getBaseUrl());
				// NOTE(review): the platform default charset is used for decoding, as before — confirm whether an explicit charset
				// should be configured.
				reader = new InputStreamReader(url.openStream());

				final CSVParser parser = new CSVParser(reader, format);
				final Set<String> headers = parser.getHeaderMap().keySet();

				return Iterators.transform(parser.iterator(), new Function<CSVRecord, String>() {

					@Override
					public String apply(final CSVRecord input) {
						final Document document = DocumentHelper.createDocument();
						final Element root = document.addElement("csvRecord");
						for (String key : headers) {
							final Element row = root.addElement("column");
							row.addAttribute("name", key).addText(input.get(key));
							if (key.equals(identifier)) {
								row.addAttribute("isID", "true");
							}
						}

						return document.asXML();
					}
				});
			} catch (Exception e) {
				log.error("Error iterating csv lines", e);
				// Nothing will ever consume (or close) the stream once setup has failed, so release it here.
				closeQuietly(reader);
				// Returning null would only surface later as an unexplained NullPointerException in the caller's loop.
				throw new IllegalStateException("Error iterating csv lines", e);
			}
		}

		/**
		 * Builds the parser configuration: Excel dialect, header taken from the first line, trimmed values.
		 * 
		 * @param separatorString
		 *            the configured delimiter; blank, {@code null} or the literal {@code \t} mean TAB, otherwise the first character
		 * @param quote
		 *            the configured quote; blank or {@code null} disables quoting, otherwise the first character is used
		 * @return the format to parse the remote file with
		 */
		private CSVFormat buildFormat(final String separatorString, final String quote) {
			// The blank check must come first: it is null-safe, whereas calling equals on a missing parameter is not.
			final char separator = StringUtils.isBlank(separatorString) || "\\t".equals(separatorString) ? '\t' : separatorString.charAt(0);
			final CSVFormat base = CSVFormat.EXCEL.withHeader().withDelimiter(separator);
			final CSVFormat withQuote = StringUtils.isBlank(quote) ? base.withQuote(null) : base.withQuote(quote.charAt(0));
			return withQuote.withTrim();
		}

		/**
		 * Closes the given reader, logging (not propagating) any failure so that the primary error is not masked.
		 * 
		 * @param reader
		 *            the reader to close; may be {@code null}
		 */
		private void closeQuietly(final Reader reader) {
			if (reader == null) {
				return;
			}
			try {
				reader.close();
			} catch (IOException closeError) {
				log.warn("Error closing csv stream", closeError);
			}
		}
	}

	/**
	 * Returns an iterable over the XML-serialized rows of the file located at the descriptor's base URL. The file is not opened until
	 * the iterable's {@code iterator()} method is called.
	 * 
	 * @param descriptor
	 *            the descriptor holding the base URL and the parsing params
	 * @param fromDate
	 *            not used by this plugin
	 * @param untilDate
	 *            not used by this plugin
	 * @return the iterable over the records
	 * @throws CollectorServiceException
	 *             declared by the overridden method; not thrown by this implementation
	 * @see eu.dnetlib.data.collector.plugin.CollectorPlugin#collect(eu.dnetlib.data.collector.rmi.InterfaceDescriptor, java.lang.String,
	 *      java.lang.String)
	 */
	@Override
	public Iterable<String> collect(final InterfaceDescriptor descriptor, final String fromDate, final String untilDate) throws CollectorServiceException {

		return new HTTPCSVIterator(descriptor);
	}

}
