package eu.dnetlib.data.collector.plugins.filesystem;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.google.common.base.Function;
import com.google.common.collect.Iterators;

import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;

/**
 * Iterable exposing the content of the files located under a base directory.
 * <p>
 * The base directory is taken from the path component of the interface descriptor's base URL. Each call to
 * {@link #iterator()} creates a new {@link FileSystemIterator} over that directory and lazily maps every file name it
 * yields to the content of that file, passed through {@link XmlCleaner#cleanAllEntities(String)}.
 *
 * @author Sandro, Michele, Andrea
 */
public class FilesystemIterable implements Iterable<String> {

	/** The Constant log. */
	private static final Log log = LogFactory.getLog(FilesystemIterable.class);

	/** Encoding used to decode the files; explicit so that the result does not depend on the platform default. */
	private static final String ENCODING = "UTF-8";

	/** Byte order mark (U+FEFF) as it appears at the start of a decoded string. */
	private static final String BOM = "\uFEFF";

	/** The base directory, resolved from the path of the descriptor's base URL. */
	private final File baseDir;

	/** Value of the "extensions" descriptor parameter, handed as-is to {@link FileSystemIterator}; may be null. */
	private final String extension;

	/**
	 * Instantiates a new filesystem iterable.
	 *
	 * @param descriptor
	 *            the interface descriptor providing the base URL and the optional "extensions" parameter
	 * @throws CollectorServiceException
	 *             if the base URL is malformed or its path does not exist on the filesystem
	 */
	public FilesystemIterable(final InterfaceDescriptor descriptor) throws CollectorServiceException {
		final URL basePath;
		try {
			basePath = new URL(descriptor.getBaseUrl());
		} catch (MalformedURLException e) {
			throw new CollectorServiceException("Filesystem collector failed! ", e);
		}
		// NOTE(review): URL.getPath() does not decode percent-escapes (e.g. %20), so such base URLs
		// would point to a non-existing path - confirm whether encoded URLs must be supported.
		this.baseDir = new File(basePath.getPath());
		if (!baseDir.exists()) { throw new CollectorServiceException(String.format("The base URL %s, does not exist", basePath.getPath())); }
		this.extension = descriptor.getParams().get("extensions");
	}

	/**
	 * Returns a lazy iterator over the content of the files found by a new {@link FileSystemIterator}.
	 * <p>
	 * Files that cannot be read are logged and yield an empty string instead of interrupting the iteration.
	 *
	 * @return an iterator over the cleaned content of each file
	 * @see java.lang.Iterable#iterator()
	 */
	@Override
	public Iterator<String> iterator() {
		final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), extension);
		return Iterators.transform(fsi, new Function<String, String>() {

			@Override
			public String apply(final String inputFileName) {
				return readRecord(inputFileName);
			}
		});
	}

	/**
	 * Reads a file as UTF-8 text, strips a leading byte order mark and cleans the result with {@link XmlCleaner}.
	 *
	 * @param inputFileName
	 *            the name of the file to read
	 * @return the cleaned content of the file, or an empty string if the file cannot be read or cleaned
	 */
	private static String readRecord(final String inputFileName) {
		FileInputStream fileInputStream = null;
		try {
			fileInputStream = new FileInputStream(inputFileName);
			final String s = IOUtils.toString(fileInputStream, ENCODING);
			return XmlCleaner.cleanAllEntities(s.startsWith(BOM) ? s.substring(1) : s);
		} catch (Exception e) {
			// Best effort: a broken file must not stop the whole collection, but keep the cause in the log.
			log.error("Unable to read " + inputFileName, e);
			return "";
		} finally {
			if (fileInputStream != null) {
				try {
					fileInputStream.close();
				} catch (IOException e) {
					log.error("Unable to close inputstream for  " + inputFileName, e);
				}
			}
		}
	}
}
