package eu.dnetlib.msro.openaireplus.workflows.nodes.objectStore;

import java.io.File;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import eu.dnetlib.data.objectstore.rmi.ObjectStoreFile;
import eu.dnetlib.data.objectstore.rmi.Protocols;
import eu.dnetlib.data.transform.xml.AbstractDNetOafXsltFunctions;

/**
 * {@link IteratorRepository} that turns arXiv PDF files into JSON-serialized {@link ObjectStoreFile} descriptors.
 *
 * <p>
 * The arXiv identifier is derived from the file name: the {@code .pdf} text is removed and, when the remaining name
 * does not start with a digit, a {@code /} is inserted in front of its first digit (for example
 * {@code hep-th9901001.pdf} becomes {@code hep-th/9901001}). Names that start with a digit, or contain no digit at
 * all, are used unchanged.
 * </p>
 */
public class ArxivIteratorRepository extends IteratorRepository {

	/** Matches a single decimal digit; used to find where the numeric part of a file name begins. */
	private static final Pattern FIRST_DIGIT = Pattern.compile("[0-9]");

	/**
	 * Creates the repository; all arguments are handed to the superclass unchanged.
	 *
	 * @param input
	 *            the files to describe
	 * @param repositoryPrefix
	 *            prefix placed in front of the generated object identifiers
	 * @param oaiPrefix
	 *            prefix prepended to the arXiv identifier before it is hashed
	 */
	public ArxivIteratorRepository(final Iterable<File> input, final String repositoryPrefix, final String oaiPrefix) {
		super(input, repositoryPrefix, oaiPrefix);
	}

	/**
	 * Builds the descriptor of the next input file.
	 *
	 * @return the JSON form of the {@link ObjectStoreFile}, or {@code null} when the path has no file-name component
	 *         or when anything fails while the descriptor is being built
	 */
	@Override
	String generateNextElement() {
		try {
			final String inputname = input.next().getCanonicalPath();
			// Let java.io.File extract the last path component so that the platform separator is honoured;
			// splitting on "/" never finds a file name on platforms that use a different separator.
			final String baseName = new File(inputname).getName();
			if (baseName.isEmpty()) {
				// A filesystem root has no file name to derive an identifier from.
				return null;
			}

			final String arxivId = toArxivIdentifier(baseName.replace(".pdf", ""));

			final ObjectStoreFile info = new ObjectStoreFile();
			info.setDownloadedURL("http://arxiv.org/abs/" + arxivId);
			info.setAccessProtocol(Protocols.None);
			// oaiPrefix stands in for the formerly hard-coded "oai:arXiv.org:" prefix.
			final String value = oaiPrefix + arxivId;
			final String resultID = repositoryPrefix + "::" + AbstractDNetOafXsltFunctions.md5(value);
			info.setObjectID(resultID + "::" + AbstractDNetOafXsltFunctions.md5(info.getDownloadedURL()));
			info.setMimeType("pdf");
			info.setURI(inputname);
			return info.toJSON();
		} catch (Exception e) {
			// Best effort: every failure (exhausted input, unresolvable path, ...) is reported to the caller as null.
			return null;
		}
	}

	/**
	 * Inserts a {@code /} in front of the first digit of the given name, unless the name starts with a digit or
	 * contains no digit at all.
	 */
	private static String toArxivIdentifier(final String name) {
		final Matcher matcher = FIRST_DIGIT.matcher(name);
		if (!matcher.find() || matcher.start() == 0) {
			return name;
		}
		final int firstDigit = matcher.start();
		return name.substring(0, firstDigit) + "/" + name.substring(firstDigit);
	}

}
