package eu.dnetlib.data.cleaner;

import java.io.StringReader;
import java.util.*;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
import eu.dnetlib.rmi.data.CleanerException;
import eu.dnetlib.rmi.enabling.ISLookUpException;
import eu.dnetlib.rmi.enabling.ISLookUpService;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Autowired;

/**
 * Builds {@link CleaningRule} instances from cleaner profiles retrieved through the {@link ISLookUpService}, and lists
 * the cleaner names known to that service.
 */
public class CleaningRuleFactory {

	/** Parser feature that makes the XML reader reject any document carrying a DOCTYPE declaration. */
	private static final String DISALLOW_DOCTYPE_FEATURE = "http://apache.org/xml/features/disallow-doctype-decl";

	@Autowired
	private UniqueServiceLocator serviceLocator;

	/**
	 * Loads the CONFIGURATION of the profile whose RESOURCE_IDENTIFIER/@value or CLEANER_NAME equals {@code ruleId} and
	 * turns each of its RULE elements into an xpath rule.
	 * <p>
	 * A RULE with a non-empty {@code vocabularies} attribute (comma separated) becomes a {@link VocabularyRule}; any other
	 * RULE becomes a {@link GroovyRule} built from its {@code groovy} attribute. Every rule receives the same map of
	 * namespace prefixes collected from the NAMESPACE elements of the configuration.
	 *
	 * @param ruleId
	 *            profile identifier or cleaner name; must not be null
	 * @return the cleaning rule holding one xpath rule per RULE element
	 * @throws CleanerException
	 *             if the id is null, or the profile cannot be fetched, parsed or converted
	 */
	public CleaningRule obtainCleaningRule(final String ruleId) throws CleanerException {
		try {
			if (ruleId == null) {
				// Reported as CleanerException by the catch below, like every other failure of this method.
				throw new IllegalArgumentException("ruleId must not be null");
			}

			// Resolve the lookup service once: it serves the profile query and is handed to the vocabulary rules.
			final ISLookUpService lookup = this.serviceLocator.getService(ISLookUpService.class);

			// The id ends up inside a single-quoted query literal, so it must be escaped to keep the query well formed.
			final String id = escapeXQueryLiteral(ruleId);
			final String prof = lookup.getResourceProfileByQuery(
					"/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + id + "' or .//CLEANER_NAME='" + id + "']//CONFIGURATION");

			final SAXReader reader = new SAXReader();
			// Refuse DOCTYPE declarations so that external entities can never be resolved while parsing the profile.
			reader.setFeature(DISALLOW_DOCTYPE_FEATURE, true);
			final Document doc = reader.read(new StringReader(prof));

			final Map<String, String> namespaceUris = readNamespaces(doc);
			final CleaningRule rule = new CleaningRule();

			for (final Object o : doc.selectNodes("//RULE")) {
				final Element node = (Element) o;

				final String xpath = node.valueOf("@xpath");
				final String vocabularies = node.valueOf("@vocabularies");
				final String groovyRule = node.valueOf("@groovy");
				final String strict = node.valueOf("@strict");

				final XPATHCleaningRule xpathRule;
				if (vocabularies != null && vocabularies.length() > 0) {
					final Set<String> list = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies));
					xpathRule = new VocabularyRule(list, lookup);
				} else {
					xpathRule = new GroovyRule(groovyRule);
				}
				xpathRule.setXpath(xpath);
				// Strict mode is enabled only by the exact attribute value "true".
				xpathRule.setStrict("true".equals(strict));
				xpathRule.setNamesapceMap(namespaceUris);
				rule.getXpathRules().add(xpathRule);
			}
			return rule;
		} catch (final Exception e) {
			throw new CleanerException("Error obtaing cleaner rule " + ruleId, e);
		}
	}

	/**
	 * Returns the distinct values produced by the {@code //CLEANER_NAME} profile search, in the order in which the lookup
	 * service returned them.
	 *
	 * @return the distinct cleaner names; empty when the search yields nothing
	 * @throws CleanerException
	 *             if the lookup service fails
	 */
	public List<String> getRuleIds() throws CleanerException {
		try {
			// A linked set drops duplicates while keeping a deterministic, first-seen order.
			final Set<String> response = new LinkedHashSet<String>();

			final List<String> list = this.serviceLocator.getService(ISLookUpService.class).quickSearchProfile("//CLEANER_NAME");
			if (list != null) {
				response.addAll(list);
			}

			return Lists.newArrayList(response);
		} catch (final ISLookUpException e) {
			throw new CleanerException("Error obtaining IDs of cleaner DSs", e);
		}
	}

	/** Collects the prefix to URI pairs declared by the NAMESPACE elements ({@code @ns}, {@code @uri}) of the document. */
	private static Map<String, String> readNamespaces(final Document doc) {
		final Map<String, String> namespaceUris = Maps.newHashMap();
		for (final Object o : doc.selectNodes("//NAMESPACE")) {
			final Element node = (Element) o;
			namespaceUris.put(node.valueOf("@ns"), node.valueOf("@uri"));
		}
		return namespaceUris;
	}

	/** Escapes a value for use inside a single-quoted XQuery string literal: ampersand as entity, apostrophe doubled. */
	private static String escapeXQueryLiteral(final String value) {
		return value.replace("&", "&amp;").replace("'", "''");
	}

}
