package eu.dnetlib.functionality.modular.ui.dedup;

import java.io.StringReader;
import java.util.List;
import java.util.Set;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import com.google.common.base.Function;
import com.google.common.collect.Sets;

import eu.dnetlib.data.proto.TypeProtos.Type;

/**
 * Converts an XML record (given as a string) into a {@link SimilarityGroup}.
 *
 * <p>
 * The record is queried with the following XPath expressions:
 * <ul>
 * <li>{@code //FIELD[@name = 'id']} - the group identifier</li>
 * <li>{@code //FIELD[@name = 'date']} - the group date</li>
 * <li>{@code //FIELD[@name = 'entitytype']} - the name of a {@link Type} constant</li>
 * <li>{@code //FIELD[@name='group']/ITEM} - one element per member of the group</li>
 * </ul>
 */
public class SimilarityGroupFunction implements Function<String, SimilarityGroup> {

	/**
	 * Parses the given XML and builds the corresponding similarity group.
	 *
	 * @param s
	 *            the XML record to parse
	 * @return a group carrying the id, date and entity type read from the record, whose members are the distinct text values of the ITEM elements
	 * @throws RuntimeException
	 *             if the XML cannot be parsed; the underlying {@link DocumentException} is attached as the cause. The lookup of the entity type via
	 *             {@code Type.valueOf} may also fail with an unchecked exception when the value is not a known constant (presumably
	 *             IllegalArgumentException - confirm against the generated Type class).
	 */
	@Override
	public SimilarityGroup apply(String s) {
		try {
			// NOTE(review): SAXReader is used with its default configuration; if these records can come from an untrusted source,
			// consider disabling DTDs / external entities on the reader.
			final Document d = new SAXReader().read(new StringReader(s));

			final String groupid = d.valueOf("//FIELD[@name = 'id']");
			final String date = d.valueOf("//FIELD[@name = 'date']");
			final String type = d.valueOf("//FIELD[@name = 'entitytype']");

			final Type pType = Type.valueOf(type);
			final String typeName = pType.toString();

			final EntityType entityType = new EntityType(String.valueOf(pType.getNumber()), typeName, label(typeName));
			final SimilarityGroup simEntry = new SimilarityGroup(groupid, date, entityType);

			// A set is used, so identifiers repeated across ITEM elements are stored only once.
			final List<?> items = d.selectNodes("//FIELD[@name='group']/ITEM");
			final Set<String> group = Sets.newHashSet();
			for (final Object item : items) {
				group.add(((Node) item).getText());
			}

			simEntry.setGroup(group);

			return simEntry;
		} catch (final DocumentException e) {
			// Keep the parser exception as the cause: it carries the actual reason (and position) of the failure.
			throw new RuntimeException("invalid xml: " + s, e);
		}
	}

	/**
	 * Maps an entity type name to the label passed to the {@link EntityType} constructor.
	 *
	 * @param s
	 *            the entity type name, as returned by {@code Type.toString()}
	 * @return "Publication" for "result", "Organization" for "organization", "Unknown" for anything else
	 */
	private String label(String s) {
		if (s.equals("result")) {
			return "Publication";
		} else if (s.equals("organization")) {
			return "Organization";
		} else {
			return "Unknown";
		}
	}
}
