package eu.dnetlib.pace.clustering;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Document;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.MapDocument;

/**
 * Clustering combiner that runs every field list of a document through a {@code FieldFilter}
 * built from the supplied blacklists before delegating to {@code combine}.
 */
public class BlacklistAwareClusteringCombiner extends ClusteringCombiner {

	/**
	 * Filters the fields of the given document against the blacklists and passes the filtered
	 * document to {@code combine}.
	 *
	 * @param a
	 *            the document whose fields are filtered
	 * @param conf
	 *            the configuration, handed to {@code combine} unchanged
	 * @param blacklists
	 *            blacklist entries keyed by field name; when {@code null} no filtering is applied
	 * @return the result of {@code combine} for the filtered document
	 */
	public static Collection<String> filterAndCombine(MapDocument a, Config conf, Map<String, Set<String>> blacklists) {
		final Document filtered = new BlacklistAwareClusteringCombiner().filter(a, blacklists);
		return combine(filtered, conf);
	}

	/**
	 * Builds a new document with the same identifier as {@code a}, in which each field list
	 * contains only the fields accepted by the {@code FieldFilter} created for that field name.
	 * The field map and the field lists of {@code a} are copied, never modified.
	 *
	 * @param a
	 *            the source document
	 * @param blacklists
	 *            blacklist entries keyed by field name; when {@code null} the field map is copied as is
	 * @return the new, filtered document
	 */
	private MapDocument filter(final MapDocument a, final Map<String, Set<String>> blacklists) {
		final Map<String, List<Field>> filtered = Maps.newHashMap(a.getFieldMap());
		if (blacklists != null) {
			for (final Entry<String, List<Field>> e : filtered.entrySet()) {
				final List<Field> accepted = Lists.newArrayList(Iterables.filter(e.getValue(), new FieldFilter(e.getKey(), blacklists)));
				// Replace the value through the entry rather than Map.put: the map is being
				// iterated, and setValue can never turn into a structural modification.
				e.setValue(accepted);
			}
		}
		return new MapDocument(a.getIdentifier(), filtered);
	}

	/**
	 * Tries to match the value against the regular expressions blacklisted for the given field.
	 * A {@code null} value, a {@code null} blacklist map, a field without blacklist entries and
	 * {@code null} regular expressions never match.
	 *
	 * @param fieldName
	 *            the name of the field, used as key in {@code blacklists}
	 * @param value
	 *            the value to test; the whole value must match a regular expression
	 * @param blacklists
	 *            regular expressions keyed by field name
	 * @return true if the value matches at least one regular expression of the field, false otherwise
	 */
	protected boolean regexMatches(String fieldName, String value, Map<String, Set<String>> blacklists) {
		if (value == null || blacklists == null) {
			return false;
		}
		// Single lookup; also covers a field name that is explicitly mapped to null.
		final Set<String> regexes = blacklists.get(fieldName);
		if (regexes == null) {
			return false;
		}
		for (final String regex : regexes) {
			if (regex != null && value.matches(regex)) {
				return true;
			}
		}
		return false;
	}
}
