package eu.dnetlib.data.collector.plugins.archive.targz;

import java.util.Set;

import com.google.common.base.Splitter;
import com.google.common.collect.Sets;
import eu.dnetlib.rmi.data.CollectorServiceException;
import eu.dnetlib.rmi.data.InterfaceDescriptor;
import eu.dnetlib.rmi.data.plugin.AbstractCollectorPlugin;
import org.apache.commons.lang3.StringUtils;

/**
 * Collector plugin for collecting a .tar.gz folder of records
 *
 * @author andrea
 */
public class TarGzCollectorPlugin extends AbstractCollectorPlugin {

	/**
	 * Validates the interface's base URL, reads the optional comma-separated "extensions" parameter and
	 * hands both the descriptor and the parsed extensions over to a new {@link TarGzIterable}.
	 *
	 * @param interfaceDescriptor
	 *            descriptor providing the base URL and the parameter map
	 * @param fromDate
	 *            not used by this method
	 * @param untilDate
	 *            not used by this method
	 * @return a {@link TarGzIterable} created from the descriptor and the set of extensions (an empty set when the
	 *         "extensions" parameter is missing or blank)
	 * @throws CollectorServiceException
	 *             if the base URL is null or empty
	 */
	@Override
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
			throws CollectorServiceException {

		final String url = interfaceDescriptor.getBaseUrl();
		if (StringUtils.isEmpty(url)) {
			throw new CollectorServiceException("Param 'baseurl' is null or empty");
		}

		final String rawExtensions = interfaceDescriptor.getParams().get("extensions");
		final Set<String> acceptedExtensions;
		if (StringUtils.isBlank(rawExtensions)) {
			// No extensions configured: pass an empty, mutable set along.
			acceptedExtensions = Sets.newHashSet();
		} else {
			acceptedExtensions = parseSet(rawExtensions);
		}

		return new TarGzIterable(interfaceDescriptor, acceptedExtensions);
	}

	/**
	 * Splits a comma-separated string into a set of trimmed, non-empty tokens.
	 *
	 * @param extensions
	 *            the comma-separated values
	 * @return a new hash set holding the distinct tokens
	 */
	private Set<String> parseSet(final String extensions) {
		final Splitter commaSplitter = Splitter.on(",").omitEmptyStrings().trimResults();
		final Iterable<String> tokens = commaSplitter.split(extensions);
		return Sets.newHashSet(tokens);
	}

}
