package eu.dnetlib.msro.oai.workflows.nodes.hdfs;

import javax.annotation.Resource;
import javax.xml.ws.wsaddressing.W3CEndpointReference;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.googlecode.sarasvati.Arc;
import com.googlecode.sarasvati.NodeToken;

import eu.dnetlib.data.hadoop.config.ClusterName;
import eu.dnetlib.data.hadoop.config.ConfigurationEnumerator;
import eu.dnetlib.data.hadoop.hdfs.SequenceFileUtils;
import eu.dnetlib.enabling.resultset.IterableResultSetFactory;
import eu.dnetlib.miscutils.collections.Pair;
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;

/**
 * Workflow node that reads a sequence file from HDFS and publishes the record values as a result set.
 * <p>
 * The value of each (key, value) pair returned by {@link SequenceFileUtils#read} is converted to a String; the
 * resulting iterable is wrapped in a result set and the string form of its endpoint reference is stored in the
 * workflow environment under the attribute named by {@link #getEprParam()}.
 */
public class ReadHDFSFileJobNode extends SimpleJobNode {

	private static final Log log = LogFactory.getLog(ReadHDFSFileJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM

	/**
	 * Name of the property (resolved through the node's property fetcher) whose value is the HDFS path to read.
	 * Despite the name, this is the property key, not the path itself.
	 */
	private String hdfsFilePath;

	/** Name of the {@link ClusterName} constant identifying the cluster whose configuration is used. */
	private String clusterName;

	// -1 means no limit
	private int limitRead;

	/** Name of the environment attribute that receives the result set endpoint reference. */
	private String eprParam;

	@Resource
	private ConfigurationEnumerator configurationEnumerator;
	@Resource
	private IterableResultSetFactory iterableResultSetFactory;

	/**
	 * Reads the configured sequence file and exposes its values as a result set.
	 *
	 * @param token
	 *            the current workflow token; its environment receives the endpoint reference under {@code eprParam}
	 * @return {@link Arc#DEFAULT_ARC}
	 * @throws IllegalArgumentException
	 *             if the property named by {@code hdfsFilePath} is not defined
	 * @throws Exception
	 *             if the configuration cannot be obtained, the file cannot be read or the result set cannot be created
	 */
	@Override
	protected String execute(final NodeToken token) throws Exception {
		final ClusterName cluster = ClusterName.valueOf(clusterName);
		final Configuration conf = this.configurationEnumerator.get(cluster);
		log.info("Got configuration");

		final String propertyPath = this.getPropertyFetcher().getProps().getProperty(hdfsFilePath);
		if (propertyPath == null) {
			// Fail with the offending property key instead of letting Path reject an anonymous null string.
			throw new IllegalArgumentException("Cannot read HDFS file: property '" + hdfsFilePath + "' is not defined");
		}
		final Path hdfsPath = new Path(propertyPath);
		log.info("Got path at " + propertyPath);

		final Iterable<Pair<Text, Text>> records = SequenceFileUtils.read(hdfsPath, conf, limitRead);
		log.info("Read SequenceFile, now starting transformation of iterable!");

		// Only the value of each pair is exposed; the key is discarded.
		final Iterable<String> recordsIterable = Iterables.transform(records, new Function<Pair<Text, Text>, String>() {

			@Override
			public String apply(final Pair<Text, Text> pair) {
				return pair.getValue().toString();
			}
		});
		log.info("Created Iterable of String");

		final W3CEndpointReference resultSet = this.iterableResultSetFactory.createIterableResultSet(recordsIterable);
		log.info("Created W3CEndpointReference");

		token.getEnv().setAttribute(eprParam, resultSet.toString());
		return Arc.DEFAULT_ARC;
	}

	/** @return the name of the property holding the HDFS path to read */
	public String getHdfsFilePath() {
		return hdfsFilePath;
	}

	/**
	 * @param hdfsFilePath
	 *            the name of the property holding the HDFS path to read
	 */
	public void setHdfsFilePath(final String hdfsFilePath) {
		this.hdfsFilePath = hdfsFilePath;
	}

	/** @return the name of the cluster whose configuration is used */
	public String getClusterName() {
		return clusterName;
	}

	/**
	 * @param clusterName
	 *            the name of a {@link ClusterName} constant
	 */
	public void setClusterName(final String clusterName) {
		this.clusterName = clusterName;
	}

	/** @return the read limit passed to the sequence file reader (-1 means no limit) */
	public int getLimitRead() {
		return limitRead;
	}

	/**
	 * @param limitRead
	 *            the read limit passed to the sequence file reader (-1 means no limit)
	 */
	public void setLimitRead(final int limitRead) {
		this.limitRead = limitRead;
	}

	/** @return the name of the environment attribute that receives the endpoint reference */
	public String getEprParam() {
		return eprParam;
	}

	/**
	 * @param eprParam
	 *            the name of the environment attribute that receives the endpoint reference
	 */
	public void setEprParam(final String eprParam) {
		this.eprParam = eprParam;
	}

	public ConfigurationEnumerator getConfigurationEnumerator() {
		return configurationEnumerator;
	}

	public void setConfigurationEnumerator(final ConfigurationEnumerator configurationEnumerator) {
		this.configurationEnumerator = configurationEnumerator;
	}

	public IterableResultSetFactory getIterableResultSetFactory() {
		return iterableResultSetFactory;
	}

	public void setIterableResultSetFactory(final IterableResultSetFactory iterableResultSetFactory) {
		this.iterableResultSetFactory = iterableResultSetFactory;
	}

}
