package eu.dnetlib.iis.referenceextraction.dataset;

import eu.dnetlib.iis.core.java.HadoopContext;
import eu.dnetlib.iis.core.java.PortBindings;
import eu.dnetlib.iis.core.java.Process;
import eu.dnetlib.iis.core.java.ProcessUtils;
import eu.dnetlib.iis.core.java.io.CloseableIterator;
import eu.dnetlib.iis.core.java.io.DataStore;
import eu.dnetlib.iis.core.java.io.FileSystemPath;
import eu.dnetlib.iis.core.java.porttype.AvroPortType;
import eu.dnetlib.iis.core.java.porttype.PortType;
import eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet;
import java.util.HashMap;
import java.util.Map;
import org.apache.avro.file.DataFileWriter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

/* loaded from: input_file:eu/dnetlib/iis/referenceextraction/dataset/DocumentToDataSetConfidenceLevelBasedCleaner.class */
/**
 * Process that copies {@link DocumentToDataSet} records from the {@code input} port to the
 * {@code output} port, dropping those whose confidence level is below a configured threshold.
 *
 * <p>The threshold is read from the {@link #CONFIDENCE_LEVEL_THRESHOLD} parameter. Records with
 * no confidence level set ({@code null}) are always kept; records whose confidence level is
 * greater than or equal to the threshold are kept; all others are skipped and logged at WARN.
 */
public class DocumentToDataSetConfidenceLevelBasedCleaner implements Process {
    /** Name of the parameter holding the minimum accepted confidence level (parsed as a float). */
    public static final String CONFIDENCE_LEVEL_THRESHOLD = "export.document_to_dataset.confidence.level.threshold";
    private final Logger log = Logger.getLogger(getClass());
    private static final String inputPort = "input";
    private static final String outputPort = "output";

    /**
     * @return the single {@code input} port, typed with the {@link DocumentToDataSet} Avro schema
     */
    public Map<String, PortType> getInputPorts() {
        return createInputPorts();
    }

    /**
     * @return the single {@code output} port, typed with the {@link DocumentToDataSet} Avro schema
     */
    public Map<String, PortType> getOutputPorts() {
        return createOutputPorts();
    }

    /** Builds a fresh, mutable input port map on every call. */
    private static Map<String, PortType> createInputPorts() {
        Map<String, PortType> ports = new HashMap<>();
        ports.put(inputPort, new AvroPortType(DocumentToDataSet.SCHEMA$));
        return ports;
    }

    /** Builds a fresh, mutable output port map on every call. */
    private static Map<String, PortType> createOutputPorts() {
        Map<String, PortType> ports = new HashMap<>();
        ports.put(outputPort, new AvroPortType(DocumentToDataSet.SCHEMA$));
        return ports;
    }

    /**
     * Filters the records found under the {@code input} port path into the {@code output} port path.
     *
     * @param portBindings  bindings of the {@code input} and {@code output} port names to paths
     * @param hadoopContext source of the Hadoop configuration used for the file system and
     *                      for parameter lookup
     * @param map           process parameters consulted for {@link #CONFIDENCE_LEVEL_THRESHOLD}
     * @throws RuntimeException      if the threshold parameter is missing or empty
     * @throws NumberFormatException if the threshold parameter is not a parsable float
     * @throws Exception             on any failure while reading or writing the data stores
     */
    // Generic signatures of CloseableIterator and DataStore are not visible from this file,
    // so the reader and writer are kept raw rather than guessing their type parameters.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public void run(PortBindings portBindings, HadoopContext hadoopContext, Map<String, String> map) throws Exception {
        String thresholdValue = ProcessUtils.getParameterValue(CONFIDENCE_LEVEL_THRESHOLD, hadoopContext.getConfiguration(), map);
        if (thresholdValue == null || thresholdValue.isEmpty()) {
            throw new RuntimeException("no confidence level threshold parameter provided: '" + CONFIDENCE_LEVEL_THRESHOLD + "'");
        }
        float threshold = Float.parseFloat(thresholdValue);

        Map<?, ?> input = portBindings.getInput();
        Map<?, ?> output = portBindings.getOutput();
        FileSystem fileSystem = FileSystem.get(hadoopContext.getConfiguration());

        CloseableIterator reader = DataStore.getReader(new FileSystemPath(fileSystem, (Path) input.get(inputPort)));
        try {
            DataFileWriter writer = DataStore.create(new FileSystemPath(fileSystem, (Path) output.get(outputPort)), DocumentToDataSet.SCHEMA$);
            try {
                // The resources must stay open for the whole iteration: closing them inside the
                // loop would end processing after the first record and never close them at all
                // for an empty input.
                while (reader.hasNext()) {
                    DocumentToDataSet record = (DocumentToDataSet) reader.next();
                    // A missing confidence level is treated as "accept"; the threshold is inclusive.
                    if (record.getConfidenceLevel() == null || record.getConfidenceLevel().floatValue() >= threshold) {
                        writer.append(record);
                    } else {
                        this.log.warn("skipping relation, confidence level below the threshold (" + thresholdValue + "): " + record.toString());
                    }
                }
            } finally {
                writer.close();
            }
        } finally {
            // Runs even if opening or closing the writer failed, so the reader never leaks.
            reader.close();
        }
    }
}
