package eu.dnetlib.pace.util;

import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.config.WfConfig;
import eu.dnetlib.pace.tree.support.TreeProcessor;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType;

/* loaded from: input_file:eu/dnetlib/pace/util/BlockProcessor.class */
/**
 * Compares the rows of a single block against each other and reports every
 * matching pair, together with bookkeeping counters, through a {@link Reporter}.
 *
 * <p>Rows are addressed by two column positions supplied at construction time:
 * the identifier column and the order-field column.
 */
public class BlockProcessor {
    /** Counter names registered by {@link #constructAccumulator(DedupConfig)}. */
    public static final List<String> accumulators = new ArrayList<>();
    private static final Log log = LogFactory.getLog(BlockProcessor.class);
    private final DedupConfig dedupConf;
    private final int identifierFieldPos;
    private final int orderFieldPos;

    /**
     * Appends the counter names derived from the given configuration to
     * {@link #accumulators}. Each name has the form {@code <entityType>::<label>}.
     *
     * <p>Every invocation appends six entries; repeated calls are not de-duplicated.
     *
     * @param dedupConfig configuration providing entity type, order field,
     *                    group max size and threshold
     */
    public static void constructAccumulator(DedupConfig dedupConfig) {
        final WfConfig wf = dedupConfig.getWf();
        accumulators.add(counterName(wf, "records per hash key = 1"));
        accumulators.add(counterName(wf, "missing " + wf.getOrderField()));
        accumulators.add(counterName(wf,
                String.format("Skipped records for count(%s) >= %s", wf.getOrderField(), wf.getGroupMaxSize())));
        accumulators.add(counterName(wf, "skip list"));
        accumulators.add(counterName(wf, "dedupSimilarity (x2)"));
        accumulators.add(counterName(wf, "d < " + wf.getThreshold()));
    }

    /** Builds a counter name of the form {@code <entityType>::<label>}. */
    private static String counterName(WfConfig wf, String label) {
        return String.format("%s::%s", wf.getEntityType(), label);
    }

    /**
     * Creates a processor bound to a configuration and to the two row columns it reads.
     *
     * @param dedupConfig        dedup configuration used for comparisons and counters
     * @param identifierFieldPos position of the identifier column in each row
     * @param orderFieldPos      position of the order-field column in each row
     */
    public BlockProcessor(DedupConfig dedupConfig, int identifierFieldPos, int orderFieldPos) {
        this.dedupConf = dedupConfig;
        this.identifierFieldPos = identifierFieldPos;
        this.orderFieldPos = orderFieldPos;
    }

    /**
     * Processes one block of rows. Blocks with fewer than two rows cannot produce
     * a pair, so they only increment the "records per hash key = 1" counter.
     *
     * @param list     rows of the block (presumably already sorted by the caller)
     * @param reporter sink for emitted pairs and counters
     */
    public void processSortedRows(List<Row> list, Reporter reporter) {
        if (list.size() > 1) {
            processRows(list, reporter);
        } else {
            reporter.incrementCounter(this.dedupConf.getWf().getEntityType(), "records per hash key = 1", 1L);
        }
    }

    /**
     * Compares every row (the pivot) with the later rows that
     * {@link IncrementalConnectedComponents#nextUnconnected} proposes, connecting
     * the pair whenever the comparison succeeds.
     *
     * <p>The scan for a pivot stops when the proposed index falls outside the list,
     * when the candidate's namespace prefix is in the skip list, or when a positive
     * sliding-window size is configured and the candidate lies beyond it.
     */
    private void processRows(List<Row> rows, Reporter reporter) {
        final int size = rows.size();
        final IncrementalConnectedComponents components = new IncrementalConnectedComponents(size);
        final WfConfig wf = this.dedupConf.getWf();

        for (int pivot = 0; pivot < size; pivot++) {
            final Row pivotRow = rows.get(pivot);
            final String pivotId = pivotRow.getString(this.identifierFieldPos);

            // A pivot is never skipped for a missing order field; only candidates are
            // filtered on it below.
            for (int candidate = components.nextUnconnected(pivot, pivot + 1);
                    candidate >= 0 && candidate < size;
                    candidate = components.nextUnconnected(pivot, candidate + 1)) {
                final Row candidateRow = rows.get(candidate);
                final String candidateId = candidateRow.getString(this.identifierFieldPos);

                if (mustSkip(candidateId)) {
                    reporter.incrementCounter(wf.getEntityType(), "skip list", 1L);
                    break;
                }

                // Candidate indexes are requested in increasing order, so once one is
                // outside the window the scan for this pivot ends. Without this break
                // the loop would never advance.
                if (wf.getSlidingWindowSize() > 0 && candidate - pivot > wf.getSlidingWindowSize()) {
                    break;
                }

                final Object candidateOrder = getJavaValue(candidateRow, this.orderFieldPos);
                if (!candidateId.equals(pivotId)
                        && candidateOrder != null
                        && emitOutput(new TreeProcessor(this.dedupConf).compare(pivotRow, candidateRow),
                                pivotId, candidateId, reporter)) {
                    components.connect(pivot, candidate);
                }
            }
        }
    }

    /**
     * Reads column {@code i} of {@code row} according to its schema type.
     *
     * @param row row to read from; its schema is consulted for the column type
     * @param i   column position
     * @return the string value for a {@link StringType} column, the list value for
     *         an {@link ArrayType} column, {@code null} for any other column type
     */
    public Object getJavaValue(Row row, int i) {
        DataType dataType = row.schema().fields()[i].dataType();
        if (dataType instanceof StringType) {
            return row.getString(i);
        }
        if (dataType instanceof ArrayType) {
            return row.getList(i);
        }
        return null;
    }

    /**
     * Records the outcome of one comparison.
     *
     * <p>On a match the pair is emitted with the lexicographically smaller
     * identifier first and the "dedupSimilarity (x2)" counter is incremented;
     * otherwise the "d &lt; threshold" counter is incremented.
     *
     * @return {@code z}, unchanged
     */
    private boolean emitOutput(boolean z, String str, String str2, Reporter reporter) {
        if (z) {
            if (str.compareTo(str2) <= 0) {
                writeSimilarity(reporter, str, str2);
            } else {
                writeSimilarity(reporter, str2, str);
            }
            reporter.incrementCounter(this.dedupConf.getWf().getEntityType(), "dedupSimilarity (x2)", 1L);
        } else {
            reporter.incrementCounter(this.dedupConf.getWf().getEntityType(), "d < " + this.dedupConf.getWf().getThreshold(), 1L);
        }
        return z;
    }

    /** Returns whether the namespace prefix of {@code str} is in the configured skip list. */
    private boolean mustSkip(String str) {
        return this.dedupConf.getWf().getSkipList().contains(getNsPrefix(str));
    }

    /**
     * Extracts the text between {@link IdentifierFactory#ID_PREFIX_SEPARATOR} and
     * the following {@code "::"} in {@code str}.
     */
    private String getNsPrefix(String str) {
        return StringUtils.substringBetween(str, IdentifierFactory.ID_PREFIX_SEPARATOR, "::");
    }

    /** Emits the pair {@code (str, str2)} under the configured entity type. */
    private void writeSimilarity(Reporter reporter, String str, String str2) {
        reporter.emit(this.dedupConf.getWf().getEntityType(), str, str2);
    }
}
