package eu.dnetlib.data.mapreduce.hbase.dedup;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.mapreduce.util.OafHbaseUtils;
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
import eu.dnetlib.data.proto.DedupProtos;
import eu.dnetlib.data.proto.KindProtos;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.TypeProtos;
import eu.dnetlib.data.transform.OafUtils;
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
import eu.dnetlib.pace.config.DedupConfig;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;

/* loaded from: input_file:WEB-INF/lib/dnet-mapreduce-jobs-1.1.11-solr75-20190219.103101-153.jar:eu/dnetlib/data/mapreduce/hbase/dedup/SimpleDedupPersonReducer.class */
/**
 * Reducer that collapses a group of candidate-duplicate entities onto a single root id.
 *
 * <p>For every reduce key the incoming values are decoded into {@link OafDecoder}s (at most
 * {@link #MAX_Q_SIZE} per key). When the group holds more than one element, the
 * lexicographically smallest id of the group is turned into a root id through
 * {@link DedupUtils#newId}, every element is re-emitted flagged as deleted-by-inference, and a
 * pair of {@code merges} / {@code isMergedIn} relations is written between the root and each
 * element. Progress and failures are reported through job counters.
 */
public class SimpleDedupPersonReducer extends TableReducer<Text, ImmutableBytesWritable, ImmutableBytesWritable> {

    /** Upper bound on the number of values buffered for a single reduce key. */
    private static final int MAX_Q_SIZE = 3000;

    /** Dedup configuration, loaded once per task in {@link #setup}. */
    private DedupConfig dedupConf;

    /**
     * Returns the smallest string of {@code iterable} according to {@link String#compareTo}.
     *
     * <p>{@code null} elements are skipped, so they can no longer trigger a
     * {@link NullPointerException}.
     *
     * @param iterable the candidate strings; iterated exactly once
     * @return the minimum non-null element, or {@code null} when there is none
     */
    public static String findMin(Iterable<String> iterable) {
        String min = null;
        for (String candidate : iterable) {
            if (candidate == null) {
                continue;
            }
            if (min == null || min.compareTo(candidate) > 0) {
                min = candidate;
            }
        }
        return min;
    }

    /**
     * Loads the dedup configuration from the job configuration entry named by
     * {@link JobParams#DEDUP_CONF}.
     */
    @Override
    public void setup(Reducer<Text, ImmutableBytesWritable, ImmutableBytesWritable, Writable>.Context context) throws IOException, InterruptedException {
        this.dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));
    }

    /**
     * Processes one group of candidate duplicates.
     *
     * <p>Any failure other than an interruption is reported (stdout/stderr plus a counter named
     * after the exception class) and the group is skipped, so that a single bad group does not
     * fail the task. The erased bridge overload required by {@link Reducer} is generated by the
     * compiler from this method.
     *
     * @param text     the reduce key
     * @param iterable the serialized entities grouped under {@code text}
     * @param context  the reducer context used for counters and output
     * @throws InterruptedException if the task is interrupted while writing output
     */
    @Override
    protected void reduce(Text text, Iterable<ImmutableBytesWritable> iterable, Reducer<Text, ImmutableBytesWritable, ImmutableBytesWritable, Writable>.Context context) throws IOException, InterruptedException {
        try {
            final Queue<OafDecoder> group = prepare(text, iterable, context);
            final String entityType = entityType();

            if (group.size() <= 1) {
                context.getCounter(entityType, "1").increment(1L);
                return;
            }

            // Group-size histogram. The "> 20" bucket also receives groups of exactly 20 elements;
            // the counter name is kept as-is because external tooling may read it.
            final String sizeBucket = group.size() < 20 ? lpad(group.size()) : "> 20";
            context.getCounter(entityType + " root group size", sizeBucket).increment(1L);

            final String minId = findMin(Iterables.transform(group, OafUtils.idDecoder()));
            if (minId == null) {
                context.getCounter(entityType, "unable to find min").increment(1L);
                return;
            }

            final String rootId = DedupUtils.newId(minId, this.dedupConf.getWf().getDedupRun());
            while (!group.isEmpty()) {
                markDuplicate(context, rootId, group.remove());
            }
        } catch (InterruptedException interrupted) {
            // An interruption is a request to stop the task, not a data problem: let it propagate
            // instead of counting it and carrying on with the next group.
            throw interrupted;
        } catch (Throwable th) {
            System.out.println("GOT EX " + th);
            th.printStackTrace(System.err);
            context.getCounter(entityType(), th.getClass().toString()).increment(1L);
        }
    }

    /**
     * Decodes the values of one group into a queue, stopping once {@link #MAX_Q_SIZE} elements
     * have been collected; values beyond that limit are not read.
     */
    private Queue<OafDecoder> prepare(Text text, Iterable<ImmutableBytesWritable> iterable, Reducer<Text, ImmutableBytesWritable, ImmutableBytesWritable, Writable>.Context context) {
        final Queue<OafDecoder> queue = Lists.newLinkedList();
        final Iterator<OafDecoder> decoded = Iterables.transform(iterable, OafHbaseUtils.decoder()).iterator();
        while (decoded.hasNext() && queue.size() < MAX_Q_SIZE) {
            queue.add(decoded.next());
        }
        return queue;
    }

    /**
     * Emits the three writes that mark {@code oafDecoder}'s entity as a duplicate of the root:
     * the entity body flagged deleted-by-inference, the {@code merges} relation on the root row
     * and the {@code isMergedIn} relation on the entity row. Each write bumps a counter.
     *
     * @param context    the reducer context
     * @param str        the root id the entity is merged into
     * @param oafDecoder the entity to mark
     */
    private void markDuplicate(Reducer<Text, ImmutableBytesWritable, ImmutableBytesWritable, Writable>.Context context, String str, OafDecoder oafDecoder) throws InvalidProtocolBufferException, IOException, InterruptedException {
        final OafProtos.Oaf.Builder builder = OafProtos.Oaf.newBuilder(oafDecoder.getOaf());
        builder.getDataInfoBuilder()
                .setDeletedbyinference(true)
                .setInferenceprovenance(this.dedupConf.getWf().getConfigurationId());
        final OafProtos.Oaf deleted = builder.build();

        final byte[] entityId = Bytes.toBytes(deleted.getEntity().getId());
        final byte[] rootId = Bytes.toBytes(str);
        final String entityType = entityType();

        // 1. the entity itself, now flagged as deleted by inference
        emit(context, entityId, entityType, DedupUtils.BODY_B, deleted.toByteArray());
        context.getCounter(entityType, "marked as deleted").increment(1L);

        final TypeProtos.Type type = TypeProtos.Type.valueOf(entityType);

        // 2. root --merges--> entity, stored on the root row
        final String mergesCf = DedupUtils.getDedupCF_merges(type);
        emit(context, rootId, mergesCf, entityId, buildRel(rootId, entityId, DedupProtos.Dedup.RelName.merges));

        // 3. entity --isMergedIn--> root, stored on the entity row
        final String mergedInCf = DedupUtils.getDedupCF_mergedIn(type);
        emit(context, entityId, mergedInCf, rootId, buildRel(entityId, rootId, DedupProtos.Dedup.RelName.isMergedIn));

        context.getCounter(entityType, mergesCf).increment(1L);
        context.getCounter(entityType, mergedInCf).increment(1L);
    }

    /**
     * Writes a single cell to the output table.
     *
     * @param context the reducer context
     * @param bArr    the id of the target row, as bytes; also used as the output key
     * @param str     the column family name
     * @param bArr2   the column qualifier
     * @param bArr3   the cell value
     */
    private void emit(Reducer<Text, ImmutableBytesWritable, ImmutableBytesWritable, Writable>.Context context, byte[] bArr, String str, byte[] bArr2, byte[] bArr3) throws IOException, InterruptedException {
        // Bytes.toBytes encodes as UTF-8, consistently with the other conversions in this class;
        // String.getBytes() would depend on the platform default charset.
        final Put put = new Put(Bytes.toBytes(OafRowKeyDecoder.decode(bArr).getKey()));
        put.setWriteToWAL(true);
        put.add(Bytes.toBytes(str), bArr2, bArr3);
        context.write(new ImmutableBytesWritable(bArr), put);
    }

    /**
     * Builds the serialized relation {@code bArr --relName--> bArr2}.
     *
     * @param bArr    the source id, as produced by {@link Bytes#toBytes(String)}
     * @param bArr2   the target id, as produced by {@link Bytes#toBytes(String)}
     * @param relName the dedup relation name
     * @return the protobuf-encoded relation
     */
    private byte[] buildRel(byte[] bArr, byte[] bArr2, DedupProtos.Dedup.RelName relName) {
        // Decode with Bytes.toString (UTF-8) to mirror the Bytes.toBytes encoding of the ids;
        // new String(byte[]) would use the platform default charset.
        final String source = Bytes.toString(bArr);
        final String target = Bytes.toString(bArr2);
        return OafProtos.Oaf.newBuilder()
                .setKind(KindProtos.Kind.relation)
                .setLastupdatetimestamp(System.currentTimeMillis())
                .setDataInfo(AbstractDNetXsltFunctions.getDataInfo(null, "", "0.8", false, true)
                        .setInferenceprovenance(this.dedupConf.getWf().getConfigurationId()))
                .setRel(DedupUtils.getDedup(this.dedupConf, source, target, relName))
                .build()
                .toByteArray();
    }

    /** Left-pads {@code i} with spaces to the number of digits of {@link #MAX_Q_SIZE}. */
    private String lpad(int i) {
        return StringUtils.leftPad(String.valueOf(i), String.valueOf(MAX_Q_SIZE).length());
    }

    /** Returns the entity type declared by the dedup workflow configuration. */
    private String entityType() {
        return this.dedupConf.getWf().getEntityType();
    }
}
