package eu.dnetlib.data.transform;

import com.wcohen.ss.JaroWinkler;
import eu.dnetlib.data.bulktag.Pair;
import eu.dnetlib.data.proto.FieldTypeProtos;
import eu.dnetlib.pace.model.Person;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.util.Strings;

/* loaded from: input_file:WEB-INF/lib/dnet-openaireplus-mapping-utils-7.0.1.jar:eu/dnetlib/data/transform/AuthorMerger.class */
/**
 * Merges several author lists describing the same record into a single list.
 *
 * <p>The list holding the most ORCID-identified authors is taken as the base. Its authors that
 * lack an ORCID are enriched with data from ORCID-identified authors found in the other lists
 * whenever their names are similar enough (Jaro-Winkler score on normalized names).
 */
public class AuthorMerger {
    /** Default minimum name similarity for two authors to be considered the same person. */
    private static final Double THRESHOLD = Double.valueOf(0.95d);
    /** Pid key identifying an ORCID; compared case-insensitively. */
    private static final String ORCID = "orcid";
    /** Upper bound on the number of ORCID authors from the other lists used as merge candidates. */
    private static final int MAX_AUTHORS = 200;

    /**
     * Merges the given author lists using a caller-supplied similarity threshold.
     *
     * @param collection the author lists to merge
     * @param d minimum similarity score (inclusive) for enriching a base author from a candidate
     * @return the merged author list; empty when {@code collection} is empty
     */
    public static List<FieldTypeProtos.Author> merge(Collection<List<FieldTypeProtos.Author>> collection, double d) {
        // The previous implementation called itself here and could only end in a StackOverflowError.
        return doMerge(normalizeOrcidPids(collection), d);
    }

    /**
     * Merges the given author lists using the default similarity threshold.
     *
     * @param collection the author lists to merge
     * @return the merged author list; empty when {@code collection} is empty
     */
    public static List<FieldTypeProtos.Author> merge(Collection<List<FieldTypeProtos.Author>> collection) {
        return merge(collection, THRESHOLD.doubleValue());
    }

    /** Returns a copy of the input in which every author went through {@link #fixORCID}. */
    private static Collection<List<FieldTypeProtos.Author>> normalizeOrcidPids(Collection<List<FieldTypeProtos.Author>> collection) {
        return collection.stream()
                .map(authors -> authors.stream().map(AuthorMerger::fixORCID).collect(Collectors.toList()))
                .collect(Collectors.toList());
    }

    /**
     * Core merge routine.
     *
     * @param collection author lists whose ORCID pids are already normalized
     * @param threshold minimum similarity score (inclusive) for merging a candidate into a base author
     * @return an empty list for empty input; the only list when there is exactly one; the first
     *         list when no list contains an ORCID; otherwise the base list's ORCID authors followed
     *         by its (possibly enriched) authors without ORCID
     */
    private static List<FieldTypeProtos.Author> doMerge(Collection<List<FieldTypeProtos.Author>> collection, double threshold) {
        if (collection.isEmpty()) {
            return new ArrayList<>();
        }
        if (collection.size() == 1) {
            return collection.iterator().next();
        }

        final List<FieldTypeProtos.Author> base = findListWithMostOrcids(collection);
        if (base == null) {
            // No list carries any ORCID: nothing to enrich with, keep the first list as-is.
            return collection.iterator().next();
        }

        List<FieldTypeProtos.Author> merged = base.stream()
                .filter(AuthorMerger::hasOrcid)
                .collect(Collectors.toCollection(ArrayList::new));
        if (merged.size() == base.size()) {
            // Every base author already has an ORCID.
            return merged;
        }

        // ORCID authors from the other lists, de-duplicated by ORCID value (last occurrence wins).
        Map<String, FieldTypeProtos.Author> candidatesByOrcid = collection.stream()
                .filter(authors -> !authors.equals(base))
                .flatMap(List::stream)
                .filter(AuthorMerger::hasOrcid)
                .limit(MAX_AUTHORS)
                .collect(Collectors.toMap(AuthorMerger::orcidOf, Function.identity(), (first, second) -> second));
        Collection<FieldTypeProtos.Author> candidates = candidatesByOrcid.values();

        for (FieldTypeProtos.Author author : base) {
            if (hasOrcid(author)) {
                continue;
            }
            FieldTypeProtos.Author.Builder enriched = FieldTypeProtos.Author.newBuilder(author);
            // Sequential on purpose: the builder is shared mutable state and must not be
            // mutated from a parallel stream.
            for (FieldTypeProtos.Author candidate : candidates) {
                if (sim(author, candidate) >= threshold) {
                    enriched.mergeFrom(candidate);
                }
            }
            // Merging may append duplicate pid keys; keep a single entry per key (last one wins).
            Map<String, FieldTypeProtos.KeyValue> pidsByKey = enriched.getPidList().stream()
                    .collect(Collectors.toMap(FieldTypeProtos.KeyValue::getKey, Function.identity(), (first, second) -> second));
            enriched.clearPid();
            enriched.addAllPid(pidsByKey.values());
            merged.add(enriched.build());
        }
        return merged;
    }

    /**
     * Finds the list with the highest number of ORCID-identified authors.
     *
     * @return the first list (in iteration order) reaching the highest positive ORCID count, or
     *         {@code null} when no list contains an author with an ORCID
     */
    private static List<FieldTypeProtos.Author> findListWithMostOrcids(Collection<List<FieldTypeProtos.Author>> collection) {
        List<FieldTypeProtos.Author> best = null;
        int bestCount = 0;
        for (List<FieldTypeProtos.Author> authors : collection) {
            int count = countOrcid(authors);
            if (count > bestCount) {
                bestCount = count;
                best = authors;
            }
        }
        return best;
    }

    /**
     * Returns the value of the author's first ORCID pid.
     *
     * @throws IllegalStateException if the author has no ORCID pid; callers filter with
     *         {@link #hasOrcid} first
     */
    private static String orcidOf(FieldTypeProtos.Author author) {
        return author.getPidList().stream()
                .filter(pid -> pid.getKey().equalsIgnoreCase(ORCID))
                .findFirst()
                .map(FieldTypeProtos.KeyValue::getValue)
                .orElseThrow(() -> new IllegalStateException("author has no ORCID pid"));
    }

    /**
     * Normalizes ORCID pids of an author: any pid whose key contains "orcid" (case-insensitive)
     * gets the key "ORCID", and a value containing "orcid.org" is reduced to the part after the
     * last slash.
     */
    private static FieldTypeProtos.Author fixORCID(FieldTypeProtos.Author author) {
        FieldTypeProtos.Author.Builder newBuilder = FieldTypeProtos.Author.newBuilder(author);
        for (FieldTypeProtos.KeyValue.Builder pid : newBuilder.getPidBuilderList()) {
            // Locale.ROOT keeps the match stable under locales with special casing rules (e.g. Turkish).
            if (pid.getKey().toLowerCase(Locale.ROOT).contains(ORCID)) {
                pid.setKey("ORCID");
                if (pid.getValue().contains("orcid.org")) {
                    pid.setValue(StringUtils.substringAfterLast(pid.getValue(), "/"));
                }
            }
        }
        return newBuilder.build();
    }

    /** Counts the authors of the list that carry an ORCID pid. */
    private static int countOrcid(List<FieldTypeProtos.Author> list) {
        return (int) list.stream().filter(AuthorMerger::hasOrcid).count();
    }

    /**
     * Tells whether the author has at least one pid whose key equals "orcid", ignoring case.
     *
     * @param author the author to inspect
     * @return {@code true} if an ORCID pid is present
     */
    public static boolean hasOrcid(FieldTypeProtos.Author author) {
        return author.getPidList().stream().anyMatch(pid -> pid.getKey().equalsIgnoreCase(ORCID));
    }

    /**
     * Computes the Jaro-Winkler similarity of two authors' normalized names: surnames when both
     * parsed persons are reported accurate, the normalised full names otherwise.
     */
    private static double sim(FieldTypeProtos.Author author, FieldTypeProtos.Author author2) {
        Person first = parse(author);
        Person second = parse(author2);
        JaroWinkler jaroWinkler = new JaroWinkler();
        if (first.isAccurate() && second.isAccurate()) {
            return jaroWinkler.score(normalize(first.getSurnameString()), normalize(second.getSurnameString()));
        }
        return jaroWinkler.score(normalize(first.getNormalisedFullname()), normalize(second.getNormalisedFullname()));
    }

    /**
     * Builds a {@link Person} from the author: surname and name joined by a separator when a
     * surname is set, the full name otherwise.
     */
    private static Person parse(FieldTypeProtos.Author author) {
        // NOTE(review): the HBase constant looks like a decompiler substitution for a plain
        // separator literal (presumably ", ") - confirm against the original sources.
        return author.hasSurname()
                ? new Person(author.getSurname() + Strings.DEFAULT_KEYVALUE_SEPARATOR + author.getName(), false)
                : new Person(author.getFullname(), false);
    }

    /**
     * Prepares a name for comparison: NFD-decomposes it, lower-cases it and replaces runs of
     * non-word characters, combining diacritical marks, punctuation, digits and newlines with a
     * single space, then trims the result.
     */
    private static String normalize(String str) {
        return nfd(str)
                .toLowerCase(Locale.ROOT)
                .replaceAll("(\\W)+", " ")
                .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
                .replaceAll("(\\p{Punct})+", " ")
                .replaceAll("(\\d)+", " ")
                .replaceAll("(\\n)+", " ")
                .trim();
    }

    /** Applies Unicode canonical decomposition (NFD) to the string. */
    private static String nfd(String str) {
        return Normalizer.normalize(str, Normalizer.Form.NFD);
    }
}
