package eu.dnetlib.dhp.schema.oaf.utils;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.class */
public class CleaningFunctions {
    public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10.)";
    public static final String DOI_PREFIX = "10.";
    public static final String ORCID_PREFIX_REGEX = "^http(s?):\\/\\/orcid\\.org\\/";
    public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
    public static final Set<String> PID_BLACKLIST = new HashSet();
    public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*";
    public static final String TITLE_FILTER_REGEX = "[.*test.*\\W\\d]";
    public static final int TITLE_FILTER_RESIDUAL_LENGTH = 10;

    public static <T extends Oaf> T fixVocabularyNames(T t) {
        if (!(t instanceof Datasource) && !(t instanceof Project)) {
            if (t instanceof Organization) {
                Organization organization = (Organization) t;
                if (Objects.nonNull(organization.getCountry())) {
                    fixVocabName(organization.getCountry(), ModelConstants.DNET_COUNTRY_TYPE);
                }
            } else if (!(t instanceof Relation) && (t instanceof Result)) {
                Result result = (Result) t;
                fixVocabName(result.getLanguage(), ModelConstants.DNET_LANGUAGES);
                fixVocabName(result.getResourcetype(), ModelConstants.DNET_DATA_CITE_RESOURCE);
                fixVocabName(result.getBestaccessright(), ModelConstants.DNET_ACCESS_MODES);
                if (Objects.nonNull(result.getSubject())) {
                    result.getSubject().forEach(structuredProperty -> {
                        fixVocabName(structuredProperty.getQualifier(), ModelConstants.DNET_SUBJECT_TYPOLOGIES);
                    });
                }
                if (Objects.nonNull(result.getInstance())) {
                    for (Instance instance : result.getInstance()) {
                        fixVocabName(instance.getAccessright(), ModelConstants.DNET_ACCESS_MODES);
                        fixVocabName(instance.getRefereed(), ModelConstants.DNET_REVIEW_LEVELS);
                    }
                }
                if (Objects.nonNull(result.getAuthor())) {
                    result.getAuthor().stream().filter((v0) -> {
                        return Objects.nonNull(v0);
                    }).forEach(author -> {
                        if (Objects.nonNull(author.getPid())) {
                            author.getPid().stream().filter((v0) -> {
                                return Objects.nonNull(v0);
                            }).forEach(structuredProperty2 -> {
                                fixVocabName(structuredProperty2.getQualifier(), ModelConstants.DNET_PID_TYPES);
                            });
                        }
                    });
                }
                if (!(t instanceof Publication) && !(t instanceof Dataset) && !(t instanceof OtherResearchProduct) && (t instanceof Software)) {
                }
            }
        }
        return t;
    }

    public static <T extends Oaf> boolean filter(T t) {
        if ((t instanceof Datasource) || (t instanceof Project) || (t instanceof Organization) || (t instanceof Relation) || !(t instanceof Result)) {
            return true;
        }
        Result result = (Result) t;
        if (Objects.nonNull(result.getTitle()) && result.getTitle().isEmpty()) {
            return false;
        }
        if (!(t instanceof Publication) && !(t instanceof Dataset) && !(t instanceof OtherResearchProduct) && (t instanceof Software)) {
        }
        return true;
    }

    public static <T extends Oaf> T cleanup(T t) {
        if (!(t instanceof Datasource) && !(t instanceof Project)) {
            if (t instanceof Organization) {
                Organization organization = (Organization) t;
                if (Objects.isNull(organization.getCountry()) || StringUtils.isBlank(organization.getCountry().getClassid())) {
                    organization.setCountry(ModelConstants.UNKNOWN_COUNTRY);
                }
            } else if (!(t instanceof Relation) && (t instanceof Result)) {
                Result result = (Result) t;
                if (Objects.nonNull(result.getPublisher()) && StringUtils.isBlank(result.getPublisher().getValue())) {
                    result.setPublisher(null);
                }
                if (Objects.isNull(result.getLanguage()) || StringUtils.isBlank(result.getLanguage().getClassid())) {
                    result.setLanguage(qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
                }
                if (Objects.nonNull(result.getSubject())) {
                    result.setSubject((List) result.getSubject().stream().filter((v0) -> {
                        return Objects.nonNull(v0);
                    }).filter(structuredProperty -> {
                        return StringUtils.isNotBlank(structuredProperty.getValue());
                    }).filter(structuredProperty2 -> {
                        return Objects.nonNull(structuredProperty2.getQualifier());
                    }).filter(structuredProperty3 -> {
                        return StringUtils.isNotBlank(structuredProperty3.getQualifier().getClassid());
                    }).map(CleaningFunctions::cleanValue).collect(Collectors.toList()));
                }
                if (Objects.nonNull(result.getTitle())) {
                    result.setTitle((List) result.getTitle().stream().filter((v0) -> {
                        return Objects.nonNull(v0);
                    }).filter(structuredProperty4 -> {
                        return StringUtils.isNotBlank(structuredProperty4.getValue());
                    }).filter(structuredProperty5 -> {
                        return structuredProperty5.getValue().toLowerCase().replaceAll(TITLE_FILTER_REGEX, "").length() > 10;
                    }).map(CleaningFunctions::cleanValue).collect(Collectors.toList()));
                }
                if (Objects.nonNull(result.getDescription())) {
                    result.setDescription((List) result.getDescription().stream().filter((v0) -> {
                        return Objects.nonNull(v0);
                    }).filter(field -> {
                        return StringUtils.isNotBlank((CharSequence) field.getValue());
                    }).map(CleaningFunctions::cleanValue).collect(Collectors.toList()));
                }
                if (Objects.nonNull(result.getPid())) {
                    result.setPid(processPidCleaning(result.getPid()));
                }
                if (Objects.isNull(result.getResourcetype()) || StringUtils.isBlank(result.getResourcetype().getClassid())) {
                    result.setResourcetype(qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
                }
                if (Objects.nonNull(result.getInstance())) {
                    for (Instance instance : result.getInstance()) {
                        Optional.ofNullable(instance.getPid()).ifPresent(list -> {
                            Set set = (Set) list.stream().filter((v0) -> {
                                return Objects.nonNull(v0);
                            }).filter(structuredProperty6 -> {
                                return StringUtils.isNotBlank(structuredProperty6.getValue());
                            }).collect(Collectors.toCollection(HashSet::new));
                            Optional.ofNullable(instance.getAlternateIdentifier()).ifPresent(list -> {
                                instance.setAlternateIdentifier(Lists.newArrayList(Sets.difference((Set) list.stream().filter((v0) -> {
                                    return Objects.nonNull(v0);
                                }).filter(structuredProperty7 -> {
                                    return StringUtils.isNotBlank(structuredProperty7.getValue());
                                }).collect(Collectors.toCollection(HashSet::new)), set)));
                            });
                        });
                        if (Objects.isNull(instance.getAccessright()) || StringUtils.isBlank(instance.getAccessright().getClassid())) {
                            instance.setAccessright(accessRight(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES));
                        }
                        if (Objects.isNull(instance.getHostedby()) || StringUtils.isBlank(instance.getHostedby().getKey())) {
                            instance.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
                        }
                        if (Objects.isNull(instance.getRefereed())) {
                            instance.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
                        }
                    }
                }
                if (Objects.isNull(result.getBestaccessright()) || StringUtils.isBlank(result.getBestaccessright().getClassid())) {
                    Qualifier createBestAccessRights = OafMapperUtils.createBestAccessRights(result.getInstance());
                    if (Objects.isNull(createBestAccessRights)) {
                        result.setBestaccessright(qualifier(ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE, ModelConstants.DNET_ACCESS_MODES));
                    } else {
                        result.setBestaccessright(createBestAccessRights);
                    }
                }
                if (Objects.nonNull(result.getAuthor())) {
                    ArrayList newArrayList = Lists.newArrayList();
                    for (Author author : result.getAuthor()) {
                        if (Objects.isNull(author.getPid())) {
                            author.setPid(Lists.newArrayList());
                        } else {
                            author.setPid((List) ((LinkedHashMap) author.getPid().stream().filter((v0) -> {
                                return Objects.nonNull(v0);
                            }).filter(structuredProperty6 -> {
                                return Objects.nonNull(structuredProperty6.getQualifier());
                            }).filter(structuredProperty7 -> {
                                return StringUtils.isNotBlank(structuredProperty7.getValue());
                            }).map(structuredProperty8 -> {
                                structuredProperty8.setValue(structuredProperty8.getValue().trim().replaceAll(ORCID_PREFIX_REGEX, ""));
                                return structuredProperty8;
                            }).filter(structuredProperty9 -> {
                                return StringUtils.isNotBlank(structuredProperty9.getValue());
                            }).collect(Collectors.toMap((v0) -> {
                                return v0.getValue();
                            }, Function.identity(), (structuredProperty10, structuredProperty11) -> {
                                return structuredProperty10;
                            }, LinkedHashMap::new))).values().stream().collect(Collectors.toList()));
                        }
                        if (StringUtils.isBlank(author.getFullname()) && StringUtils.isNotBlank(author.getName()) && StringUtils.isNotBlank(author.getSurname())) {
                            author.setFullname(author.getSurname() + ", " + author.getName());
                        }
                        if (StringUtils.isNotBlank(author.getFullname()) && isValidAuthorName(author)) {
                            newArrayList.add(author);
                        }
                    }
                    if (newArrayList.stream().anyMatch(author2 -> {
                        return Objects.isNull(author2.getRank());
                    })) {
                        int i = 1;
                        Iterator<Author> it = newArrayList.iterator();
                        while (it.hasNext()) {
                            int i2 = i;
                            i++;
                            it.next().setRank(Integer.valueOf(i2));
                        }
                    }
                    result.setAuthor(newArrayList);
                }
                if (!(t instanceof Publication) && !(t instanceof Dataset) && !(t instanceof OtherResearchProduct) && (t instanceof Software)) {
                }
            }
        }
        return t;
    }

    private static boolean isValidAuthorName(Author author) {
        return !((String) Stream.of((Object[]) new String[]{author.getFullname(), author.getName(), author.getSurname()}).filter(str -> {
            return (str == null || str.isEmpty()) ? false : true;
        }).collect(Collectors.joining(""))).toLowerCase().matches(INVALID_AUTHOR_REGEX);
    }

    private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> list) {
        return (List) list.stream().filter((v0) -> {
            return Objects.nonNull(v0);
        }).filter(structuredProperty -> {
            return StringUtils.isNotBlank(StringUtils.trim(structuredProperty.getValue()));
        }).filter(structuredProperty2 -> {
            return !PID_BLACKLIST.contains(structuredProperty2.getValue().trim().toLowerCase());
        }).filter(structuredProperty3 -> {
            return Objects.nonNull(structuredProperty3.getQualifier());
        }).filter(structuredProperty4 -> {
            return StringUtils.isNotBlank(structuredProperty4.getQualifier().getClassid());
        }).map(CleaningFunctions::normalizePidValue).filter(CleaningFunctions::pidFilter).collect(Collectors.toList());
    }

    protected static StructuredProperty cleanValue(StructuredProperty structuredProperty) {
        structuredProperty.setValue(structuredProperty.getValue().replaceAll(CLEANING_REGEX, " "));
        return structuredProperty;
    }

    protected static Field<String> cleanValue(Field<String> field) {
        field.setValue(field.getValue().replaceAll(CLEANING_REGEX, " "));
        return field;
    }

    /* JADX INFO: Access modifiers changed from: private */
    public static void fixVocabName(Qualifier qualifier, String str) {
        if (Objects.nonNull(qualifier) && StringUtils.isBlank(qualifier.getSchemeid())) {
            qualifier.setSchemeid(str);
            qualifier.setSchemename(str);
        }
    }

    private static AccessRight accessRight(String str, String str2, String str3) {
        return OafMapperUtils.accessRight(str, str2, str3, str3);
    }

    private static Qualifier qualifier(String str, String str2, String str3) {
        return OafMapperUtils.qualifier(str, str2, str3, str3);
    }

    public static boolean pidFilter(StructuredProperty structuredProperty) {
        String value = structuredProperty.getValue();
        return (Objects.isNull(structuredProperty.getQualifier()) || StringUtils.isBlank(value) || StringUtils.isBlank(value.replaceAll("(?:\\n|\\r|\\t|\\s)", "")) || PID_BLACKLIST.contains(value) || PidBlacklistProvider.getBlacklist(structuredProperty.getQualifier().getClassid()).contains(value)) ? false : true;
    }

    public static StructuredProperty normalizePidValue(StructuredProperty structuredProperty) {
        String str = (String) Optional.ofNullable(structuredProperty.getValue()).map((v0) -> {
            return v0.trim();
        }).orElseThrow(() -> {
            return new IllegalArgumentException("PID value cannot be empty");
        });
        String classid = structuredProperty.getQualifier().getClassid();
        boolean z = -1;
        switch (classid.hashCode()) {
            case 99646:
                if (classid.equals("doi")) {
                    z = false;
                    break;
                }
                break;
        }
        switch (z) {
            case false:
                structuredProperty.setValue(str.toLowerCase().replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX));
                break;
        }
        return structuredProperty;
    }

    static {
        PID_BLACKLIST.add("none");
        PID_BLACKLIST.add("na");
    }
}
