package eu.dnetlib.dhp.oa.graph.raw;

import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;

/* loaded from: input_file:eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.class */
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
    public static final String HTTP_DOI_PREIFX = "https://doi.org/";
    public static final String HTTP_HANDLE_PREIFX = "https://hdl.handle.net/";

    public OdfToOafMapper(VocabularyGroup vocabularyGroup, boolean z, boolean z2, boolean z3) {
        super(vocabularyGroup, z, z2, z3);
    }

    public OdfToOafMapper(VocabularyGroup vocabularyGroup, boolean z, boolean z2) {
        super(vocabularyGroup, z, z2);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<StructuredProperty> prepareTitles(Document document, DataInfo dataInfo) {
        ArrayList newArrayList = Lists.newArrayList();
        for (Element element : document.selectNodes("//*[local-name()='titles']/*[local-name()='title']|//*[local-name()='resource']/*[local-name()='title']")) {
            String textTrim = element.getTextTrim();
            String attributeValue = element.attributeValue("titleType");
            if (StringUtils.isNotBlank(attributeValue)) {
                newArrayList.add(OafMapperUtils.structuredProperty(textTrim, attributeValue, attributeValue, "dnet:dataCite_title", "dnet:dataCite_title", dataInfo));
            } else {
                newArrayList.add(OafMapperUtils.structuredProperty(textTrim, ModelConstants.MAIN_TITLE_QUALIFIER, dataInfo));
            }
        }
        return newArrayList;
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Author> prepareAuthors(Document document, DataInfo dataInfo) {
        ArrayList arrayList = new ArrayList();
        int i = 1;
        for (Node node : document.selectNodes("//*[local-name()='creator']")) {
            Author author = new Author();
            String valueOf = node.valueOf("./*[local-name()='creatorName']");
            String valueOf2 = node.valueOf("./*[local-name()='givenName']");
            String valueOf3 = node.valueOf("./*[local-name()='familyName']");
            if (StringUtils.isNotBlank(valueOf) || StringUtils.isNotBlank(valueOf2) || StringUtils.isNotBlank(valueOf3)) {
                author.setFullname(valueOf);
                PacePerson pacePerson = new PacePerson(valueOf, false);
                if (StringUtils.isBlank(valueOf2) && pacePerson.isAccurate()) {
                    author.setName(pacePerson.getNormalisedFirstName());
                } else {
                    author.setName(valueOf2);
                }
                if (StringUtils.isBlank(valueOf3) && pacePerson.isAccurate()) {
                    author.setSurname(pacePerson.getNormalisedSurname());
                } else {
                    author.setSurname(valueOf3);
                }
                if (StringUtils.isBlank(author.getFullname())) {
                    author.setFullname(String.format("%s, %s", author.getSurname(), author.getName()));
                }
                author.setAffiliation(prepareListFields(node, "./*[local-name()='affiliation']", dataInfo));
                author.setPid(preparePids(node, dataInfo));
                int i2 = i;
                i++;
                author.setRank(Integer.valueOf(i2));
                arrayList.add(author);
            }
        }
        return arrayList;
    }

    private List<StructuredProperty> preparePids(Node node, DataInfo dataInfo) {
        ArrayList arrayList = new ArrayList();
        for (Object obj : node.selectNodes("./*[local-name()='nameIdentifier']")) {
            String text = ((Node) obj).getText();
            String replace = ((Node) obj).valueOf("./@nameIdentifierScheme").trim().toUpperCase().replace(" ", "").replace("_", "");
            if (replace.toLowerCase().startsWith("orcid")) {
                arrayList.add(OafMapperUtils.structuredProperty(text.replace("http://orcid.org/", "").replace("https://orcid.org/", ""), ORCID_PID_TYPE, dataInfo));
            } else if (replace.startsWith("MAGID")) {
                arrayList.add(OafMapperUtils.structuredProperty(text, MAG_PID_TYPE, dataInfo));
            }
        }
        return arrayList;
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Instance> prepareInstances(Document document, DataInfo dataInfo, KeyValue keyValue, KeyValue keyValue2) {
        Instance instance = new Instance();
        instance.setInstancetype(prepareQualifier(document, "//dr:CobjCategory", "dnet:publication_resource"));
        instance.setCollectedfrom(keyValue);
        instance.setHostedby(keyValue2);
        List<StructuredProperty> prepareResultPids = prepareResultPids(document, dataInfo);
        List pids = IdentifierFactory.getPids(prepareResultPids, keyValue);
        instance.setInstanceTypeMapping(prepareInstanceTypeMapping(document));
        HashSet hashSet = new HashSet(pids);
        instance.setAlternateIdentifier((List) prepareResultPids.stream().filter(structuredProperty -> {
            return !hashSet.contains(structuredProperty);
        }).collect(Collectors.toList()));
        instance.setPid(pids);
        instance.setDateofacceptance(OafMapperUtils.field(document.valueOf("//oaf:dateAccepted"), dataInfo));
        String valueOf = document.valueOf("//oaf:distributionlocation");
        instance.setDistributionlocation(StringUtils.isNotBlank(valueOf) ? valueOf : null);
        instance.setAccessright(prepareAccessRight(document, "//oaf:accessrights", "dnet:access_modes"));
        instance.setLicense(OafMapperUtils.field(document.valueOf("//oaf:license"), dataInfo));
        instance.setRefereed(prepareQualifier(document, "//oaf:refereed", "dnet:review_levels"));
        instance.setProcessingchargeamount(OafMapperUtils.field(document.valueOf("//oaf:processingchargeamount"), dataInfo));
        instance.setProcessingchargecurrency(OafMapperUtils.field(document.valueOf("//oaf:processingchargeamount/@currency"), dataInfo));
        Optional<U> map = prepareListURL(document, "//oaf:fulltext", dataInfo).stream().findFirst().map((v0) -> {
            return v0.getValue();
        });
        instance.getClass();
        map.ifPresent(instance::setFulltext);
        HashSet hashSet2 = new HashSet();
        Iterator it = document.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']").iterator();
        while (it.hasNext()) {
            hashSet2.add(trimAndDecodeUrl(((Node) it.next()).getText().trim()));
        }
        Iterator it2 = document.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='landingPage']").iterator();
        while (it2.hasNext()) {
            hashSet2.add(trimAndDecodeUrl(((Node) it2.next()).getText().trim()));
        }
        Iterator it3 = document.selectNodes("//*[local-name()='identifier' and ./@identifierType='URL']").iterator();
        while (it3.hasNext()) {
            hashSet2.add(trimAndDecodeUrl(((Node) it3.next()).getText().trim()));
        }
        Iterator it4 = document.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']").iterator();
        while (it4.hasNext()) {
            hashSet2.add(trimAndDecodeUrl(((Node) it4.next()).getText().trim()));
        }
        Iterator it5 = document.selectNodes("//*[local-name()='identifier' and ./@identifierType='w3id']").iterator();
        while (it5.hasNext()) {
            hashSet2.add(trimAndDecodeUrl(((Node) it5.next()).getText().trim()));
        }
        Set<String> validateUrl = validateUrl(hashSet2);
        if (validateUrl.stream().noneMatch(str -> {
            return str.contains("doi.org");
        })) {
            Iterator it6 = document.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']").iterator();
            while (it6.hasNext()) {
                validateUrl.add(HTTP_DOI_PREIFX + ((Node) it6.next()).getText().trim());
            }
            Iterator it7 = document.selectNodes("//*[local-name()='identifier' and ./@identifierType='DOI']").iterator();
            while (it7.hasNext()) {
                validateUrl.add(HTTP_DOI_PREIFX + ((Node) it7.next()).getText().trim());
            }
        }
        if (validateUrl.stream().noneMatch(str2 -> {
            return str2.contains("hdl.handle.net");
        })) {
            Iterator it8 = document.selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='Handle']").iterator();
            while (it8.hasNext()) {
                validateUrl.add(HTTP_HANDLE_PREIFX + ((Node) it8.next()).getText().trim());
            }
            Iterator it9 = document.selectNodes("//*[local-name()='identifier' and ./@identifierType='Handle']").iterator();
            while (it9.hasNext()) {
                validateUrl.add(HTTP_HANDLE_PREIFX + ((Node) it9.next()).getText().trim());
            }
        }
        if (!validateUrl.isEmpty()) {
            instance.setUrl(new ArrayList());
            instance.getUrl().addAll(validateUrl);
        }
        return Arrays.asList(instance);
    }

    protected String trimAndDecodeUrl(String str) {
        try {
            return URLDecoder.decode(str.trim(), "UTF-8");
        } catch (Throwable th) {
            return str;
        }
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected String findOriginalType(Document document) {
        return (String) ObjectUtils.firstNonNull(new String[]{(String) Optional.ofNullable(document.selectSingleNode("//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']")).map(element -> {
            return (String) ObjectUtils.firstNonNull(new String[]{(String) Optional.ofNullable(element.attributeValue("uri")).filter((v0) -> {
                return StringUtils.isNotBlank(v0);
            }).orElse(null), (String) Optional.ofNullable(element.attributeValue("anyURI")).filter((v0) -> {
                return StringUtils.isNotBlank(v0);
            }).orElse(null), (String) Optional.ofNullable(element.getText()).filter((v0) -> {
                return StringUtils.isNotBlank(v0);
            }).orElse(null), (String) Optional.ofNullable(element.attributeValue("resourceTypeGeneral")).filter((v0) -> {
                return StringUtils.isNotBlank(v0);
            }).orElse(null)});
        }).orElse(null), document.valueOf("//dr:CobjCategory/text()")});
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Field<String>> prepareSources(Document document, DataInfo dataInfo) {
        return new ArrayList();
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<StructuredProperty> prepareRelevantDates(Document document, DataInfo dataInfo) {
        ArrayList arrayList = new ArrayList();
        for (Object obj : document.selectNodes("//*[local-name()='date']")) {
            String valueOf = ((Node) obj).valueOf("@dateType");
            if (StringUtils.isBlank(valueOf) || !(valueOf.equalsIgnoreCase("Accepted") || valueOf.equalsIgnoreCase("Issued") || valueOf.equalsIgnoreCase("Updated") || valueOf.equalsIgnoreCase("Available"))) {
                arrayList.add(OafMapperUtils.structuredProperty(((Node) obj).getText(), "UNKNOWN", "UNKNOWN", "dnet:dataCite_date", "dnet:dataCite_date", dataInfo));
            } else {
                arrayList.add(OafMapperUtils.structuredProperty(((Node) obj).getText(), valueOf, valueOf, "dnet:dataCite_date", "dnet:dataCite_date", dataInfo));
            }
        }
        return arrayList;
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Field<String>> prepareCoverages(Document document, DataInfo dataInfo) {
        return new ArrayList();
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Field<String>> prepareContributors(Document document, DataInfo dataInfo) {
        return prepareListFields(document, "//*[local-name()='contributorName']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Field<String>> prepareFormats(Document document, DataInfo dataInfo) {
        return prepareListFields(document, "//*[local-name()='format']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Field<String> preparePublisher(Document document, DataInfo dataInfo) {
        return prepareField(document, "//*[local-name()='publisher']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Field<String>> prepareDescriptions(Document document, DataInfo dataInfo) {
        return prepareListFields(document, "//*[local-name()='description' and ./@descriptionType='Abstract']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Subject> prepareSubjects(Document document, DataInfo dataInfo) {
        return prepareSubjectList(document, "//*[local-name()='subject']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Qualifier prepareLanguages(Document document) {
        return prepareQualifier(document, "//*[local-name()='language']", "dnet:languages");
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Field<String>> prepareOtherResearchProductTools(Document document, DataInfo dataInfo) {
        return new ArrayList();
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Field<String>> prepareOtherResearchProductContactGroups(Document document, DataInfo dataInfo) {
        return prepareListFields(document, "//*[local-name()='contributor' and ./@contributorType='ContactGroup']/*[local-name()='contributorName']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Field<String>> prepareOtherResearchProductContactPersons(Document document, DataInfo dataInfo) {
        return prepareListFields(document, "//*[local-name()='contributor' and ./@contributorType='ContactPerson']/*[local-name()='contributorName']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Qualifier prepareSoftwareProgrammingLanguage(Document document, DataInfo dataInfo) {
        return prepareQualifier(document, "//*[local-name()='format']", "dnet:programming_languages");
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Field<String> prepareSoftwareCodeRepositoryUrl(Document document, DataInfo dataInfo) {
        return null;
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<StructuredProperty> prepareSoftwareLicenses(Document document, DataInfo dataInfo) {
        return new ArrayList();
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Field<String>> prepareSoftwareDocumentationUrls(Document document, DataInfo dataInfo) {
        return prepareListFields(document, "//*[local-name()='relatedIdentifier' and ./@relatedIdentifierType='URL' and @relationType='IsDocumentedBy']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<GeoLocation> prepareDatasetGeoLocations(Document document, DataInfo dataInfo) {
        ArrayList arrayList = new ArrayList();
        for (Object obj : document.selectNodes("//*[local-name()='geoLocation']")) {
            GeoLocation geoLocation = new GeoLocation();
            geoLocation.setBox(((Node) obj).valueOf("./*[local-name()='geoLocationBox']"));
            geoLocation.setPlace(((Node) obj).valueOf("./*[local-name()='geoLocationPlace']"));
            geoLocation.setPoint(((Node) obj).valueOf("./*[local-name()='geoLocationPoint']"));
            arrayList.add(geoLocation);
        }
        return arrayList;
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Field<String> prepareDatasetMetadataVersionNumber(Document document, DataInfo dataInfo) {
        return null;
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Field<String> prepareDatasetLastMetadataUpdate(Document document, DataInfo dataInfo) {
        return prepareField(document, "//*[local-name()='date' and ./@dateType='Updated']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Field<String> prepareDatasetVersion(Document document, DataInfo dataInfo) {
        return prepareField(document, "//*[local-name()='version']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Field<String> prepareDatasetSize(Document document, DataInfo dataInfo) {
        return prepareField(document, "//*[local-name()='size']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Field<String> prepareDatasetDevice(Document document, DataInfo dataInfo) {
        return null;
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Field<String> prepareDatasetStorageDate(Document document, DataInfo dataInfo) {
        return prepareField(document, "//*[local-name()='date' and ./@dateType='Issued']", dataInfo);
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<Oaf> addOtherResultRels(Document document, OafEntity oafEntity, DataInfo dataInfo) {
        String id = oafEntity.getId();
        ArrayList arrayList = new ArrayList();
        for (Object obj : document.selectNodes("//*[local-name()='relatedIdentifier']")) {
            String trim = ((Node) obj).getText().trim();
            if (StringUtils.isNotBlank(trim)) {
                String valueOf = ((Node) obj).valueOf("@relatedIdentifierType");
                String valueOf2 = ((Node) obj).valueOf("@relationType");
                String guessRelatedIdentifier = guessRelatedIdentifier(valueOf, trim);
                if (StringUtils.isNotBlank(guessRelatedIdentifier)) {
                    arrayList.addAll(getRelations(valueOf2, id, guessRelatedIdentifier, oafEntity, dataInfo));
                }
            }
        }
        return arrayList;
    }

    protected String guessRelatedIdentifier(String str, String str2) {
        if (StringUtils.isBlank(str) || StringUtils.isBlank(str2)) {
            return null;
        }
        if (str.equalsIgnoreCase("OPENAIRE")) {
            return OafMapperUtils.createOpenaireId(50, str2, false);
        }
        if (pidTypeWithAuthority.containsKey(str.toLowerCase())) {
            return IdentifierFactory.idFromPid("50", pidTypeWithAuthority.get(str.toLowerCase()), str2, true);
        }
        return null;
    }

    protected List<Oaf> getRelations(String str, String str2, String str3, OafEntity oafEntity, DataInfo dataInfo) {
        ArrayList arrayList = new ArrayList();
        RelationInverse findRelation = ModelSupport.findRelation(str);
        if (findRelation != null) {
            arrayList.add(OafMapperUtils.getRelation(str2, str3, findRelation.getRelType(), findRelation.getSubReltype(), findRelation.getRelClass(), oafEntity.getCollectedfrom(), dataInfo, oafEntity.getLastupdatetimestamp(), (String) null, (List) null));
            arrayList.add(OafMapperUtils.getRelation(str3, str2, findRelation.getRelType(), findRelation.getSubReltype(), findRelation.getInverseRelClass(), oafEntity.getCollectedfrom(), dataInfo, oafEntity.getLastupdatetimestamp(), (String) null, (List) null));
        }
        return arrayList;
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected Qualifier prepareResourceType(Document document, DataInfo dataInfo) {
        return prepareQualifier(document, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", "dnet:dataCite_resource");
    }

    @Override // eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper
    protected List<StructuredProperty> prepareResultPids(Document document, DataInfo dataInfo) {
        HashSet hashSet = new HashSet();
        hashSet.addAll(prepareListStructPropsWithValidQualifier(document, "//oaf:identifier", "@identifierType", "dnet:pid_types", dataInfo));
        hashSet.addAll(prepareListStructPropsWithValidQualifier(document, "//*[local-name()='identifier' and ./@identifierType != 'URL' and ./@identifierType != 'landingPage']", "@identifierType", "dnet:pid_types", dataInfo));
        hashSet.addAll(prepareListStructPropsWithValidQualifier(document, "//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType != 'URL' and ./@alternateIdentifierType != 'landingPage']", "@alternateIdentifierType", "dnet:pid_types", dataInfo));
        return (List) hashSet.stream().map(CleaningFunctions::normalizePidValue).collect(Collectors.toList());
    }
}
