package eu.dnetlib.dhp.collection.plugin.base;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

@Disabled
/* loaded from: input_file:eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.class */
public class BaseCollectorIteratorTest {
    @Test
    void testImportFile() throws Exception {
        long j = 0;
        BaseCollectorIterator baseCollectorIterator = new BaseCollectorIterator("base-sample.tar", new AggregatorReport());
        HashMap hashMap = new HashMap();
        HashMap hashMap2 = new HashMap();
        HashSet hashSet = new HashSet();
        while (baseCollectorIterator.hasNext()) {
            Document parseText = DocumentHelper.parseText(baseCollectorIterator.next());
            j++;
            if (j % 1000 == 0) {
                System.out.println("# Read records: " + j);
            }
            for (Object obj : parseText.selectNodes("//*|//@*")) {
                String path = ((Node) obj).getPath();
                if (hashMap2.containsKey(path)) {
                    ((AtomicInteger) hashMap2.get(path)).incrementAndGet();
                } else {
                    hashMap2.put(path, new AtomicInteger(1));
                }
                if (obj instanceof Element) {
                    Element element = (Element) obj;
                    if ("collection".equals(element.getName())) {
                        String trim = element.getText().trim();
                        if (StringUtils.isNotBlank(trim) && !hashMap.containsKey(trim)) {
                            HashMap hashMap3 = new HashMap();
                            for (Object obj2 : element.attributes()) {
                                hashMap3.put(((Attribute) obj2).getName(), ((Attribute) obj2).getValue());
                            }
                            hashMap.put(trim, hashMap3);
                        }
                    } else if ("type".equals(element.getName())) {
                        hashSet.add(element.getText().trim());
                    }
                }
            }
        }
        ObjectMapper objectMapper = new ObjectMapper();
        for (Map.Entry entry : hashMap.entrySet()) {
            System.out.println(((String) entry.getKey()) + ": " + objectMapper.writeValueAsString(entry.getValue()));
        }
        for (Map.Entry entry2 : hashMap2.entrySet()) {
            System.out.println(((String) entry2.getKey()) + ": " + ((AtomicInteger) entry2.getValue()).get());
        }
        System.out.println("TYPES: ");
        Iterator it = hashSet.iterator();
        while (it.hasNext()) {
            System.out.println((String) it.next());
        }
        Assertions.assertEquals(30000L, j);
    }

    @Test
    public void testParquet() throws Exception {
        String iOUtils = IOUtils.toString(getClass().getResourceAsStream("record.xml"));
        SparkSession orCreate = SparkSession.builder().master("local[*]").getOrCreate();
        ArrayList arrayList = new ArrayList();
        for (int i = 0; i < 10; i++) {
            arrayList.add(extractInfo(iOUtils));
        }
        Dataset createDataset = orCreate.createDataset(JavaSparkContext.fromSparkContext(orCreate.sparkContext()).parallelize(arrayList).rdd(), Encoders.bean(BaseRecordInfo.class));
        createDataset.printSchema();
        createDataset.show(false);
    }

    private BaseRecordInfo extractInfo(String str) {
        try {
            Document parseText = DocumentHelper.parseText(str);
            BaseRecordInfo baseRecordInfo = new BaseRecordInfo();
            LinkedHashSet linkedHashSet = new LinkedHashSet();
            LinkedHashSet linkedHashSet2 = new LinkedHashSet();
            ArrayList arrayList = new ArrayList();
            for (Object obj : parseText.selectNodes("//*|//@*")) {
                linkedHashSet.add(((Node) obj).getPath());
                if (obj instanceof Element) {
                    Element element = (Element) obj;
                    String name = element.getName();
                    if ("collection".equals(name)) {
                        String trim = element.getText().trim();
                        if (StringUtils.isNotBlank(trim)) {
                            BaseCollectionInfo baseCollectionInfo = new BaseCollectionInfo();
                            baseCollectionInfo.setId(trim);
                            baseCollectionInfo.setOpendoarId(element.valueOf("@opendoar_id").trim());
                            baseCollectionInfo.setRorId(element.valueOf("@ror_id").trim());
                            arrayList.add(baseCollectionInfo);
                        }
                    } else if ("type".equals(name)) {
                        linkedHashSet2.add("TYPE: " + element.getText().trim());
                    } else if ("typenorm".equals(name)) {
                        linkedHashSet2.add("TYPE_NORM: " + element.getText().trim());
                    }
                }
            }
            baseRecordInfo.setId(parseText.valueOf("//*[local-name() = 'header']/*[local-name() = 'identifier']").trim());
            baseRecordInfo.getTypes().addAll(linkedHashSet2);
            baseRecordInfo.getPaths().addAll(linkedHashSet);
            baseRecordInfo.setCollections(arrayList);
            return baseRecordInfo;
        } catch (DocumentException e) {
            throw new RuntimeException((Throwable) e);
        }
    }
}
