package org.gcube.elasticsearch.helpers;

import com.google.common.base.Joiner;
import com.google.gson.Gson;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.elasticsearch.common.netty.handler.codec.http.multipart.HttpPostBodyUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/* loaded from: input_file:WEB-INF/lib/elasticsearch-gcube-2.3.0-4.0.0-126569.jar:org/gcube/elasticsearch/helpers/FullTextIndexDocument.class */
/**
 * In-memory representation of one full-text index document.
 *
 * <p>A document is a multi-valued map from field name to the list of values found for that
 * field. It is populated from an XML rowset: the first {@code ROWSET} element supplies the
 * collection id ({@code colID} attribute) and language ({@code lang} attribute), and every
 * {@code FIELD} element contributes its trimmed text content under the name given by its
 * {@code name} attribute.
 *
 * <p>Instances are mutable and perform no synchronization of their own.
 */
public class FullTextIndexDocument implements Serializable {
    private static final long serialVersionUID = 1L;
    private static final Logger logger = LoggerFactory.getLogger(FullTextIndexDocument.class);
    private static final Gson gson = new Gson();
    private static final String ELEMENT_KEY = "FIELD";
    private static final String ROWSET_KEY = "ROWSET";
    private static final String ATTRIBUTE_FIELDNAME = "name";
    private static final String ID_KEY_FIELD = "ObjectID";
    private static final String COLLECTION_FIELD = "gDocCollectionID";
    private static final String COLLECTION_ATTR_NAME = "colID";
    private static final String LANG_ATTR_NAME = "lang";
    public static final String LANGUAGE_FIELD = "gDocCollectionLang";
    public static final String LANG_UNKNOWN = "unknown";
    public static final String DOCID_FIELD = "ObjectID";
    /** Maximum number of characters kept for a single field value (see {@link #parseXML(Map, String)}). */
    public static final int MAX_FIELD_LENGTH = 32000;

    /**
     * Parser features switched off before parsing so that a document cannot make the parser
     * fetch external entities or external DTDs (XXE).
     */
    private static final String[] EXTERNAL_RESOURCE_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    /** Field name -> values, in the order they were encountered while parsing. */
    private final Map<String, List<String>> fields;

    /** Creates an empty document with no fields. */
    public FullTextIndexDocument() {
        this.fields = new HashMap<String, List<String>>();
    }

    /**
     * Creates a document and populates it from the given XML rowset.
     *
     * @param str the XML text to parse
     * @throws Exception if the XML cannot be parsed or contains no {@code ROWSET} element
     */
    public FullTextIndexDocument(String str) throws Exception {
        this();
        parseXML(str);
    }

    /**
     * Parses the given XML rowset into this document's field map.
     *
     * @param str the XML text to parse
     * @throws Exception if the XML cannot be parsed or contains no {@code ROWSET} element
     */
    public void parseXML(String str) throws Exception {
        parseXML(getFields(), str);
    }

    /**
     * Parses an XML rowset and stores its contents in {@code map}.
     *
     * <p>The collection id and language entries are replaced with the values of the first
     * {@code ROWSET} element (an empty language becomes {@link #LANG_UNKNOWN}). Each non-empty
     * {@code FIELD} value is then appended to the list stored under its field name. Values
     * longer than {@link #MAX_FIELD_LENGTH} characters are truncated, except for the field
     * named by {@code HttpPostBodyUtil.FILE}, which is kept whole.
     *
     * @param map the field map to fill; its lists must be mutable
     * @param str the XML text to parse
     * @throws IllegalArgumentException if the document contains no {@code ROWSET} element
     * @throws Exception if the XML cannot be parsed
     */
    public static void parseXML(Map<String, List<String>> map, String str) throws Exception {
        Document document = loadXMLFromString(str);
        document.getDocumentElement().normalize();
        long startMillis = System.currentTimeMillis();

        Element rowset = (Element) document.getElementsByTagName(ROWSET_KEY).item(0);
        if (rowset == null) {
            throw new IllegalArgumentException("XML document contains no <" + ROWSET_KEY + "> element");
        }

        // Element.getAttribute returns "" (never null) for a missing attribute.
        String collectionId = rowset.getAttribute(COLLECTION_ATTR_NAME).trim();
        String language = rowset.getAttribute(LANG_ATTR_NAME).trim();
        if (language.isEmpty()) {
            language = LANG_UNKNOWN;
        }
        // These lists must be growable: a FIELD element carrying the same name is appended to
        // them below, which fails on the fixed-size list returned by Arrays.asList.
        map.put(COLLECTION_FIELD, new ArrayList<String>(Arrays.asList(collectionId)));
        map.put(LANGUAGE_FIELD, new ArrayList<String>(Arrays.asList(language)));

        NodeList fieldNodes = document.getElementsByTagName(ELEMENT_KEY);
        int fieldCount = fieldNodes.getLength();
        logger.info("xml field nodelist length : {}", fieldCount);
        for (int i = 0; i < fieldCount; i++) {
            Element field = (Element) fieldNodes.item(i);
            String name = field.getAttribute(ATTRIBUTE_FIELDNAME).trim();
            String value = field.getTextContent().trim();

            if (value.isEmpty() && name.equalsIgnoreCase(LANGUAGE_FIELD)) {
                value = LANG_UNKNOWN;
            }
            if (value.isEmpty()) {
                // Empty values are not indexed.
                continue;
            }

            if (value.length() > MAX_FIELD_LENGTH && !name.equalsIgnoreCase(HttpPostBodyUtil.FILE)) {
                logger.trace("encountered field : {}  with length : {} will trim it to : {}",
                        name, value.length(), MAX_FIELD_LENGTH);
                value = truncate(value, MAX_FIELD_LENGTH);
            }

            List<String> values = map.get(name);
            if (values == null) {
                values = new ArrayList<String>();
                map.put(name, values);
            }
            values.add(value);
        }
        logger.info("parse xml after : {} secs", elapsedSeconds(startMillis));
    }

    /**
     * Returns the document id; equivalent to {@link #getDocId()}.
     *
     * @return the document id, or {@code null} if it is missing or ambiguous
     */
    public String getID() {
        return getDocId();
    }

    /**
     * Returns the language of this document.
     *
     * @return the first value of the language field, or {@link #LANG_UNKNOWN} when the field
     *     is absent or has no values
     */
    public String getDocLang() {
        List<String> languages = getFields().get(LANGUAGE_FIELD);
        if (languages == null) {
            logger.info("Lang is null for doc with id : {}", getID());
            return LANG_UNKNOWN;
        }
        if (languages.isEmpty()) {
            logger.info("No languages found for doc with id : {}", getID());
            return LANG_UNKNOWN;
        }
        if (languages.size() > 1) {
            logger.info("Multiple languages found for doc with id : {}", getID());
            logger.info("languages are : {} picking first...", languages);
            return languages.get(0);
        }
        logger.info("language found for doc with id : {}", getID());
        logger.info("language is : {} picking first...", languages);
        return languages.get(0);
    }

    /**
     * Returns the document id.
     *
     * @return the single value of the {@code ObjectID} field, or {@code null} when the field is
     *     absent or does not hold exactly one value
     */
    public String getDocId() {
        List<String> ids = getFields().get(ID_KEY_FIELD);
        if (ids == null) {
            logger.info("DocID is null for doc");
            return null;
        }
        if (ids.size() != 1) {
            logger.info("0 or more than 1 docIDs found for doc");
            logger.info("docIDs are : {}", ids);
            return null;
        }
        logger.info("DocID found for doc");
        logger.info("docID is : {} picking first...", ids);
        return ids.get(0);
    }

    /**
     * Returns the id of the collection this document belongs to.
     *
     * @return the single value of the collection field, or {@code null} when the field is
     *     absent or does not hold exactly one value
     */
    public String getColId() {
        List<String> collectionIds = getFields().get(COLLECTION_FIELD);
        if (collectionIds == null) {
            logger.info("ColId is null for doc with id : {}", getID());
            return null;
        }
        if (collectionIds.size() != 1) {
            logger.info("0 or more than 1 colIDs found for doc with id : {}", getID());
            logger.info("colIDs are : {}", collectionIds);
            return null;
        }
        logger.info("ColId found for doc with id : {}", getID());
        logger.info("colIDs is : {} picking first...", collectionIds);
        return collectionIds.get(0);
    }

    /**
     * Joins all {@code ObjectID} values of {@code map} with the shingle filler token.
     *
     * <p>Not referenced anywhere in this class. Throws {@link NullPointerException} when the
     * map has no {@code ObjectID} entry.
     */
    private static String calculateID(Map<String, ArrayList<String>> map) {
        return Joiner.on(ShingleFilter.DEFAULT_FILLER_TOKEN).join(map.get(ID_KEY_FIELD));
    }

    /** Returns the document id followed by the JSON form of this document. */
    @Override
    public String toString() {
        return "ID : " + getID() + ", values : " + toJSON();
    }

    /**
     * Serializes this document with Gson.
     *
     * @return the JSON representation of this object
     */
    public String toJSON() {
        return gson.toJson(this);
    }

    /**
     * Parses XML text into a DOM document.
     *
     * <p>Resolution of external entities and external DTDs is disabled before parsing, so a
     * document cannot make the parser read local files or remote URLs. Entities declared in
     * an internal DTD subset are still expanded.
     *
     * @param str the XML text to parse
     * @return the parsed DOM document
     * @throws Exception if a parser cannot be created or the text is not well-formed XML
     */
    public static Document loadXMLFromString(String str) throws Exception {
        long startMillis = System.currentTimeMillis();
        Document document = newHardenedFactory()
                .newDocumentBuilder()
                .parse(new InputSource(new StringReader(str)));
        logger.info("load xml after : {} secs", elapsedSeconds(startMillis));
        return document;
    }

    /**
     * Returns the live, mutable field map backing this document.
     *
     * @return field name to list of values
     */
    public Map<String, List<String>> getFields() {
        return this.fields;
    }

    /** Creates a parser factory with external entity and external DTD loading switched off. */
    private static DocumentBuilderFactory newHardenedFactory() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        for (String feature : EXTERNAL_RESOURCE_FEATURES) {
            try {
                factory.setFeature(feature, false);
            } catch (ParserConfigurationException e) {
                // Keep parsing usable on parser implementations that do not know the
                // feature, but make the weaker protection visible in the log.
                logger.warn("XML parser does not support feature {}; external resources may still be resolved",
                        feature, e);
            }
        }
        return factory;
    }

    /** Cuts {@code value} to at most {@code maxLength} chars without splitting a surrogate pair. */
    private static String truncate(String value, int maxLength) {
        int end = maxLength;
        if (Character.isHighSurrogate(value.charAt(end - 1))) {
            // The matching low surrogate would be cut off; drop the dangling half as well.
            end--;
        }
        return value.substring(0, end);
    }

    /** Returns the seconds elapsed since {@code startMillis} (a {@code System.currentTimeMillis()} value). */
    private static double elapsedSeconds(long startMillis) {
        return (System.currentTimeMillis() - startMillis) / 1000.0d;
    }
}
