package marytts.modules;

import com.rapidminer.example.Example;
import de.dfki.lt.tools.tokenizer.JTok;
import de.dfki.lt.tools.tokenizer.annotate.AnnotatedString;
import de.dfki.lt.tools.tokenizer.annotate.FastAnnotatedString;
import java.util.Locale;
import java.util.Properties;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.util.dom.DomUtils;
import marytts.util.dom.MaryDomUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.NodeIterator;

/* JADX WARN: Classes with same name are omitted:
  input_file:builds/deps.jar:marytts-server-5.0-jar-with-dependencies.jar:marytts/modules/JTokeniser.class
  input_file:builds/deps.jar:marytts/modules/JTokeniser.class
  input_file:builds/deps.jar:tmp-src.zip:marytts-server-5.0-jar-with-dependencies.jar:marytts/modules/JTokeniser.class
  input_file:marytts-server-5.0-jar-with-dependencies.jar:marytts/modules/JTokeniser.class
  input_file:marytts-server-5.0-jar-with-dependencies.jar:marytts/modules/JTokeniser.class
  input_file:marytts/modules/JTokeniser.class
 */
/* loaded from: input_file:tmp-src.zip:marytts-server-5.0-jar-with-dependencies.jar:marytts/modules/JTokeniser.class */
/**
 * MARY module that tokenises the text content of a document with JTok.
 *
 * <p>The module concatenates the trimmed content of all text nodes of the input
 * document, lets JTok tokenise that string, wraps every token in a
 * {@link MaryXML#TOKEN} element and groups the tokens into {@code s} (sentence)
 * and {@code p} (paragraph) elements according to JTok's border annotations.
 * Finally, overly long tokens are cut to {@link #TOKEN_MAXLENGTH} characters.
 */
public class JTokeniser extends InternalModule {
    /** Maximum number of characters kept in a single token's text. */
    public static final int TOKEN_MAXLENGTH = 100;

    /** Annotation key under which each text range is linked to its DOM text node. */
    private static final String TEXT_NODE_ANNOTATION = "MARYXML";
    /** Tag name of the element that encloses the tokens of one sentence. */
    private static final String SENTENCE_TAG = "s";
    /** Tag name of the element that encloses the sentences of one paragraph. */
    private static final String PARAGRAPH_TAG = "p";
    /** Border annotation value that marks the start of a new paragraph. */
    private static final String PARAGRAPH_BORDER = "p";
    /**
     * Separator put between adjacent text nodes that would otherwise run together.
     * The offset bookkeeping in {@link #mapTextNodes} relies on its length.
     */
    private static final String TEXT_SEPARATOR = " ";
    /** Tokeniser language used when no locale is given. */
    private static final String DEFAULT_LANGUAGE = "en";

    private JTok jtok;
    private String jtokLocale;

    /** Creates a tokeniser without a locale; the tokeniser language defaults to {@code "en"}. */
    public JTokeniser() {
        this((Locale) null);
    }

    /**
     * Creates a tokeniser for the locale described by the given language string.
     *
     * @param str language code passed to {@link Locale#Locale(String)}
     */
    public JTokeniser(String str) {
        // Delegate to the full constructor so that jtokLocale is initialised;
        // calling super(...) directly would leave it null and make startup() fail.
        this(MaryDataType.RAWMARYXML, MaryDataType.TOKENS, new Locale(str));
    }

    /**
     * Creates a tokeniser converting RAWMARYXML into TOKENS for the given locale.
     *
     * @param locale module locale; {@code null} selects the default tokeniser language
     */
    public JTokeniser(Locale locale) {
        this(MaryDataType.RAWMARYXML, MaryDataType.TOKENS, locale);
    }

    /**
     * Creates a tokeniser with explicit input and output data types.
     *
     * @param maryDataType  input data type
     * @param maryDataType2 output data type
     * @param locale        module locale; {@code null} selects the default tokeniser language
     */
    public JTokeniser(MaryDataType maryDataType, MaryDataType maryDataType2, Locale locale) {
        super("JTokeniser", maryDataType, maryDataType2, locale);
        this.jtokLocale = (locale == null) ? DEFAULT_LANGUAGE : locale.getLanguage();
    }

    /**
     * Overrides the language handed to JTok. Takes effect for the JTok
     * configuration only if called before {@link #startup()}.
     *
     * @param str tokeniser language code
     */
    protected void setTokenizerLanguage(String str) {
        this.jtokLocale = str;
    }

    /**
     * Starts the module and creates the JTok instance for the configured language.
     *
     * @throws Exception if the superclass startup or the JTok construction fails
     */
    @Override
    public void startup() throws Exception {
        super.startup();
        Properties properties = new Properties();
        properties.setProperty("languages", this.jtokLocale);
        // NOTE(review): "jtok/<lang>" is presumably the resource location of the
        // language's JTok configuration - confirm against the JTok documentation.
        properties.setProperty(this.jtokLocale, "jtok/" + this.jtokLocale);
        this.jtok = new JTok(properties);
    }

    /**
     * Tokenises the document carried by {@code maryData}. The document is modified
     * in place and returned inside a new {@link MaryData} of this module's output type.
     *
     * @param maryData input data whose document is to be tokenised
     * @return new data object holding the same, now tokenised, document
     * @throws Exception if tokenisation fails
     */
    @Override
    public MaryData process(MaryData maryData) throws Exception {
        Document document = maryData.getDocument();

        String plainText = collectText(document);
        FastAnnotatedString textNodeMap = mapTextNodes(document, plainText);
        AnnotatedString tokens = this.jtok.tokenize(plainText, this.jtokLocale);

        insertTokens(document, tokens, textNodeMap);
        truncateLongTokens(document);

        MaryData result = new MaryData(outputType(), maryData.getLocale());
        result.setDocument(document);
        return result;
    }

    /** Creates an iterator over all text nodes of the document, in document order. */
    private static NodeIterator textNodeIterator(Document document) {
        return ((DocumentTraversal) document).createNodeIterator(document, NodeFilter.SHOW_TEXT, null, false);
    }

    /**
     * Tells whether a separator must precede {@code next} when it is placed at
     * {@code offset} in {@code text}: only if there is preceding text that does not
     * end in whitespace and {@code next} starts with a letter or digit.
     */
    private static boolean needsSeparator(CharSequence text, int offset, String next) {
        return offset > 0
                && !Character.isWhitespace(text.charAt(offset - 1))
                && Character.isLetterOrDigit(next.charAt(0));
    }

    /** Concatenates the trimmed, non-empty content of all text nodes into one string. */
    private static String collectText(Document document) {
        StringBuilder plainText = new StringBuilder();
        NodeIterator textNodes = textNodeIterator(document);
        Text textNode;
        while ((textNode = (Text) textNodes.nextNode()) != null) {
            String content = textNode.getData().trim();
            if (content.isEmpty()) {
                continue;
            }
            if (needsSeparator(plainText, plainText.length(), content)) {
                plainText.append(TEXT_SEPARATOR);
            }
            plainText.append(content);
        }
        return plainText.toString();
    }

    /**
     * Annotates every character range of {@code plainText} with the text node it
     * came from. The offsets are recomputed with the same rules used by
     * {@link #collectText}, so both passes must stay in sync.
     */
    private static FastAnnotatedString mapTextNodes(Document document, String plainText) {
        FastAnnotatedString textNodeMap = new FastAnnotatedString(plainText);
        NodeIterator textNodes = textNodeIterator(document);
        int offset = 0;
        Text textNode;
        while ((textNode = (Text) textNodes.nextNode()) != null) {
            String content = textNode.getData().trim();
            int length = content.length();
            if (length == 0) {
                continue;
            }
            if (needsSeparator(plainText, offset, content)) {
                // Skip over the separator that collectText inserted at this position.
                offset += TEXT_SEPARATOR.length();
            }
            textNodeMap.annotate(TEXT_NODE_ANNOTATION, textNode, offset, offset + length);
            offset += length;
        }
        return textNodeMap;
    }

    /**
     * Walks over the tokeniser output, makes sure every token is represented by a
     * token element in the document and groups tokens into sentences and paragraphs.
     *
     * @param document    document being tokenised
     * @param tokens      tokeniser output over the concatenated text
     * @param textNodeMap mapping from character offsets to the originating text nodes
     */
    private void insertTokens(Document document, AnnotatedString tokens, FastAnnotatedString textNodeMap) {
        Element firstTokenInSentence = null;
        Element firstTokenInParagraph = null;
        Element previousToken = null;
        Text previousTextNode = null;

        char current = tokens.setIndex(0);
        textNodeMap.setIndex(0);
        // 0xFFFF is the end-of-text marker; it has the value of CharacterIterator.DONE.
        while (current != java.text.CharacterIterator.DONE) {
            int tokenStart = tokens.getRunStart(JTok.CLASS_ANNO);
            int tokenEnd = tokens.getRunLimit(JTok.CLASS_ANNO);
            // Runs without a class annotation are not tokens and are skipped.
            if (tokens.getAnnotation(JTok.CLASS_ANNO) != null) {
                textNodeMap.setIndex(tokenStart);
                Text textNode = (Text) textNodeMap.getAnnotation(TEXT_NODE_ANNOTATION);
                assert textNode != null;

                Element token;
                if (MaryDomUtils.hasAncestor(textNode, MaryXML.TOKEN)) {
                    // The input already marked this text as a token: reuse that element.
                    token = (Element) MaryDomUtils.getAncestor(textNode, MaryXML.TOKEN);
                } else {
                    String tokenText = tokens.substring(tokenStart, tokenEnd);
                    token = MaryXML.createElement(document, MaryXML.TOKEN);
                    MaryDomUtils.setTokenText(token, tokenText);
                    textNode.getParentNode().insertBefore(token, textNode);
                }

                // Once we have moved on to a new text node, the previous one has been
                // fully replaced by the token elements inserted before it.
                if (previousTextNode != null && previousTextNode != textNode) {
                    removeUnlessInsideToken(previousTextNode);
                }
                previousTextNode = textNode;

                Object border = tokens.getAnnotation(JTok.BORDER_ANNO);
                if (border != null) {
                    // A border on this token ends the sentence before it ...
                    closeSentence(firstTokenInSentence, previousToken);
                    firstTokenInSentence = null;
                    // ... and a paragraph border additionally ends the paragraph.
                    if (PARAGRAPH_BORDER.equals(border)) {
                        closeParagraph(firstTokenInParagraph, previousToken);
                        firstTokenInParagraph = null;
                    }
                }

                previousToken = token;
                if (firstTokenInSentence == null) {
                    firstTokenInSentence = token;
                }
                if (firstTokenInParagraph == null) {
                    firstTokenInParagraph = token;
                }
            }
            current = tokens.setIndex(tokenEnd);
            textNodeMap.setIndex(tokenEnd);
        }

        if (previousTextNode != null) {
            removeUnlessInsideToken(previousTextNode);
        }
        // Close whatever sentence and paragraph are still open at the end of the text.
        closeSentence(firstTokenInSentence, previousToken);
        closeParagraph(firstTokenInParagraph, previousToken);
    }

    /** Removes the text node from the document unless it lives inside a token element. */
    private static void removeUnlessInsideToken(Text textNode) {
        if (!MaryDomUtils.hasAncestor(textNode, MaryXML.TOKEN)) {
            textNode.getParentNode().removeChild(textNode);
        }
    }

    /**
     * Encloses the tokens from {@code firstToken} to {@code lastToken} in a sentence
     * element, unless there is no open sentence, one of them already is inside a
     * sentence, or they do not share the same paragraph ancestor.
     */
    private void closeSentence(Element firstToken, Element lastToken) {
        if (firstToken == null) {
            return;
        }
        assert lastToken != null;
        if (MaryDomUtils.hasAncestor(firstToken, SENTENCE_TAG) || MaryDomUtils.hasAncestor(lastToken, SENTENCE_TAG)) {
            return;
        }
        Element firstParagraph = (Element) MaryDomUtils.getAncestor(firstToken, PARAGRAPH_TAG);
        Element lastParagraph = (Element) MaryDomUtils.getAncestor(lastToken, PARAGRAPH_TAG);
        // Null-safe comparison: a token outside any paragraph never matches one inside.
        boolean sameParagraph = (firstParagraph == null)
                ? lastParagraph == null
                : firstParagraph.equals(lastParagraph);
        if (sameParagraph) {
            encloseWithSentence(firstToken, lastToken);
        }
    }

    /**
     * Encloses the sentences of {@code firstToken} through {@code lastToken} in a
     * paragraph element, unless there is no open paragraph or one of the tokens
     * already is inside a paragraph.
     */
    private static void closeParagraph(Element firstToken, Element lastToken) {
        if (firstToken == null) {
            return;
        }
        assert lastToken != null;
        if (!MaryDomUtils.hasAncestor(firstToken, PARAGRAPH_TAG) && !MaryDomUtils.hasAncestor(lastToken, PARAGRAPH_TAG)) {
            DomUtils.encloseNodesWithNewElement(
                    DomUtils.getAncestor(firstToken, SENTENCE_TAG),
                    DomUtils.getAncestor(lastToken, SENTENCE_TAG),
                    PARAGRAPH_TAG);
        }
    }

    /** Cuts the text of every token element in the document to {@link #TOKEN_MAXLENGTH} characters. */
    private void truncateLongTokens(Document document) {
        NodeIterator tokenElements = MaryDomUtils.createNodeIterator(document.getDocumentElement(), MaryXML.TOKEN);
        Element token;
        while ((token = (Element) tokenElements.nextNode()) != null) {
            String tokenText = MaryDomUtils.tokenText(token);
            if (tokenText.length() > TOKEN_MAXLENGTH) {
                String shortened = tokenText.substring(0, TOKEN_MAXLENGTH);
                this.logger.info("Cutting exceedingly long input token (length " + tokenText.length()
                        + " ) to length " + TOKEN_MAXLENGTH + ":\nbefore: " + tokenText + "\nafter: " + shortened);
                MaryDomUtils.setTokenText(token, shortened);
            }
        }
    }

    /**
     * Wraps the sibling range from {@code element} to {@code element2} in a new
     * sentence element. A boundary element directly before the first or directly
     * after the last token is pulled into the sentence as well.
     *
     * @param element  first token of the sentence
     * @param element2 last token of the sentence
     */
    private void encloseWithSentence(Element element, Element element2) {
        Element first = element;
        Element before = DomUtils.getPreviousSiblingElement(element);
        if (before != null && before.getTagName().equals(MaryXML.BOUNDARY)) {
            first = before;
        }
        Element last = element2;
        Element after = DomUtils.getNextSiblingElement(element2);
        if (after != null && after.getTagName().equals(MaryXML.BOUNDARY)) {
            last = after;
        }
        DomUtils.encloseNodesWithNewElement(first, last, SENTENCE_TAG);
    }
}
