package marytts.modules;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.exceptions.MaryConfigurationException;
import marytts.fst.FSTLookup;
import marytts.modules.phonemiser.AllophoneSet;
import marytts.modules.phonemiser.TrainedLTS;
import marytts.server.MaryProperties;
import marytts.util.MaryRuntimeUtils;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryDomUtils;
import net.sf.saxon.style.StandardNames;
import org.sdmxsource.sdmx.ediparser.constants.EDI_CONSTANTS;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.NodeIterator;

/* loaded from: input_file:WEB-INF/lib/marytts-d4science-5.0.0.jar:marytts/modules/JPhonemiser.class */
/**
 * Module that adds a phonemic transcription to the token ({@code t}) elements
 * of a MaryXML document.
 *
 * <p>For every word the transcription is looked up, in this order, in the
 * optional user dictionary, in the FST lexicon and, after normalising unicode
 * letters, in both again; if all lookups fail, the trained letter-to-sound
 * model is used. The source of the transcription is recorded in the
 * {@code g2p_method} attribute of the token ("userdict", "lexicon" or "rules").
 */
public class JPhonemiser extends InternalModule {
    /**
     * User dictionary: maps a word to its entries, each of the form
     * {@code transcription} or {@code transcription|pos1 pos2 ...}.
     * Stays {@code null} when no user dictionary file was configured or found.
     */
    protected Map<String, List<String>> userdict;
    /** Main pronunciation lexicon. */
    protected FSTLookup lexicon;
    /** Letter-to-sound model used when no dictionary contains the word. */
    protected TrainedLTS lts;
    /** Allophone set used to validate user dictionary entries and to build the LTS model. */
    protected AllophoneSet allophoneSet;

    /**
     * Creates a phonemiser named "JPhonemiser" that converts PARTSOFSPEECH data to PHONEMES data.
     *
     * @param propertyPrefix prefix to which "allophoneset", "userdict", "lexicon" and
     *                       "lettertosound" are appended to form the configuration property names
     * @throws IOException if the user dictionary cannot be read
     * @throws MaryConfigurationException declared by the delegated constructor
     */
    public JPhonemiser(String propertyPrefix) throws IOException, MaryConfigurationException {
        this("JPhonemiser", MaryDataType.PARTSOFSPEECH, MaryDataType.PHONEMES,
                propertyPrefix + "allophoneset", propertyPrefix + "userdict",
                propertyPrefix + "lexicon", propertyPrefix + "lettertosound");
    }

    /**
     * Creates a phonemiser with explicit data types and property names.
     *
     * @param componentName      module name handed to the superclass
     * @param inputType          input data type handed to the superclass
     * @param outputType         output data type handed to the superclass
     * @param allophonesProperty property identifying the allophone set; its locale becomes the module locale
     * @param userdictProperty   property holding the user dictionary file name; the dictionary is optional
     * @param lexiconProperty    property identifying the lexicon stream
     * @param ltsProperty        property identifying the letter-to-sound model stream
     * @throws IOException if the user dictionary cannot be read
     * @throws MaryConfigurationException declared for configuration problems raised by the helpers used here
     */
    public JPhonemiser(String componentName, MaryDataType inputType, MaryDataType outputType,
            String allophonesProperty, String userdictProperty, String lexiconProperty,
            String ltsProperty) throws IOException, MaryConfigurationException {
        // The allophone set is resolved twice because super(...) must be the first statement.
        super(componentName, inputType, outputType, MaryRuntimeUtils.needAllophoneSet(allophonesProperty).getLocale());
        this.allophoneSet = MaryRuntimeUtils.needAllophoneSet(allophonesProperty);
        String userdictFilename = MaryProperties.getFilename(userdictProperty);
        if (userdictFilename != null) {
            if (new File(userdictFilename).exists()) {
                this.userdict = readLexicon(userdictFilename);
            } else {
                this.logger.info("User dictionary '" + userdictFilename + "' for locale '" + getLocale() + "' does not exist. Ignoring.");
            }
        }
        this.lexicon = new FSTLookup(MaryProperties.needStream(lexiconProperty), lexiconProperty);
        this.lts = new TrainedLTS(this.allophoneSet, MaryProperties.needStream(ltsProperty));
    }

    /**
     * Phonemises every {@code t} element that has no phone attribute yet or
     * whose phone attribute still contains a {@code *} placeholder.
     *
     * <p>The text to phonemise is the {@code sounds_like} attribute if present,
     * otherwise the token text. It is split at spaces and hyphens; each part is
     * phonemised separately and the results are joined with {@code " - "}.
     * The {@code g2p_method} attribute reports the method used for the first part.
     *
     * @param maryData input data whose document is modified in place
     * @return new data of this module's output type wrapping the same document
     * @throws Exception declared by the overridden method
     */
    @Override
    public MaryData process(MaryData maryData) throws Exception {
        Document document = maryData.getDocument();
        NodeIterator tokens = MaryDomUtils.createNodeIterator(document, document, "t");
        Element token;
        while ((token = (Element) tokens.nextNode()) != null) {
            // A phone attribute without '*' is treated as complete: leave it alone.
            if (token.hasAttribute(MaryXML.PHONE) && !token.getAttribute(MaryXML.PHONE).contains("*")) {
                continue;
            }
            String text = token.hasAttribute("sounds_like")
                    ? token.getAttribute("sounds_like")
                    : MaryDomUtils.tokenText(token);
            if (text == null || text.equals("")) {
                continue;
            }
            String pos = token.hasAttribute("pos") ? token.getAttribute("pos") : null;

            StringBuilder transcription = new StringBuilder();
            String g2pMethod = null;
            StringTokenizer parts = new StringTokenizer(text, " -");
            while (parts.hasMoreTokens()) {
                StringBuilder partMethod = new StringBuilder();
                String phones = phonemise(parts.nextToken(), pos, partMethod);
                if (phones == null) {
                    // No pronunciation could be produced for this part; skip it
                    // instead of writing the text "null" or failing with an NPE.
                    continue;
                }
                if (transcription.length() == 0) {
                    // The method of the compound is the method of its first part.
                    g2pMethod = partMethod.toString();
                    transcription.append(phones);
                } else {
                    transcription.append(" - ");
                    // NOTE(review): presumably demotes primary stress (') to
                    // secondary stress (,) in non-initial parts - confirm
                    // against the transcription alphabet.
                    transcription.append(phones.replace('\'', ','));
                }
            }
            if (transcription.length() > 0) {
                setPh(token, transcription.toString());
                token.setAttribute("g2p_method", g2pMethod);
            }
        }
        MaryData result = new MaryData(outputType(), maryData.getLocale());
        result.setDocument(document);
        return result;
    }

    /**
     * Finds a transcription for a single word.
     *
     * <p>Lookup order: user dictionary, lexicon, then both again with the
     * unicode-normalised form of the word (only if it differs), and finally
     * the letter-to-sound model applied to the original text.
     *
     * @param text      the word to transcribe
     * @param pos       part-of-speech tag used to disambiguate dictionary entries, may be {@code null}
     * @param g2pMethod receives "userdict", "lexicon" or "rules" depending on where the
     *                  transcription came from; nothing is appended when {@code null} is returned
     * @return the transcription, or {@code null} if the letter-to-sound result cannot be syllabified
     */
    public String phonemise(String text, String pos, StringBuilder g2pMethod) {
        String result = userdictLookup(text, pos);
        if (result != null) {
            g2pMethod.append("userdict");
            return result;
        }
        result = lexiconLookup(text, pos);
        if (result != null) {
            g2pMethod.append("lexicon");
            return result;
        }
        String normalised = MaryUtils.normaliseUnicodeLetters(text, getLocale());
        if (!normalised.equals(text)) {
            result = userdictLookup(normalised, pos);
            if (result != null) {
                g2pMethod.append("userdict");
                return result;
            }
            result = lexiconLookup(normalised, pos);
            if (result != null) {
                g2pMethod.append("lexicon");
                return result;
            }
        }
        result = this.lts.syllabify(this.lts.predictPronunciation(text));
        if (result == null) {
            return null;
        }
        g2pMethod.append("rules");
        return result;
    }

    /**
     * Looks a word up in the lexicon, trying the word as given, then all
     * lowercase, then lowercase with the first letter capitalised.
     *
     * @param text the word to look up
     * @param pos  part-of-speech tag, may be {@code null}
     * @return the first transcription found, or {@code null} if there is none
     *         or {@code text} is {@code null} or empty
     */
    public String lexiconLookup(String text, String pos) {
        if (text == null || text.length() == 0) {
            return null;
        }
        String[] entries = lexiconLookupPrimitive(text, pos);
        if (entries.length == 0) {
            text = text.toLowerCase(getLocale());
            entries = lexiconLookupPrimitive(text, pos);
        }
        if (entries.length == 0) {
            // text is lowercase at this point, so this yields "Xxxx".
            String capitalised = text.substring(0, 1).toUpperCase(getLocale()) + text.substring(1);
            entries = lexiconLookupPrimitive(capitalised, pos);
        }
        return entries.length == 0 ? null : entries[0];
    }

    /**
     * Single lexicon lookup: with a part of speech the key {@code text + pos}
     * is tried first and the bare word is the fallback.
     */
    private String[] lexiconLookupPrimitive(String text, String pos) {
        if (pos != null) {
            String[] withPos = this.lexicon.lookup(text + pos);
            if (withPos.length != 0) {
                return withPos;
            }
        }
        return this.lexicon.lookup(text);
    }

    /**
     * Looks a word up in the user dictionary, trying the word as given, then
     * all lowercase, then lowercase with the first letter capitalised.
     *
     * <p>If several entries exist, the first one whose part-of-speech list
     * contains {@code pos} wins; if none matches, the last entry is returned.
     *
     * @param text the word to look up
     * @param pos  part-of-speech tag, may be {@code null}
     * @return the transcription, or {@code null} if there is no user dictionary,
     *         the word is {@code null}/empty, or the word is not listed
     */
    public String userdictLookup(String text, String pos) {
        if (this.userdict == null || text == null || text.length() == 0) {
            return null;
        }
        List<String> entries = this.userdict.get(text);
        if (entries == null) {
            text = text.toLowerCase(getLocale());
            entries = this.userdict.get(text);
        }
        if (entries == null) {
            entries = this.userdict.get(text.substring(0, 1).toUpperCase(getLocale()) + text.substring(1));
        }
        if (entries == null) {
            return null;
        }
        String transcription = null;
        for (String entry : entries) {
            String[] fields = entry.split("\\|");
            transcription = fields[0];
            if (fields.length > 1 && pos != null) {
                // Compare against the part-of-speech field only. Tokenising the
                // whole entry would glue the first tag to the transcription
                // ("trans|NN") and could match phone symbols by accident.
                StringTokenizer posTags = new StringTokenizer(fields[1]);
                while (posTags.hasMoreTokens()) {
                    if (pos.equals(posTags.nextToken())) {
                        return transcription;
                    }
                }
            }
        }
        // No part-of-speech match: fall back to the last entry.
        return transcription;
    }

    /**
     * @return the allophone set this phonemiser was created with
     */
    public AllophoneSet getAllophoneSet() {
        return this.allophoneSet;
    }

    /**
     * Reads a user dictionary from a UTF-8 text file.
     *
     * <p>Each non-blank line not starting with {@code #} has the form
     * {@code word | transcription [| pos1 pos2 ...]}. A transcription that
     * cannot be split into allophones is logged as a warning but still stored.
     * Lines without a transcription field are logged and skipped.
     *
     * @param lexiconFilename path of the dictionary file
     * @return map from word to its entries ({@code transcription} or {@code transcription|pos...})
     * @throws IOException if the file cannot be opened or read
     */
    protected Map<String, List<String>> readLexicon(String lexiconFilename) throws IOException {
        Map<String, List<String>> entriesByWord = new HashMap<String, List<String>>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(lexiconFilename), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().equals("") || line.startsWith("#")) {
                    continue;
                }
                String[] fields = line.split("\\s*\\|\\s*");
                if (fields.length < 2) {
                    // No '|' or nothing after it: there is no transcription to store.
                    this.logger.warn("Lexicon '" + lexiconFilename + "': skipping malformed line '" + line + "'");
                    continue;
                }
                String word = fields[0];
                String transcription = fields[1];
                try {
                    // Validation only; the result is not needed.
                    this.allophoneSet.splitIntoAllophones(transcription);
                } catch (RuntimeException e) {
                    this.logger.warn("Lexicon '" + lexiconFilename + "': invalid entry for '" + word + "'", e);
                }
                String entry = transcription;
                if (fields.length > 2 && !fields[2].trim().equals("")) {
                    entry = entry + "|" + fields[2];
                }
                List<String> entries = entriesByWord.get(word);
                if (entries == null) {
                    entries = new ArrayList<String>();
                    entriesByWord.put(word, entries);
                }
                entries.add(entry);
            }
        } finally {
            // Release the file handle even when reading fails.
            reader.close();
        }
        return entriesByWord;
    }

    /**
     * Stores a transcription in the phone attribute of a token. If the
     * attribute already exists, its first {@code *} is replaced by the
     * transcription (the value is left unchanged if it contains no {@code *});
     * otherwise the attribute is created.
     *
     * @param element the token element, must have tag name {@code t}
     * @param str     the transcription to store
     * @throws DOMException with code {@code INVALID_ACCESS_ERR} if {@code element} is not a {@code t} element
     */
    protected void setPh(Element element, String str) {
        if (!element.getTagName().equals("t")) {
            throw new DOMException(DOMException.INVALID_ACCESS_ERR, "Only t elements allowed, received " + element.getTagName() + ".");
        }
        if (!element.hasAttribute(MaryXML.PHONE)) {
            element.setAttribute(MaryXML.PHONE, str);
            return;
        }
        String previous = element.getAttribute(MaryXML.PHONE);
        int star = previous.indexOf('*');
        if (star >= 0) {
            // Literal splice: a regex replacement would interpret '$' and '\'
            // inside the transcription as group references / escapes.
            previous = previous.substring(0, star) + str + previous.substring(star + 1);
        }
        element.setAttribute(MaryXML.PHONE, previous);
    }
}
