package opennlp.tools.formats;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.PlainTextByLineStream;

/* loaded from: input_file:opennlp/tools/formats/LeipzigDoccatSampleStream.class */
/**
 * Stream filter that turns lines of text into {@link DocumentSample}s.
 *
 * <p>Each sample is built from up to {@code sentencesPerDocument} consecutive
 * lines of the underlying line stream. Every line is tokenized with
 * {@link SimpleTokenizer}; the first token of each line is dropped and the
 * remaining tokens are joined with single spaces. The configured language
 * code is used as the category of every produced sample.
 *
 * <p>NOTE(review): the dropped first token is presumably the sentence number
 * that prefixes each line of a Leipzig corpus file - confirm against the
 * corpus format.
 */
public class LeipzigDoccatSampleStream extends FilterObjectStream<String, DocumentSample> {

    /**
     * Language code to charset name used for decoding the input stream.
     * Populated once at class initialization and never modified afterwards.
     */
    private static final Map<String, String> ENCODINGS = new HashMap<String, String>();

    static {
        ENCODINGS.put("cat", "ISO-8859-1");
        ENCODINGS.put("de", "ISO-8859-1");
        ENCODINGS.put("dk", "ISO-8859-1");
        ENCODINGS.put("ee", "ISO-8859-4");
        ENCODINGS.put("en", "ISO-8859-1");
        ENCODINGS.put("fi", "ISO-8859-1");
        ENCODINGS.put("fr", "ISO-8859-1");
        ENCODINGS.put("it", "ISO-8859-1");
        ENCODINGS.put("jp", "UTF-8");
        ENCODINGS.put("kr", "UTF-8");
        ENCODINGS.put("nl", "ISO-8859-1");
        ENCODINGS.put("no", "ISO-8859-1");
        ENCODINGS.put("se", "ISO-8859-1");
        ENCODINGS.put("sorb", "ISO-8859-2");
        ENCODINGS.put("tr", "ISO-8859-9");
    }

    /** Language code; used as the category of every emitted sample. */
    private final String language;

    /** Maximum number of input lines merged into one sample. */
    private final int sentencesPerDocument;

    /**
     * Creates a sample stream reading from the given input stream.
     *
     * <p>NOTE(review): the decompiler reported that this constructor was
     * originally package-private; it is kept public here so existing callers
     * of this file keep compiling.
     *
     * @param str         language code; must be one of the codes known to
     *                    {@link #mapLanguageToEncoding(String)}
     * @param i           maximum number of lines combined into one sample;
     *                    with a value of zero or less {@link #read()} always
     *                    returns {@code null}
     * @param inputStream raw input, decoded with the charset mapped to the language
     * @throws NullPointerException if {@code str} is {@code null}
     * @throws IOException          if no encoding is registered for {@code str}
     */
    public LeipzigDoccatSampleStream(String str, int i, InputStream inputStream) throws IOException {
        super(new PlainTextByLineStream(inputStream, mapLanguageToEncoding(str)));
        this.language = str;
        this.sentencesPerDocument = i;
    }

    /**
     * Looks up the charset name registered for a language code.
     *
     * @param str language code
     * @return the charset name for the language, never {@code null}
     * @throws NullPointerException if {@code str} is {@code null}
     * @throws IOException          if the language code has no registered encoding
     */
    private static String mapLanguageToEncoding(String str) throws IOException {
        if (str == null) {
            throw new NullPointerException("language parameter must not be null!");
        }
        String encoding = ENCODINGS.get(str);
        if (encoding == null) {
            throw new IOException("Encoding for language " + str + " is not specified!");
        }
        return encoding;
    }

    /**
     * Reads the next sample by concatenating up to {@code sentencesPerDocument} lines.
     *
     * @return the next sample, or {@code null} when no text could be collected
     *         (for example because the underlying stream is exhausted)
     * @throws IOException if a line yields no tokens, or if reading the
     *                     underlying stream fails
     */
    @Override // opennlp.tools.util.ObjectStream
    public DocumentSample read() throws IOException {
        StringBuilder text = new StringBuilder();
        for (int lineCount = 0; lineCount < this.sentencesPerDocument; lineCount++) {
            String line = this.samples.read();
            if (line == null) {
                // Underlying stream is exhausted; emit whatever was collected so far.
                break;
            }
            String[] tokens = SimpleTokenizer.INSTANCE.tokenize(line);
            if (tokens.length == 0) {
                throw new IOException("Empty lines are not allowed!");
            }
            // Start at index 1: the first token of every line is not part of the text.
            for (int t = 1; t < tokens.length; t++) {
                text.append(tokens[t]);
                text.append(' ');
            }
        }
        if (text.length() == 0) {
            return null;
        }
        return new DocumentSample(this.language, text.toString());
    }
}
