/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.operator;

import com.rapidminer.gui.wizards.PreviewListener;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.SimpleSegmenterPreviewerCreator;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeDirectory;
import com.rapidminer.parameter.ParameterTypePreview;
import com.rapidminer.parameter.ParameterTypeString;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Writer;
import java.util.List;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Operator that cuts every regular file of an input directory into segments
 * and writes each segment to its own file in an output directory.
 *
 * <p>The content of each input file is split with
 * {@link String#split(String)} using the value of the
 * {@link #PARAMETER_SPLIT_EXPRESSION} parameter. Output files are named
 * {@code seg<N>.<suffix>}, where {@code N} is a counter that keeps running
 * across all input files and {@code suffix} is the input file's extension
 * ({@code "txt"} when the input file name contains no dot).
 *
 * <p>NOTE(review): the parameter description speaks of extracting "matches",
 * but the implementation uses {@code String.split}, i.e. the expression acts
 * as a delimiter and the segments are the text between matches. Confirm which
 * of the two is intended.
 */
public class SimpleSegmenter
extends Operator {
    public static final String PARAMETER_OUTPUT = "output";
    public static final String PARAMETER_SPLIT_EXPRESSION = "split_expression";
    /** Key of the parameter naming the directory that holds the input documents. */
    public static final String PARAMETER_TEXTS = "texts";

    /** Suffix given to segment files when the input file name has no extension. */
    private static final String DEFAULT_SUFFIX = "txt";

    public SimpleSegmenter(OperatorDescription description) {
        super(description);
    }

    /**
     * Segments all regular files found directly inside the input directory.
     * Sub-directories are skipped.
     *
     * @return always an empty array; the result of this operator is the set
     *         of files written to the output directory
     * @throws OperatorException a {@link UserError} with code 302 if the input
     *         directory cannot be listed or an input file cannot be read, or
     *         with code 303 if a segment file cannot be written
     */
    public IOObject[] apply() throws OperatorException {
        File outDir = this.getParameterAsFile(PARAMETER_OUTPUT, true);
        File inDir = this.getParameterAsFile(PARAMETER_TEXTS);

        // File.listFiles() returns null (rather than throwing) when the path is
        // not a directory or an I/O error occurs. Report that with the same
        // error code the read path below uses instead of failing with a
        // NullPointerException.
        File[] files = inDir.listFiles();
        if (files == null) {
            throw new UserError((Operator)this, 302, new Object[]{inDir, "not a directory or not readable"});
        }

        // Running number used in the output file names; shared by all input files.
        int count = 0;
        for (int i = 0; i < files.length; ++i) {
            File file = files[i];
            if (!file.isFile()) {
                continue;
            }
            String suffix = SimpleSegmenter.suffixOf(file.getName());
            String text = this.readText(file);
            String[] segments = text.split(this.getParameterAsString(PARAMETER_SPLIT_EXPRESSION));
            for (int j = 0; j < segments.length; ++j) {
                String outFileName = outDir.getAbsolutePath() + File.separator + "seg" + count + "." + suffix;
                this.writeSegment(outFileName, segments[j]);
                ++count;
            }
        }
        return new IOObject[0];
    }

    /**
     * Returns the part of {@code fileName} after its last dot, or
     * {@code "txt"} if the name contains no dot.
     */
    private static String suffixOf(String fileName) {
        int index = fileName.lastIndexOf('.');
        return index > -1 ? fileName.substring(index + 1) : DEFAULT_SUFFIX;
    }

    /**
     * Reads the whole file using the operator's encoding. Every line is
     * terminated with a single {@code '\n'} in the returned text, whatever
     * line terminator the file used.
     *
     * @throws OperatorException a {@link UserError} with code 302 if reading fails
     */
    private String readText(File file) throws OperatorException {
        StringBuilder text = new StringBuilder();
        try {
            InputStream stream = new FileInputStream(file);
            try {
                BufferedReader in = new BufferedReader(new InputStreamReader(stream, this.getEncoding()));
                String line;
                while ((line = in.readLine()) != null) {
                    text.append(line).append('\n');
                }
            }
            finally {
                // Closing the underlying stream releases the file handle even
                // if constructing the reader or reading a line failed.
                stream.close();
            }
        }
        catch (IOException e) {
            throw new UserError((Operator)this, 302, new Object[]{file, e});
        }
        return text.toString();
    }

    /**
     * Writes one segment to the file {@code outFileName}.
     *
     * <p>NOTE(review): the file is written with {@link FileWriter}, i.e. the
     * platform default charset, while input is read with the operator's
     * encoding. Confirm whether the output should use the same encoding.
     *
     * @throws OperatorException a {@link UserError} with code 303 if writing fails
     */
    private void writeSegment(String outFileName, String segment) throws OperatorException {
        try {
            BufferedWriter out = new BufferedWriter(new FileWriter(outFileName));
            try {
                out.write(segment);
            }
            finally {
                // close() also flushes the buffer, so a failure here is a
                // genuine write failure and must reach the catch below.
                out.close();
            }
        }
        catch (IOException e) {
            throw new UserError((Operator)this, 303, new Object[]{outFileName, e});
        }
    }

    /** This operator consumes no input objects. */
    public Class<?>[] getInputClasses() {
        return new Class[0];
    }

    /** This operator delivers no output objects. */
    public Class<?>[] getOutputClasses() {
        return new Class[0];
    }

    /**
     * Extends the inherited parameter list with a preview entry, the input
     * and output directories and the split expression, in that order.
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        ParameterTypePreview previewType = new ParameterTypePreview(SimpleSegmenterPreviewerCreator.class, (PreviewListener)this);
        previewType.setExpert(false);
        types.add(previewType);
        types.add(new ParameterTypeDirectory(PARAMETER_TEXTS, "A directory containing the documents to be segmented", false));
        types.add(new ParameterTypeDirectory(PARAMETER_OUTPUT, "The directory to which to write the segments", false));
        types.add(new ParameterTypeString(PARAMETER_SPLIT_EXPRESSION, "Specifies a regular expression or XPath expression that matches against substrings of the content which should be treated as individual segments. The syntax is the same as for attribute extraction (see WVTool operator), but instead of extracting only the first match, all matches are extracted and written to individual files", false));
        return types;
    }
}

