/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.iis.utils.contents.arxiv;

import eu.dnetlib.iis.core.java.io.DataStore;
import eu.dnetlib.iis.core.java.io.FileSystemPath;
import eu.dnetlib.iis.utils.contents.Utils;
import eu.dnetlib.iis.utils.contents.arxiv.ArXivOutput;
import eu.dnetlib.iis.utils.contents.arxiv.PDFContentOutput;
import eu.dnetlib.iis.utils.contents.arxiv.PlaintextOutput;
import eu.dnetlib.iis.utils.contents.schemas.arxiv.ArXiv2OpenAIRE;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * Command-line tool that walks an input directory recursively and, for every
 * file found, derives an arXiv ID from the file name (the part before the last
 * dot), converts it to an OpenAIRE ID, appends the (arXiv ID, OpenAIRE ID) pair
 * to a mapping data store and hands the file content to an {@link ArXivOutput}.
 */
public class ArXivConverter {
    private final FileSystemPath inputDir;
    private final FileSystemPath outputDir;
    private final FileSystemPath outputMappingDir;
    private final ArXivOutput output;

    /**
     * Program entry point: parses the command line and runs the conversion.
     *
     * @param args see the usage message printed by {@link #parse(String[])}
     * @throws IOException if the conversion fails with an I/O error
     */
    public static void main(String[] args) throws IOException {
        ArXivConverter converter = ArXivConverter.parse(args);
        converter.run();
    }

    /**
     * Builds a converter from exactly four command-line arguments. When the
     * argument count is wrong, a usage message is printed and the JVM exits.
     *
     * @param args extract-plaintext flag, input dir, output dir, mapping output dir
     * @return converter configured with a {@link PlaintextOutput} when the flag
     *         parses as {@code true}, otherwise with a {@link PDFContentOutput}
     * @throws IOException declared for callers; propagated from path construction
     */
    private static ArXivConverter parse(String[] args) throws IOException {
        if (args.length != 4) {
            ArXivConverter.parseError("This program converts plaintext documents corresponding to PDF versions of documents coming from arXiv repository. The following arguments should be given: 1) \"true\" if we should extract plaintext, \"false\" if we should extract PDF files; 2) \"input dir\" which corresponds do directory structure with {arXiv plaintext contained in separate UTF-8-encoded text files} or {PDFcontents contained in separate PDF files} where arXiv ID is the name of the file after removing its extension; 3) output dir for data store; 4) output dir for data store with arXiv and OpenAIRE IDs mapping");
        }
        boolean extractPlaintext = Boolean.parseBoolean(args[0]);
        String inputDir = args[1];
        String outputDir = args[2];
        String outputMappingDir = args[3];
        ArXivOutput output = extractPlaintext ? new PlaintextOutput() : new PDFContentOutput();
        return new ArXivConverter(
                new FileSystemPath(new File(inputDir)),
                new FileSystemPath(new File(outputDir)),
                new FileSystemPath(new File(outputMappingDir)),
                output);
    }

    /**
     * Prints a command-line parsing error to standard error and terminates
     * the JVM with exit status 1.
     *
     * @param error message describing the problem
     */
    private static void parseError(String error) {
        System.err.println("ERROR while parsing command line: " + error);
        System.exit(1);
    }

    /**
     * Creates a converter.
     *
     * @param input            directory that is scanned recursively for input files
     * @param outputDir        location passed to {@link ArXivOutput#open}
     * @param outputMappingDir location of the arXiv-to-OpenAIRE ID mapping data store
     * @param output           sink that receives the content of every input file
     */
    public ArXivConverter(FileSystemPath input, FileSystemPath outputDir, FileSystemPath outputMappingDir, ArXivOutput output) {
        this.inputDir = input;
        this.outputDir = outputDir;
        this.outputMappingDir = outputMappingDir;
        this.output = output;
    }

    /**
     * Converts every file under the input directory.
     *
     * <p>The output and the mapping writer are closed in reverse order of
     * opening, and each is closed even when the other one, or the processing
     * of a file, fails. The stream of each input file is closed even when
     * appending its content fails.
     *
     * @throws IOException      if reading the input or writing the outputs fails
     * @throws RuntimeException if an entry is not a regular file or its name
     *                          contains no dot
     */
    public void run() throws IOException {
        this.output.open(this.outputDir);
        try {
            DataFileWriter<ArXiv2OpenAIRE> mappingWriter =
                    DataStore.create(this.outputMappingDir, ArXiv2OpenAIRE.SCHEMA$);
            try {
                RemoteIterator<LocatedFileStatus> fileIterator =
                        this.inputDir.getFileSystem().listFiles(this.inputDir.getPath(), true);
                // A single record instance is reused; both fields are overwritten
                // before every append.
                ArXiv2OpenAIRE map = new ArXiv2OpenAIRE();
                while (fileIterator != null && fileIterator.hasNext()) {
                    LocatedFileStatus fileStatus = fileIterator.next();
                    ArXivConverter.assertValidFile(fileStatus);
                    Path filePath = fileStatus.getPath();
                    String arXivId = ArXivConverter.getNameWithoutExtension(filePath.getName());
                    String openAIREId = ArXivConverter.convertToOpenAIREId(arXivId);
                    map.setArXivId(arXivId);
                    map.setOpenAIREId(openAIREId);
                    mappingWriter.append(map);
                    FSDataInputStream in = this.inputDir.getFileSystem().open(filePath);
                    try {
                        this.output.append(in, openAIREId);
                    } finally {
                        // Release the input stream even if the append above failed.
                        in.close();
                    }
                }
            } finally {
                mappingWriter.close();
            }
        } finally {
            // Runs even if creating or closing the mapping writer failed.
            this.output.close();
        }
    }

    /**
     * Collects the names of the given path and of all its ancestors, starting
     * with the path itself and walking up via {@link Path#getParent()}.
     * Currently not called from within this class.
     *
     * @param path path to decompose
     * @return names ordered from the given path up to its topmost ancestor
     */
    private static ArrayList<String> getPathElements(Path path) {
        ArrayList<String> elems = new ArrayList<String>();
        for (Path currPath = path; currPath != null; currPath = currPath.getParent()) {
            elems.add(currPath.getName());
        }
        return elems;
    }

    /**
     * Ensures that the given status describes a regular file.
     *
     * @param fileStatus status to check
     * @throws RuntimeException if the status does not describe a file
     */
    private static void assertValidFile(LocatedFileStatus fileStatus) {
        if (!fileStatus.isFile()) {
            throw new RuntimeException(fileStatus.getPath() + " is not a file");
        }
    }

    /**
     * Strips the extension, i.e. everything from the last dot onwards.
     *
     * @param fileName file name expected to contain at least one dot
     * @return the part of the name before its last dot
     * @throws RuntimeException if the name contains no dot
     */
    private static String getNameWithoutExtension(String fileName) {
        int dotIndex = fileName.lastIndexOf('.');
        if (dotIndex == -1) {
            throw new RuntimeException("Dot not found in name of file \"" + fileName + "\"");
        }
        return fileName.substring(0, dotIndex);
    }

    /**
     * Converts an arXiv ID to an OpenAIRE ID by delegating to
     * {@link Utils#convertToOpenAIREId} with the two fixed prefix strings below.
     *
     * @param arXivId arXiv identifier taken from the input file name
     * @return the identifier produced by {@link Utils#convertToOpenAIREId}
     */
    private static String convertToOpenAIREId(String arXivId) {
        // NOTE(review): presumably the OpenAIRE repository prefix and the OAI
        // identifier prefix for arXiv - confirm against Utils.convertToOpenAIREId.
        return Utils.convertToOpenAIREId("od________18::", "oai:arXiv.org:", arXivId);
    }
}

