package eu.dnetlib.dhp.swh;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.swh.models.LastVisitData;
import eu.dnetlib.dhp.swh.utils.SWHConnection;
import eu.dnetlib.dhp.swh.utils.SWHConstants;
import eu.dnetlib.dhp.swh.utils.SWHUtils;
import eu.dnetlib.dhp.utils.DHPUtils;
import java.io.IOException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:eu/dnetlib/dhp/swh/ArchiveRepositoryURLs.class */
/**
 * Reads a sequence file of (repository URL, last-visit JSON) pairs and, for each
 * repository that qualifies, issues an archive request through {@link SWHConnection}.
 * The response of every request that was sent is appended to an output sequence file,
 * keyed by the repository URL.
 */
public class ArchiveRepositoryURLs {
    private static final Logger log = LoggerFactory.getLogger(ArchiveRepositoryURLs.class);
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Connection used by {@link #handleRecord}; initialised in {@link #main}. */
    private static SWHConnection swhConnection = null;

    /**
     * Entry point: parses the job arguments, builds the connection and runs the archive pass.
     *
     * @param args command-line arguments, parsed against {@code input_archive_repository_urls.json}
     * @throws IOException    if the argument definition or the sequence files cannot be read/written
     * @throws ParseException if the command-line arguments cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(
            IOUtils.toString(
                CollectLastVisitRepositoryData.class
                    .getResourceAsStream("/eu/dnetlib/dhp/swh/input_archive_repository_urls.json")));
        parser.parseArgument(args);

        String hdfsUri = parser.get("namenode");
        log.info("hdfsURI: {}", hdfsUri);

        String inputPath = parser.get("lastVisitsPath");
        log.info("inputPath: {}", inputPath);

        String outputPath = parser.get("archiveRequestsPath");
        log.info("outputPath: {}", outputPath);

        Integer archiveThresholdInDays = Integer.valueOf(Integer.parseInt(parser.get("archiveThresholdInDays")));
        log.info("archiveThresholdInDays: {}", archiveThresholdInDays);

        String apiAccessToken = parser.get("apiAccessToken");
        // Never write the token itself to the log; only report whether one was supplied.
        log.info("apiAccessToken provided: {}", apiAccessToken != null && !apiAccessToken.isEmpty());

        swhConnection = new SWHConnection(SWHUtils.getClientParams(parser), apiAccessToken);

        FileSystem fileSystem = FileSystem.get(DHPUtils.getHadoopConfiguration(hdfsUri));
        archive(fileSystem, inputPath, outputPath, archiveThresholdInDays);
    }

    /**
     * Iterates over every record of the input sequence file and appends the result of
     * {@link #handleRecord} (when it is not {@code null}) to the output sequence file.
     *
     * @param fileSystem             file system holding both sequence files
     * @param inputPath              path of the sequence file with the last-visit records
     * @param outputPath             path of the sequence file receiving the archive responses
     * @param archiveThresholdInDays threshold forwarded to {@link #handleRecord}
     * @throws IOException      if reading, writing or the record handling fails with an I/O error
     * @throws RuntimeException wrapping a {@link java.text.ParseException} raised while handling a record
     */
    private static void archive(FileSystem fileSystem, String inputPath, String outputPath,
        Integer archiveThresholdInDays) throws IOException {
        // try-with-resources guarantees both files are closed even when a record fails.
        // Resources are closed in reverse declaration order: writer first, then reader.
        try (SequenceFile.Reader reader = SWHUtils.getSequenceFileReader(fileSystem, inputPath);
            SequenceFile.Writer writer = SWHUtils.getSequenceFileWriter(fileSystem, outputPath)) {
            Text repoUrl = new Text();
            Text lastVisit = new Text();
            while (reader.next(repoUrl, lastVisit)) {
                try {
                    String response = handleRecord(repoUrl.toString(), lastVisit.toString(), archiveThresholdInDays);
                    if (response != null) {
                        SWHUtils.appendToSequenceFile(writer, repoUrl.toString(), response);
                    }
                } catch (java.text.ParseException e) {
                    log.error("Could not handle record with repo Url: {}", repoUrl.toString());
                    throw new RuntimeException(e);
                }
            }
        }
    }

    /**
     * Decides whether an archive request must be sent for one repository and, if so, sends it.
     *
     * <p>No request is sent (and {@code null} is returned) when the last-visit status equals
     * {@link SWHConstants#VISIT_STATUS_NOT_FOUND}, or when a snapshot exists and the cleaned
     * visit date is at most {@code num} days away from the current time.
     *
     * @param str  the record key; used (trimmed) as the repository URL in the archive request
     * @param str2 the record value; JSON deserialised into {@link LastVisitData}
     * @param num  threshold in days; repositories visited within this many days are skipped
     * @return the response body of the archive request, {@code "{}"} when the request failed
     *         with a {@link CollectorException}, or {@code null} when no request was sent
     * @throws IOException              if the JSON cannot be deserialised or the URL is malformed
     * @throws java.text.ParseException if the cleaned visit date is not in {@code yyyy-MM-dd} form
     */
    public static String handleRecord(String str, String str2, Integer num) throws IOException, java.text.ParseException {
        log.info("{ Key: {}, Value: {} }", str, str2);

        LastVisitData lastVisitData = OBJECT_MAPPER.readValue(str2, LastVisitData.class);

        if (lastVisitData.getStatus() != null
            && lastVisitData.getStatus().equals(SWHConstants.VISIT_STATUS_NOT_FOUND)) {
            log.info("Avoid request -- previous archive request returned NOT_FOUND");
            return null;
        }

        // The age check only applies when a snapshot exists; the date is cleaned only in that case.
        if (lastVisitData.getSnapshot() != null) {
            String cleanDate = GraphCleaningFunctions.cleanDate(lastVisitData.getDate());
            if (cleanDate != null) {
                long visitMillis = new SimpleDateFormat("yyyy-MM-dd").parse(cleanDate).getTime();
                long elapsedMillis = Math.abs(new Date().getTime() - visitMillis);
                long elapsedDays = TimeUnit.DAYS.convert(elapsedMillis, TimeUnit.MILLISECONDS);
                log.info("Date diff from now (in days): {}", Long.valueOf(elapsedDays));
                if (num.intValue() >= elapsedDays) {
                    log.info("Avoid request -- no older than {} days", num);
                    return null;
                }
            }
        }

        log.info("Perform archive request for: {}", str);
        String visitType = Optional.ofNullable(lastVisitData.getType()).orElse(SWHConstants.DEFAULT_VISIT_TYPE);
        URL url = new URL(String.format(SWHConstants.SWH_ARCHIVE_URL, visitType, str.trim()));
        log.info("Sending archive request: {}", url);

        try {
            return swhConnection.call(url.toString());
        } catch (CollectorException e) {
            // Best effort: a failed request is recorded as an empty JSON object instead of
            // aborting the whole run, but the cause is kept in the log.
            log.error("Error in request: {}", url, e);
            return "{}";
        }
    }
}
