package net.matuschek.http;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Vector;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import net.matuschek.util.MD5;
import org.apache.log4j.Category;
import org.archive.io.warc.WARCConstants;
import org.eclipse.jdt.internal.compiler.util.SuffixConstants;

/* loaded from: input_file:WEB-INF/lib/jobo-1.4.0.jar:net/matuschek/http/HttpDocCache.class */
/**
 * File-system backed {@link HttpDocManager} that stores documents as zip archives.
 *
 * <p>Layout below the storage directory, as produced by this class:
 * <ul>
 *   <li>{@code documents/<name><zip suffix>} - one archive per URL holding the entries
 *       {@code header}, {@code url} and (if the document has links) {@code links};</li>
 *   <li>{@code content/<md5><zip suffix>} - the document body, shared by every URL whose
 *       content has the same MD5 value;</li>
 *   <li>{@code content/<md5>.txt} - the list of URLs using that body, one per line;</li>
 *   <li>{@code directory.csv} - a "path,URL" line for every newly created document archive.</li>
 * </ul>
 *
 * <p>Text is written and read with the platform default charset, so a cache is only
 * guaranteed to be readable on a platform using the same default charset.
 */
public class HttpDocCache implements HttpDocManager {
    /** Name of the header that is never persisted and that keeps a URL out of the processed list. */
    protected static final String CONTENT_DUPLICATE = "Content-Duplicate";
    /** Root directory of the cache; always ends with {@link File#separator}. */
    protected String storagedir;
    static final String QUOTE = "\"";
    protected static Category log = Category.getInstance(HttpDocCache.class.getName());
    protected static final String LINKS = "links" + File.separator;
    protected static final String CONTENT = "content" + File.separator;
    protected static final String DOCUMENTS = "documents" + File.separator;
    static final String LF = System.getProperty("line.separator");
    /** When true, file names are derived from the MD5 of the URL instead of the escaped URL. */
    public boolean useMD5 = true;
    /** URLs passed to {@link #processDocument(HttpDoc)} that were not flagged as duplicates. */
    private Collection urls = new LinkedList();
    protected File storageDirectoryFile = null;
    private FileOutputStream storageDirectoryStream = null;
    /** Number of leading name characters that are turned into nested directories. */
    protected int storageDirDepth = 0;

    /**
     * Creates a cache rooted at the given directory, creating the sub directories if needed.
     *
     * @param str path of the storage directory
     */
    public HttpDocCache(String str) {
        setStorageDir(str);
    }

    /**
     * Normalizes the storage path, creates the {@code documents} and {@code content}
     * directories and opens {@code directory.csv} for appending.
     *
     * @param str path of the storage directory
     */
    private void setStorageDir(String str) {
        this.storagedir = str;
        if (!this.storagedir.endsWith(File.separator)) {
            this.storagedir += File.separator;
        }
        ensureDirectory(new File(this.storagedir + DOCUMENTS));
        ensureDirectory(new File(this.storagedir + CONTENT));
        if (this.useMD5) {
            this.storageDirectoryFile = new File(this.storagedir + "directory.csv");
            try {
                // Must be decided BEFORE opening the stream: opening in append mode creates
                // the file, so an exists() check afterwards could never be false.
                boolean needsHeader = !this.storageDirectoryFile.exists()
                        || this.storageDirectoryFile.length() == 0;
                this.storageDirectoryStream = new FileOutputStream(this.storageDirectoryFile.getPath(), true);
                if (needsHeader) {
                    this.storageDirectoryStream.write(("Path,URL" + LF).getBytes());
                }
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        }
    }

    /** Creates the directory (including parents) unless it already exists. */
    private static void ensureDirectory(File directory) {
        if (!directory.exists()) {
            directory.mkdirs();
        }
    }

    /**
     * Writes the document to the cache unless it was itself loaded from the cache.
     *
     * <p>Failures while writing the archive entries are logged and do not abort the caller;
     * failures while opening or closing the archive are reported as
     * {@link DocManagerException}.
     *
     * @param httpDoc document to store
     * @throws DocManagerException if the archive cannot be created or closed
     */
    @Override // net.matuschek.http.HttpDocManager
    public void storeDocument(HttpDoc httpDoc) throws DocManagerException {
        List links = httpDoc.getLinks();
        if (httpDoc.isCached()) {
            return;
        }
        String filename = generateFilename(httpDoc.getURL().toExternalForm());
        checkStoragePathFor(DOCUMENTS, filename);
        File file = new File(this.storagedir + DOCUMENTS + filename + SuffixConstants.SUFFIX_STRING_zip);
        try {
            if (!file.exists()) {
                writeDirectoryInfo(httpDoc, filename);
            }
            BufferedOutputStream fileOut = new BufferedOutputStream(new FileOutputStream(file));
            ZipOutputStream zipOut = new ZipOutputStream(fileOut);
            try {
                zipOut.setLevel(9);
                storeContent(httpDoc);
                writeHeadersToZipFile(httpDoc, zipOut);
                writeUrlToZipFile(httpDoc, zipOut);
                if (links != null) {
                    writeLinksToZipFile(links, zipOut);
                }
            } catch (Exception e) {
                // Best effort: a document that cannot be stored must not stop the caller.
                log.error("could not store " + httpDoc.getURL(), e);
            } finally {
                try {
                    zipOut.close();
                } finally {
                    // Releases the file handle even when finishing the zip stream failed.
                    fileOut.close();
                    long date = httpDoc.getDateAsMilliSeconds();
                    file.setLastModified(date > 0 ? date : System.currentTimeMillis());
                }
            }
        } catch (IOException e) {
            throw new DocManagerException(e.getMessage());
        }
    }

    /**
     * Appends a {@code "path","URL"} line for the document to {@code directory.csv}.
     *
     * <p>Does nothing when the directory file is not in use or its stream is not open.
     * After a failed write the stream is closed and no further lines are written.
     *
     * @param httpDoc document whose URL is recorded
     * @param str     generated file name of the document
     * @throws IOException if closing the stream after a failed write fails
     */
    protected void writeDirectoryInfo(HttpDoc httpDoc, String str) throws IOException {
        if (this.storageDirectoryFile == null) {
            return;
        }
        synchronized (this.storageDirectoryFile) {
            FileOutputStream stream = this.storageDirectoryStream;
            if (stream == null) {
                return;
            }
            try {
                stream.write((QUOTE + str + QUOTE + "," + QUOTE + httpDoc.getURL() + QUOTE + LF).getBytes());
            } catch (Exception e) {
                log.warn(e.getMessage(), e);
                this.storageDirectoryStream = null;
                stream.close();
            }
        }
    }

    /**
     * Writes the document body as entry {@code content<extension>}.
     *
     * <p>The entry time is the last-modified value of the document, or its date value
     * when the last-modified value is negative.
     *
     * @param httpDoc         document providing the content
     * @param zipOutputStream archive to write to
     * @throws IOException if writing fails
     */
    protected void writeContentToZipFile(HttpDoc httpDoc, ZipOutputStream zipOutputStream) throws IOException {
        String extension = getExtensionFromContenttype(httpDoc.getHeaderValue("Content-Type"));
        ZipEntry contentEntry = new ZipEntry("content" + extension);
        long timestamp = httpDoc.getLastModifiedAsMilliSeconds();
        if (timestamp < 0) {
            timestamp = httpDoc.getDateAsMilliSeconds();
        }
        contentEntry.setTime(timestamp);
        zipOutputStream.putNextEntry(contentEntry);
        zipOutputStream.write(httpDoc.getContent());
        zipOutputStream.closeEntry();
    }

    /**
     * Writes all headers except {@link #CONTENT_DUPLICATE} as entry {@code header},
     * one header per line.
     *
     * @param httpDoc         document providing the headers
     * @param zipOutputStream archive to write to
     * @return the entry that was written
     * @throws IOException if writing fails
     */
    protected ZipEntry writeHeadersToZipFile(HttpDoc httpDoc, ZipOutputStream zipOutputStream) throws IOException {
        StringBuilder headerText = new StringBuilder();
        boolean first = true;
        Iterator it = httpDoc.getHttpHeader().iterator();
        while (it.hasNext()) {
            HttpHeader header = (HttpHeader) it.next();
            if (header.getName().equals(CONTENT_DUPLICATE)) {
                continue;
            }
            // Separator goes between written headers, so a skipped last header leaves no trailing LF.
            if (!first) {
                headerText.append(LF);
            }
            headerText.append(header.toString());
            first = false;
        }
        ZipEntry headerEntry = new ZipEntry("header");
        long date = httpDoc.getDateAsMilliSeconds();
        // The time has to be set before putNextEntry, which writes the entry header.
        headerEntry.setTime(date > 0 ? date : System.currentTimeMillis());
        zipOutputStream.putNextEntry(headerEntry);
        zipOutputStream.write(headerText.toString().getBytes());
        zipOutputStream.closeEntry();
        return headerEntry;
    }

    /**
     * Reads entry {@code header} and adds every {@code name<separator>value} line to the document.
     * Lines without the separator are ignored.
     *
     * @param httpDoc document receiving the headers
     * @param zipFile archive to read from
     * @return false if the archive has no {@code header} entry, true otherwise
     * @throws IOException if reading fails
     */
    protected boolean readHeadersFromZipFile(HttpDoc httpDoc, ZipFile zipFile) throws IOException {
        ZipEntry entry = zipFile.getEntry("header");
        if (entry == null) {
            return false;
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(zipFile.getInputStream(entry)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                int separator = line.indexOf(WARCConstants.COLON_SPACE);
                if (separator >= 0) {
                    // + 2 skips the separator; assumes COLON_SPACE is two characters long
                    // NOTE(review): confirm against WARCConstants.
                    httpDoc.addHeader(new HttpHeader(line.substring(0, separator), line.substring(separator + 2)));
                }
            }
        } finally {
            reader.close();
        }
        return true;
    }

    /**
     * Replaces the links of the document by the URLs listed in entry {@code links}.
     *
     * <p>The link list of the document is emptied (or created) even when the entry is missing.
     *
     * @param httpDoc document receiving the links
     * @param zipFile archive to read from
     * @return false if the archive has no {@code links} entry, true otherwise
     * @throws IOException if reading fails or a line is not a valid URL
     */
    protected boolean readLinksFromZipFile(HttpDoc httpDoc, ZipFile zipFile) throws IOException {
        ZipEntry entry = zipFile.getEntry("links");
        List links = httpDoc.getLinks();
        if (links == null) {
            links = new Vector();
            httpDoc.setLinks(links);
        } else {
            links.clear();
        }
        if (entry == null) {
            return false;
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(zipFile.getInputStream(entry)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                links.add(new URL(line));
            }
        } finally {
            reader.close();
        }
        return true;
    }

    /**
     * Writes the URL of the document as entry {@code url}.
     *
     * @param httpDoc         document providing the URL
     * @param zipOutputStream archive to write to
     * @return the entry that was written
     * @throws IOException if writing fails
     */
    protected ZipEntry writeUrlToZipFile(HttpDoc httpDoc, ZipOutputStream zipOutputStream) throws IOException {
        String url = httpDoc.getURL().toString();
        ZipEntry urlEntry = new ZipEntry("url");
        long date = httpDoc.getDateAsMilliSeconds();
        // The time has to be set before putNextEntry, which writes the entry header.
        urlEntry.setTime(date > 0 ? date : System.currentTimeMillis());
        zipOutputStream.putNextEntry(urlEntry);
        zipOutputStream.write(url.getBytes());
        zipOutputStream.closeEntry();
        return urlEntry;
    }

    /**
     * Returns the file listing the URLs that share the content of the document,
     * or null when the document has no content.
     */
    private File getContentUsersFile(HttpDoc httpDoc) {
        if (httpDoc.getContent().length == 0) {
            return null;
        }
        return contentFile(httpDoc.getContentMD5(), ".txt");
    }

    /**
     * Looks up another URL that was stored with the same content.
     *
     * <p>The result is the first line of the content users file, unless that first line is
     * the URL of the document itself.
     *
     * @param httpDoc document to check
     * @return URL of a document with identical content, or null if there is none
     * @throws IOException if the users file cannot be read
     */
    @Override // net.matuschek.http.HttpDocManager
    public String findDuplicate(HttpDoc httpDoc) throws IOException {
        File usersFile = getContentUsersFile(httpDoc);
        if (usersFile == null) {
            return null;
        }
        String url = httpDoc.getURL().toString();
        if (!usersFile.exists()) {
            return null;
        }
        String duplicate = null;
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(usersFile)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.equals(url)) {
                    break;
                }
                if (duplicate == null) {
                    duplicate = line;
                }
            }
        } finally {
            reader.close();
        }
        return duplicate;
    }

    /**
     * Stores the body of the document in the shared content area and registers the URL
     * in the users file of that content.
     *
     * <p>If the content archive already exists only its modification time is refreshed.
     * A content archive that could not be written completely is removed again so that it is
     * not mistaken for valid content later. Documents without content are ignored.
     *
     * @param httpDoc document providing the content
     * @throws IOException if reading or writing the content files fails
     */
    protected void storeContent(HttpDoc httpDoc) throws IOException {
        if (httpDoc.getContent().length == 0) {
            return;
        }
        File usersFile = getContentUsersFile(httpDoc);
        String url = httpDoc.getURL().toString();
        String contentMD5 = httpDoc.getContentMD5();

        boolean alreadyListed = usersFile.exists() && containsLine(usersFile, url);

        File contentFile = contentFile(contentMD5, SuffixConstants.SUFFIX_STRING_zip);
        if (contentFile.exists()) {
            contentFile.setLastModified(System.currentTimeMillis());
        } else {
            checkStoragePathFor(CONTENT, useFirstCharactersAsDirectories(contentMD5));
            BufferedOutputStream fileOut = new BufferedOutputStream(new FileOutputStream(contentFile));
            boolean written = false;
            try {
                ZipOutputStream zipOut = new ZipOutputStream(fileOut);
                try {
                    zipOut.setLevel(9);
                    writeContentToZipFile(httpDoc, zipOut);
                } finally {
                    zipOut.close();
                }
                written = true;
            } finally {
                fileOut.close();
                if (!written) {
                    // An existing file is treated as valid content, so never leave a partial one.
                    contentFile.delete();
                }
            }
        }

        if (alreadyListed) {
            return;
        }
        FileOutputStream usersOut = new FileOutputStream(usersFile.getPath(), true);
        try {
            usersOut.write((url + LF).getBytes());
        } finally {
            usersOut.close();
        }
    }

    /** Returns true if any line of the file equals the given text. */
    private static boolean containsLine(File file, String wanted) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.equals(wanted)) {
                    return true;
                }
            }
            return false;
        } finally {
            reader.close();
        }
    }

    /**
     * Writes the given URLs as entry {@code links}, one per line, skipping repeated URLs.
     *
     * <p>Repetitions are detected on the string form of the URL; comparing {@link URL}
     * objects directly may trigger host name resolution.
     *
     * @param list            list of {@link URL} objects
     * @param zipOutputStream archive to write to
     * @throws IOException if writing fails
     */
    protected void writeLinksToZipFile(List list, ZipOutputStream zipOutputStream) throws IOException {
        HashSet written = new HashSet();
        zipOutputStream.putNextEntry(new ZipEntry("links"));
        Iterator it = list.iterator();
        while (it.hasNext()) {
            String link = ((URL) it.next()).toString();
            if (written.add(link)) {
                zipOutputStream.write((link + LF).getBytes());
            }
        }
        zipOutputStream.closeEntry();
    }

    /**
     * Logs the document and remembers its URL unless it carries a
     * {@link #CONTENT_DUPLICATE} header.
     *
     * @param httpDoc document that was retrieved
     * @throws DocManagerException never thrown by this implementation
     */
    @Override // net.matuschek.http.HttpDocManager
    public void processDocument(HttpDoc httpDoc) throws DocManagerException {
        log.info("Processing " + httpDoc.getURL().toExternalForm() + httpDoc.getHttpHeader());
        if (httpDoc.getHeader(CONTENT_DUPLICATE) == null) {
            this.urls.add(httpDoc.getURL());
        }
    }

    /**
     * Loads the document stored for the URL.
     *
     * <p>If anything goes wrong while reading, the document archive is considered invalid
     * and deleted. A document whose content archive is missing gets empty content.
     *
     * @param url URL to look up
     * @return the cached document (flagged as cached), or null if it is not cached or unreadable
     */
    @Override // net.matuschek.http.HttpDocManager
    public HttpDoc retrieveFromCache(URL url) {
        HttpDoc httpDoc = null;
        File file = null;
        try {
            String name = generateFilename(url.toExternalForm()) + SuffixConstants.SUFFIX_STRING_zip;
            file = new File(this.storagedir + DOCUMENTS + name);
            if (file.exists()) {
                log.info("retrieve " + file);
                httpDoc = new HttpDoc();
                httpDoc.setURL(url);
                ZipFile documentZip = new ZipFile(file);
                try {
                    readHeadersFromZipFile(httpDoc, documentZip);
                    readLinksFromZipFile(httpDoc, documentZip);
                    httpDoc.setCached(true);
                    File contentFile = contentFile(httpDoc.getContentMD5(), SuffixConstants.SUFFIX_STRING_zip);
                    if (contentFile.exists()) {
                        ZipFile contentZip = new ZipFile(contentFile);
                        try {
                            readContentFromZipFile(httpDoc, contentZip);
                        } finally {
                            contentZip.close();
                        }
                    } else {
                        httpDoc.setContent(new byte[0]);
                    }
                } finally {
                    // Close before the catch block tries to delete the file.
                    documentZip.close();
                }
            }
        } catch (Exception e) {
            log.warn("removing invalid file " + file, e);
            if (file != null) {
                file.delete();
            }
            httpDoc = null;
        }
        return httpDoc;
    }

    /**
     * Sets the content of the document from the entry whose name starts with {@code content}.
     *
     * <p>If several entries match, the last one wins; if none matches, the content is set to null.
     *
     * @param httpDoc document receiving the content
     * @param zipFile content archive to read from
     * @throws IOException if reading fails, the entry size is unknown or too large,
     *                     or the entry ends before its declared size
     */
    protected void readContentFromZipFile(HttpDoc httpDoc, ZipFile zipFile) throws IOException {
        byte[] content = null;
        Enumeration<? extends ZipEntry> entries = zipFile.entries();
        while (entries.hasMoreElements()) {
            ZipEntry entry = entries.nextElement();
            if (entry.getName().startsWith("content")) {
                content = readEntry(zipFile, entry);
            }
        }
        httpDoc.setContent(content);
    }

    /** Reads the complete data of one zip entry into a byte array. */
    private static byte[] readEntry(ZipFile zipFile, ZipEntry entry) throws IOException {
        long declaredSize = entry.getSize();
        if (declaredSize < 0 || declaredSize > Integer.MAX_VALUE) {
            throw new IOException("unsupported size " + declaredSize + " of zip entry " + entry.getName());
        }
        int size = (int) declaredSize;
        byte[] data = new byte[size];
        InputStream in = zipFile.getInputStream(entry);
        try {
            int offset = 0;
            while (offset < size) {
                int count = in.read(data, offset, size - offset);
                if (count < 0) {
                    // read() signals end of stream with -1; it must not be added to the offset.
                    throw new IOException("unexpected end of zip entry " + entry.getName());
                }
                offset += count;
            }
        } finally {
            in.close();
        }
        return data;
    }

    /**
     * Removes the files stored for the URL: the links file, the reference to the shared
     * content and the document archive. Errors are logged, not thrown.
     *
     * @param url URL of the document to remove
     */
    @Override // net.matuschek.http.HttpDocManager
    public void removeDocument(URL url) {
        HttpDoc cached = retrieveFromCache(url);
        try {
            String name = generateFilename(url.toExternalForm()) + SuffixConstants.SUFFIX_STRING_zip;
            File linksFile = new File(this.storagedir + LINKS + name);
            if (linksFile.exists()) {
                linksFile.delete();
            }
            // retrieveFromCache returns null for unknown or invalid documents.
            if (cached != null) {
                deleteContent(cached);
            }
            File documentFile = new File(this.storagedir + DOCUMENTS + name);
            if (documentFile.exists()) {
                documentFile.delete();
            }
        } catch (Exception e) {
            log.error("could not remove " + url, e);
        }
    }

    /**
     * Removes the URL of the document from the users file of its content. When no other
     * URL uses the content any more, the users file and the content archive are deleted.
     */
    private void deleteContent(HttpDoc httpDoc) throws IOException {
        byte[] content = httpDoc.getContent();
        if (content == null || content.length == 0) {
            return;
        }
        String url = httpDoc.getURL().toString();
        String contentMD5 = httpDoc.getContentMD5();
        File usersFile = contentFile(contentMD5, ".txt");

        ArrayList remaining = new ArrayList();
        if (usersFile.exists()) {
            BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(usersFile)));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (!line.equals(url)) {
                        remaining.add(line);
                    }
                }
            } finally {
                reader.close();
            }
        }

        if (remaining.isEmpty()) {
            usersFile.delete();
            File contentZip = contentFile(contentMD5, SuffixConstants.SUFFIX_STRING_zip);
            if (contentZip.exists()) {
                contentZip.delete();
            }
            return;
        }

        FileOutputStream usersOut = new FileOutputStream(usersFile.getPath(), false);
        try {
            Iterator it = remaining.iterator();
            while (it.hasNext()) {
                usersOut.write((((String) it.next()) + LF).getBytes());
            }
        } finally {
            usersOut.close();
        }
    }

    /** Returns the processed URLs, one per line. */
    public String toString() {
        StringBuilder text = new StringBuilder(1000);
        Iterator it = this.urls.iterator();
        while (it.hasNext()) {
            text.append(it.next()).append("\n");
        }
        return text.toString();
    }

    /**
     * Turns the first {@link #storageDirDepth} characters of the name into a relative
     * directory path, e.g. {@code "abc"} with depth 2 gives {@code "a/b/"} (with the
     * platform separator).
     */
    private final String useFirstCharactersAsDirectories(String str) {
        int depth = this.storageDirDepth;
        if (depth > str.length()) {
            depth = str.length();
        }
        char[] path = new char[depth * 2];
        for (int i = 0; i < depth; i++) {
            path[i * 2] = str.charAt(i);
            path[(i * 2) + 1] = File.separatorChar;
        }
        return new String(path);
    }

    /**
     * Creates the directories formed by the first {@code storageDirDepth * 2} characters
     * of {@code str2} below the given sub directory of the storage directory.
     */
    private final void checkStoragePathFor(String str, String str2) {
        if (!str.endsWith(File.separator)) {
            str = str + File.separator;
        }
        // Clamp so that names shorter than the directory prefix do not cause an exception.
        int prefixLength = Math.min(str2.length(), this.storageDirDepth * 2);
        File directory = new File(this.storagedir + str + str2.substring(0, prefixLength));
        if (!directory.exists()) {
            directory.mkdirs();
        }
    }

    /**
     * Derives the relative file name (without suffix) used for a URL.
     *
     * <p>With {@link #useMD5} the name is the hex MD5 of the URL, split into directories
     * according to {@link #storageDirDepth}. Otherwise the URL itself is used with the
     * characters {@code " * / : < > ? \ |} replaced by entity-like tokens.
     *
     * @param str external form of the URL
     * @return relative file name
     */
    protected String generateFilename(String str) {
        if (this.useMD5) {
            String hex = new MD5(str).asHex();
            if (this.storageDirDepth > 0) {
                return useFirstCharactersAsDirectories(hex) + hex.substring(this.storageDirDepth);
            }
            return hex;
        }
        StringBuilder name = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            switch (c) {
                case '\"':
                    name.append("&quot;");
                    break;
                case '*':
                    name.append("&asterisk;");
                    break;
                case '/':
                    name.append("&slash;");
                    break;
                case ':':
                    name.append("&colon;");
                    break;
                case '<':
                    name.append("&lt;");
                    break;
                case '>':
                    name.append("&gt;");
                    break;
                case '?':
                    name.append("&question;");
                    break;
                case '\\':
                    // NOTE(review): no trailing ';' unlike the other tokens; kept as is because
                    // changing it would change the file names of already cached documents.
                    name.append("&backslash");
                    break;
                case '|':
                    name.append("&or;");
                    break;
                default:
                    name.append(c);
                    break;
            }
        }
        return name.toString();
    }

    /**
     * Returns the file in the content area for the given MD5 value and suffix.
     *
     * @param str  MD5 value of the content
     * @param str2 file suffix including the dot
     * @return the content file (it may not exist)
     */
    protected File contentFile(String str, String str2) {
        String directories = useFirstCharactersAsDirectories(str);
        String remainder = str.substring(this.storageDirDepth);
        return new File(this.storagedir + CONTENT + directories + remainder + str2);
    }

    /** Closes the stream of {@code directory.csv}; safe to call more than once. */
    @Override // net.matuschek.http.HttpDocManager
    public void finish() {
        if (this.storageDirectoryStream == null) {
            return;
        }
        try {
            this.storageDirectoryStream.close();
        } catch (IOException e) {
            log.warn("could not close " + this.storageDirectoryFile, e);
        } finally {
            this.storageDirectoryStream = null;
        }
    }

    /** Makes sure the directory stream is closed when the cache is garbage collected. */
    protected void finalize() throws Throwable {
        try {
            finish();
        } finally {
            super.finalize();
        }
    }

    /**
     * Sets how many leading characters of a file name become nested directories.
     *
     * @param i directory depth
     */
    public void setStorageDirDepth(int i) {
        this.storageDirDepth = i;
    }

    /**
     * @return how many leading characters of a file name become nested directories
     */
    public int getStorageDirDepth() {
        return this.storageDirDepth;
    }

    /**
     * Maps a Content-Type header value to a file extension including the leading dot.
     * Parameters after a semicolon are ignored.
     *
     * @return the extension, or an empty string if the type is null or unknown
     */
    private String getExtensionFromContenttype(String str) {
        String extension = null;
        if (str != null) {
            int semicolon = str.indexOf(';');
            String mimeType = semicolon > 0 ? str.substring(0, semicolon).trim() : str.trim();
            extension = getDefaultExtension(mimeType);
        }
        if (extension == null) {
            return "";
        }
        // getDefaultExtension already returns a leading dot; only add one if it is missing.
        return extension.startsWith(".") ? extension : "." + extension;
    }

    /**
     * Returns the default extension for a MIME type.
     *
     * @param str MIME type, may be null
     * @return {@code ".html"} for types containing {@code text/html}, {@code ".txt"} for other
     *         types containing {@code text/}, otherwise null
     */
    protected String getDefaultExtension(String str) {
        if (str == null) {
            return null;
        }
        if (str.indexOf("text/html") >= 0) {
            return ".html";
        }
        if (str.indexOf("text/") >= 0) {
            return ".txt";
        }
        return null;
    }
}
