package org.gcube.indexmanagement.common;

import java.io.OutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.util.HashMap;

/**
 * An OutputStream used to output LZW compressed data. See
 * http://en.wikipedia.org/wiki/LZW for an explanation of the algorithm.
 * <p>
 * Output format: every code is written with the current code width, most
 * significant bit first, and codes are packed back to back without byte
 * alignment. The width starts at 9 bits and grows by one bit each time the
 * number of registered codes reaches the capacity of the current width. When
 * the stream is closed, the last partial byte is padded with zero bits.
 * <p>
 * The stream must be closed for the output to be complete: the code of the
 * last pending byte sequence and any buffered bits are only written by
 * {@link #close()}. No synchronization is performed by this class.
 * 
 * @see org.gcube.indexmanagement.common.DecompressingInputStream
 * 
 * @version 0.1
 */
public class CompressingOutputStream extends FilterOutputStream {

    /**
     * The maximum allowed number of codes. A compromise: the more codes, the
     * larger compression rate, but also the more memory and cpu is used...
     * Codes therefore stay below 2^19, so a (code, byte) key always fits
     * into a positive int.
     */
    private static final int MAX_ALLOWED_CODES = 1 << 19;

    /** The number of single byte codes; every byte value is its own code */
    private static final int SINGLE_BYTE_CODES = 256;

    /** The previously read, but not yet written code (-1 if there is none) */
    private int prevCode = -1;

    /**
     * The number of codes currently registered. Also forms the basis for the
     * next code.
     */
    private int codeCount;

    /** The current bit size of codes ( ie sizeCode(255)=8, sizeCode(256)=9 ) */
    private int codeSize;

    /**
     * The max number of codes which can fit into the current codeSize. Always
     * 2^codeSize
     */
    private int maxCodes;

    /** The part of a code scheduled for outputting, which hasn't been output yet */
    private int remainingPart = 0;

    /** The size (in bits, always less than 8 between calls) of the remainingPart */
    private int remainingSize = 0;

    /** Set once close() has run, which makes further close() calls no-ops */
    private boolean closed = false;

    /**
     * The code map, mapping a key generated from a (previous code, next byte)
     * pair to the code assigned to that sequence
     */
    private final HashMap<Integer, Integer> codes = new HashMap<Integer, Integer>();

    /**
     * Constructor which wraps an output stream, reserves the codes 0-255 for
     * the single byte values, and sets the code size to the size of one byte +
     * 1 to make place for the next code.
     * 
     * @param out -
     *            the output stream to output the results through
     */
    public CompressingOutputStream(OutputStream out) {
        super(out);
        // Single bytes are their own codes and are never looked up, so they
        // need no map entries. Registering them under the keys 0..255 would
        // collide with the pair keys (0 << 8) | b and make a 0x00 byte that is
        // followed by another byte disappear from the output.
        codeCount = SINGLE_BYTE_CODES;
        setCodeSize(9);
    }

    /**
     * Writes the code of the pending byte sequence (if any), pads and writes
     * the last partial byte, and closes the wrapped stream. Calling this
     * method more than once has no further effect.
     * 
     * @throws IOException --
     *             an error outputting the final codes or closing the wrapped
     *             stream
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }
        closed = true;
        try {
            if (prevCode != -1) {
                writeCode(prevCode);
                prevCode = -1;
            }
            flushRemainingBits();
        } finally {
            // Release the wrapped stream even if the final codes failed.
            super.close();
        }
    }

    /**
     * Compresses all bytes of the given array.
     * 
     * @param bytes -
     *            the bytes to compress
     * @throws IOException --
     *             an error outputting the codes, or the stream is closed
     */
    @Override
    public void write(byte[] bytes) throws IOException {
        write(bytes, 0, bytes.length);
    }

    /**
     * Compresses len bytes of the given array, starting at index off. A range
     * reaching past the end of the array is silently shortened to the
     * available bytes.
     * 
     * @param bytes -
     *            the array holding the bytes to compress
     * @param off -
     *            the index of the first byte to compress
     * @param len -
     *            the number of bytes to compress
     * @throws IOException --
     *             an error outputting the codes, or the stream is closed
     * @throws IndexOutOfBoundsException --
     *             if off or len is negative
     */
    @Override
    public void write(byte[] bytes, int off, int len) throws IOException {
        if (off < 0 || len < 0) {
            throw new IndexOutOfBoundsException("off=" + off + ", len=" + len);
        }
        ensureOpen();
        // Lenient clamping; written as a subtraction so that off + len cannot
        // overflow. An off beyond the array yields a negative count and
        // therefore an empty loop.
        int available = bytes.length - off;
        int count = (len > available) ? available : len;
        int end = off + count;
        for (int i = off; i < end; i++) {
            consume(bytes[i] & 0xFF);
        }
    }

    /**
     * Compresses a single byte. Values from -128 to -1 are accepted as sign
     * extended bytes and are treated as their unsigned counterpart.
     * 
     * @param b -
     *            the byte to compress
     * @throws IOException --
     *             an error outputting the codes, or the stream is closed
     * @throws IllegalArgumentException --
     *             if b is not within -128..255
     */
    @Override
    public void write(int b) throws IOException {
        if (b > 255 || b < -128) {
            throw new IllegalArgumentException("Value not within byte range.");
        }
        ensureOpen();
        // Masking keeps negative input from being mistaken for the -1
        // "nothing pending" marker used by prevCode.
        consume(b & 0xFF);
    }

    /**
     * Feeds one unsigned byte value (0-255) to the compressor. The very first
     * byte only starts a sequence; every later byte goes through compress.
     * 
     * @param value --
     *            the unsigned byte value
     * @throws IOException --
     *             an error outputting the code
     */
    private void consume(int value) throws IOException {
        if (prevCode == -1) {
            prevCode = value;
        } else {
            compress(value);
        }
    }

    /**
     * Rejects use of the stream after it has been closed.
     * 
     * @throws IOException --
     *             if close() has already been called
     */
    private void ensureOpen() throws IOException {
        if (closed) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * The method in charge accumulating bytes until no code exists for the
     * given byte sequence, creating new codes and outputting the codes instead
     * of the byte sequences.
     * 
     * Makes a new codeKey from the previous code and the current byte. If this
     * codeKey is already mapped to a code in the code map, this code will be
     * assigned to the "previous" variable. If not, the previous code will be
     * output, and the new code will be added to the map (unless the maximum
     * number of codes has been reached) before current byte is assigned to the
     * "previous" variable.
     * 
     * @param currentByte --
     *            the byte to be compressed (0-255)
     * @throws IOException --
     *             an error outputting the code
     */
    private void compress(int currentByte) throws IOException {
        // prevCode is below 2^19 and currentByte below 2^8, so the key is
        // unique for each (code, byte) pair and fits into an int.
        int key = (prevCode << 8) | currentByte;
        Integer known = codes.get(key);
        if (known != null) {
            prevCode = known;
            return;
        }
        writeCode(prevCode);
        if (codeCount < MAX_ALLOWED_CODES) {
            // Widen only after the previous code went out with the old width:
            // the code registered below cannot be written before the next
            // writeCode call.
            if (codeCount == maxCodes) {
                setCodeSize(codeSize + 1);
            }
            codes.put(key, codeCount++);
        }
        prevCode = currentByte;
    }

    /**
     * The method in charge of outputting codes (of codeSize # of bits) as bytes
     * (in 8 bit intervals). Bits which do not fill a whole byte are kept in
     * remainingPart until the next call.
     * 
     * @param code -
     *            the code to output
     * @throws IOException --
     *             an error outputting the codes
     */
    private void writeCode(int code) throws IOException {
        // At most 7 buffered bits plus a 19 bit code: well within an int.
        remainingPart = (remainingPart << codeSize) | code;
        remainingSize += codeSize;

        while (remainingSize >= 8) {
            remainingSize -= 8;
            // The wrapped stream only uses the low 8 bits of the argument.
            super.write(remainingPart >> remainingSize);
            remainingPart &= (1 << remainingSize) - 1;
        }
    }

    /**
     * Writes the buffered bits (if any) as one last byte, left aligned and
     * padded with zero bits.
     * 
     * @throws IOException --
     *             an error outputting the byte
     */
    private void flushRemainingBits() throws IOException {
        if (remainingSize != 0) {
            super.write(remainingPart << (8 - remainingSize));
            remainingPart = 0;
            remainingSize = 0;
        }
    }

    /**
     * A helper method to change the codeSize and maxCodes variables in one go.
     * 
     * @param newSize =
     *            The new codeSize
     */
    private void setCodeSize(int newSize) {
        codeSize = newSize;
        maxCodes = 1 << codeSize;
    }
}
