/*
    BFilter - a smart ad-filtering web proxy
    Copyright (C) 2002-2005  Joseph Artsimovich <joseph_a@mail.ru>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "pch.h"

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "GzipDecompressor.h"
#include "SplittableBuffer.h"
#include "DataChunk.h"
#include <memory>
#include <cstring>
#include <algorithm>

using namespace std;

/*
Incrementally fills the buffer [where, where + num) from the input range
[begin, end). m_stateOffset holds the number of bytes already stored by
previous calls, so a field may be collected across several input spans.

On return `begin` has been advanced past the bytes that were copied.
Returns true once all `num` bytes are present (m_stateOffset is then reset
to 0), or false if the input ran out first (m_stateOffset then records
the progress made so far).
*/
inline bool
GzipDecompressor::loadBytes(uint8_t* where, size_t num, uint8_t const*& begin, uint8_t const* end)
{
	size_t const missing = num - m_stateOffset;
	size_t const present = end - begin;
	bool const complete = (present >= missing);
	size_t const to_copy = complete ? missing : present;

	memcpy(where + m_stateOffset, begin, to_copy);
	begin += to_copy;

	// Either start afresh for the next field, or remember how far we got.
	m_stateOffset = complete ? 0 : (m_stateOffset + to_copy);
	return complete;
}

/*
Constructs a decompressor that expects a gzip header as its first input,
with no EOF seen, no error recorded and no partially loaded field,
then sets up the underlying zlib stream.
*/
GzipDecompressor::GzipDecompressor()
	: m_state(STATE_HEAD)
	, m_isEOF(false)
	, m_isError(false)
	, m_isFirstDataSet(true)
	, m_stateOffset(0)
{
	initStream();
}

/*
Hands the zlib stream back to zlib via inflateEnd().
Its result is deliberately not examined.
*/
GzipDecompressor::~GzipDecompressor()
{
	(void)inflateEnd(&m_strm);
}

void
GzipDecompressor::reset()
{
	inflateReset(&m_strm);
	m_bufferedInput.clear();
	m_state = STATE_HEAD;
	m_isEOF = false;
	m_isError = false;
	m_isFirstDataSet = true;
	m_stateOffset = 0;
}

bool
GzipDecompressor::isError() const
{
	return m_isError;
}

/*
Accepts more compressed input.

The contents of `data` are moved to the end of the internal input buffer.
`eof` marks the end of the input; once it has been passed as true, the
EOF condition stays set. In the error state, `data` is simply emptied
and `eof` is ignored.
*/
void
GzipDecompressor::consume(SplittableBuffer& data, bool eof)
{
	if (!m_isError) {
		m_bufferedInput.appendDestructive(data);
		m_isEOF = m_isEOF || eof;
		return;
	}

	// Nothing useful can be done with the input after an error.
	data.clear();
}

/*
Decompresses as much of the buffered input as possible, appending the
decompressed data to `buf`.

`limit` is the maximum number of output bytes to produce in this call.
The return value is the number of output bytes actually produced.

The buffered input is processed span by span by a state machine that
follows the layout of a gzip member: fixed header, the optional FEXTRA,
FNAME, FCOMMENT and FHCRC fields (selected by header flag bits 2, 3, 4
and 1 respectively), the deflate data, and finally the footer holding
the CRC32 and the length of the uncompressed data. After a footer has
been verified, the machine goes back to expecting a header, so several
members in a row are accepted. Every state can be suspended when the
current span runs out and resumed on the next span or the next call.

Input that has been processed is removed from the internal buffer.
On any format or checksum violation the error flag is set; in the error
state this function does nothing and returns 0.
*/
size_t
GzipDecompressor::retrieveOutput(SplittableBuffer& buf, size_t limit)
{
	if (m_isError) {
		return 0;
	}
	auto_ptr<DataChunk> chunk; // output chunk currently being filled, if any
	size_t chunk_data_size = 0; // number of bytes already written to `chunk`
	size_t input_consumed = 0;
	size_t output_produced = 0;
	SplittableBuffer::ByteIterator iter = m_bufferedInput.begin();
	for (; !iter.isAtRightBorder(); iter.nextSpan()) {
		uint8_t const* cur = (uint8_t const*)&*iter;
		uint8_t const* const end = (uint8_t const*)iter.spanEnd();
		
		switch (m_state) {
			case STATE_DATA:
			case_STATE_DATA: {
				if (!chunk.get()) {
					if (limit == 0) {
						// Not allowed to produce more output in this call.
						break;
					}
					chunk = DataChunk::create(std::min<size_t>(MAX_OUTBUF_SIZE, limit));
				}
				m_strm.next_in = (Bytef*)cur;
				m_strm.avail_in = end - cur;
				uint8_t* const out_begin = (uint8_t*)chunk->getDataAddr() + chunk_data_size;
				m_strm.next_out = out_begin;
				m_strm.avail_out = chunk->getDataSize() - chunk_data_size;
				int res = inflate(&m_strm, Z_SYNC_FLUSH);
				cur = m_strm.next_in;
				if (out_begin != m_strm.next_out) {
					size_t produced = m_strm.next_out - out_begin;
					output_produced += produced;
					chunk_data_size += produced;
					limit -= produced;
					// Length and CRC32 of the output are tracked so they
					// can be checked against the footer.
					m_dataLength += produced;
					m_dataCRC32 = crc32(m_dataCRC32, out_begin, produced);
					if (chunk_data_size == chunk->getDataSize()) {
						// The chunk is full: hand it over to the caller.
						buf.append(chunk);
						chunk_data_size = 0;
					}
				}
				if (res == Z_OK && m_strm.avail_out == 0) {
					// Ran out of output space; retry with a fresh chunk.
					goto case_STATE_DATA;
				}
				if (res == Z_OK || res == Z_BUF_ERROR) {
					break;
				} else if (res == Z_STREAM_END) {
					m_state = STATE_CRC32_ISIZE;
					inflateReset(&m_strm);
				} else {
					m_isError = true;
					break;
				}
				// fall through
			}
			case STATE_CRC32_ISIZE: {
				if (!loadBytes(m_footer, sizeof(m_footer), cur, end)) {
					break;
				}
				
				// Both footer fields are stored least significant byte first.
				uint32_t stored_crc32 = uint32_t(m_footer[0])|(uint32_t(m_footer[1])<<8)
					|(uint32_t(m_footer[2])<<16)|(uint32_t(m_footer[3])<<24);
				uint32_t stored_length = uint32_t(m_footer[4])|(uint32_t(m_footer[5])<<8)
					|(uint32_t(m_footer[6])<<16)|(uint32_t(m_footer[7])<<24);
				if (stored_crc32 != m_dataCRC32 || stored_length != m_dataLength) {
					m_isError = true;
					break;
				}
				m_isFirstDataSet = false;
				m_state = STATE_HEAD;
				// fall through
			}
			case STATE_HEAD: {
				if (!loadBytes(m_header, sizeof(m_header), cur, end)) {
					break;
				}
				if (m_header[0] != 0x1f || m_header[1] != 0x8b) {
					// signature doesn't match
					m_isError = true;
					break;
				}
				if (m_header[2] != 8) {
					// wrong compression method
					m_isError = true;
					break;
				}
				m_flags = m_header[3];
				if (m_flags & 0xe0) {
					// reserved flags are not clear
					m_isError = true;
					break;
				}
				// The header checksum is accumulated over the fixed header
				// and every optional field that precedes FHCRC.
				m_headCRC32 = crc32(0, m_header, 10);
				m_state = STATE_FEXTRA;
				// fall through
			}
			case STATE_FEXTRA: {
				if (!(m_flags & (1<<2))) {
					m_state = STATE_FNAME;
					goto case_STATE_FNAME;
				}
				
				if (cur == end) {
					break;
				}
				// Low byte of the extra field length;
				// m_stateOffset is reused to hold that length.
				m_headCRC32 = crc32(m_headCRC32, cur, 1);
				m_stateOffset = *cur;
				++cur;
				m_state = STATE_FEXTRA_1;
				// fall through
			}
			case STATE_FEXTRA_1: {	
				if (cur == end) {
					break;
				}
				// High byte of the extra field length.
				m_headCRC32 = crc32(m_headCRC32, cur, 1);
				m_stateOffset += size_t(*cur) << 8;
				++cur;
				m_state = STATE_FEXTRA_REST;
				// fall through
			}
			case STATE_FEXTRA_REST: {
				// Skip the extra field; m_stateOffset is the number
				// of its bytes that remain to be skipped.
				size_t avail = end - cur;
				if (avail < m_stateOffset) {
					m_headCRC32 = crc32(m_headCRC32, cur, avail);
					m_stateOffset -= avail;
					cur = end;
					break;
				}
				m_headCRC32 = crc32(m_headCRC32, cur, m_stateOffset);
				cur += m_stateOffset;
				m_stateOffset = 0;
				m_state = STATE_FNAME;
				// fall through
			}
			case STATE_FNAME:
			case_STATE_FNAME: {
				if (!(m_flags & (1<<3))) {
					m_state = STATE_FCOMMENT;
					goto case_STATE_FCOMMENT;
				}
				// Skip the zero-terminated file name, terminator included.
				size_t avail = end - cur;
				uint8_t const* endpos = (uint8_t const*)memchr(cur, 0, avail);
				if (!endpos) {
					m_headCRC32 = crc32(m_headCRC32, cur, avail);
					cur = end;
					break;
				}
				++endpos;
				m_headCRC32 = crc32(m_headCRC32, cur, endpos - cur);
				cur = endpos;
				m_state = STATE_FCOMMENT;
				// fall through
			}
			case STATE_FCOMMENT:
			case_STATE_FCOMMENT: { 
				if (!(m_flags & (1<<4))) {
					m_state = STATE_FHCRC;
					goto case_STATE_FHCRC;
				}
				// Skip the zero-terminated comment, terminator included.
				size_t avail = end - cur;
				uint8_t const* endpos = (uint8_t const*)memchr(cur, 0, avail);
				if (!endpos) {
					m_headCRC32 = crc32(m_headCRC32, cur, avail);
					cur = end;
					break;
				}
				++endpos;
				m_headCRC32 = crc32(m_headCRC32, cur, endpos - cur);
				cur = endpos;
				m_state = STATE_FHCRC;
				// fall through
			}
			case STATE_FHCRC:
			case_STATE_FHCRC: {
				if (!(m_flags & (1<<1))) {
					// No header checksum: the compressed data follows.
					m_state = STATE_DATA;
					m_dataLength = 0;
					m_dataCRC32 = 0;
					goto case_STATE_DATA;
				}
				if (cur == end) {
					break;
				}
				// Low byte of the stored header checksum.
				m_hCRC = *cur;
				++cur;
				m_state = STATE_FHCRC_1;
				// fall through
			}
			case STATE_FHCRC_1: {
				if (cur == end) {
					break;
				}
				// High byte of the stored header checksum, which must match
				// the low 16 bits of the CRC32 accumulated over the header.
				m_hCRC |= uint16_t(*cur) << 8;
				++cur;
				if (m_hCRC != (m_headCRC32 & 0xFFFF)) {
					m_isError = true;
					break;
				}
				m_state = STATE_DATA;
				m_dataLength = 0;
				m_dataCRC32 = 0;
				goto case_STATE_DATA;
			}
		} // switch
		
		input_consumed += cur - (uint8_t const*)&*iter;
		if (cur != end) {
			// The span could not be processed completely (for example the
			// output limit was reached or an error occurred), so stop here.
			break;
		}
		
		if (m_isError) {
			break;
		}
	} // for
	
	if (chunk_data_size > 0) {
		// Hand over the partially filled chunk, cut down to its used size.
		buf.append(DataChunk::resize(chunk, chunk_data_size));
	}
	
	m_bufferedInput.trimFront(input_consumed);
	
	// The end-of-input check must not run when an error has already been
	// detected above: isErrorAtEOF() only looks at the parser state and
	// could report "no error", which would silently discard the failure
	// (e.g. an invalid header that ends exactly at the end of the input).
	if (!m_isError && m_isEOF && m_bufferedInput.empty()) {
		m_isError = isErrorAtEOF(limit == 0);
	}
	
	return output_produced;
}

/*
Decides whether running out of input at EOF, given the current parser
state, should be treated as an error.

`is_limit_reached` tells that the output limit of the current
retrieveOutput() call was exhausted.
*/
bool
GzipDecompressor::isErrorAtEOF(bool is_limit_reached) const
{
	// With the limit exhausted, some input may have been left
	// unprocessed, and the client is expected to call retrieveOutput()
	// again, so no verdict is given yet.
	if (is_limit_reached) {
		return false;
	}

	// Stopping anywhere but at the start of a header
	// means the stream was cut short.
	if (m_state != STATE_HEAD) {
		return true;
	}

	/*
	We are in STATE_HEAD. A non-zero m_stateOffset means a header was
	only partially read. That is an error for the very first data set,
	but is tolerated after a complete one: at least one implementation
	has been seen that adds a couple of garbage bytes after the gzipped
	response body. Those bytes are outside of the Content-Length, but
	since Content-Length is ignored when Content-Encoding is present
	(the issue is described in HttpStateResponseHeaders.cpp), they have
	to be handled here.
	*/
	return m_stateOffset != 0 && m_isFirstDataSet;
}

void
GzipDecompressor::initStream()
{
	m_strm.zalloc = Z_NULL;
	m_strm.zfree = Z_NULL;
	m_strm.opaque = Z_NULL;
	m_strm.next_in = Z_NULL;
	inflateInit2(&m_strm, -MAX_WBITS);
}
