/*
    MiddleMan filtering proxy server
    Copyright (C) 2002-2004  Jason McLaughlin

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <stdio.h>
#include <string.h>
#include "proto.h"

/*
 * Allocate an HTMLSTREAM in its initial state: no nodes, a fresh Filebuf,
 * scan position 0, not inside a comment, and no callback table.
 * Release the result with htmlstream_free().
 */
HTMLSTREAM *htmlstream_new()
{
	HTMLSTREAM *stream = (HTMLSTREAM*)xmalloc(sizeof(HTMLSTREAM));

	/* empty node list */
	stream->tree = NULL;
	stream->tail = NULL;

	/* backing store for the raw HTML handed to htmlstream_add() */
	stream->buffer = xnew Filebuf();

	/* scanner state */
	stream->startpos = 0;
	stream->incomment = FALSE;

	/* stays NULL until htmlstream_callback_add() creates the table */
	stream->callbacks = NULL;

	return stream;
}

/*
 * Destroy a stream created by htmlstream_new(): the callback table (if one
 * was created), the node tree (if any), the buffer, and the stream itself.
 *
 * hs - stream to destroy; NULL is accepted and ignored, mirroring free().
 */
void htmlstream_free(HTMLSTREAM *hs) {
	if (hs == NULL) return;

	if (hs->callbacks != NULL) hash_destroy(hs->callbacks);
	if (hs->tree != NULL) htmlstream_tree_free(hs->tree);
	xdelete hs->buffer;
	xfree(hs);
}

/*
 * Free a chain of nodes linked through ->next, starting at node.
 *
 * For each node the text and tag are released, and any subtree hanging off
 * ->up is freed by a recursive call, so recursion depth follows the depth
 * of ->up links. A NULL node is a no-op.
 */
void htmlstream_tree_free(struct htmlstream_node *node) {
	while (node != NULL) {
		/* remember the successor before this node is released */
		struct htmlstream_node *following = node->next;

		FREE_AND_NULL(node->text);

		if (node->tag != NULL)
			htmlstream_tag_free(node->tag);

		if (node->up != NULL)
			htmlstream_tree_free(node->up);

		xfree(node);
		node = following;
	}
}

void htmlstream_tag_free(struct htmlstream_tag *tag) {
	struct htmlstream_tag_property *property;

	FREE_AND_NULL(tag->name);
	
	while (tag->properties != NULL) {
		property = tag->properties->next;

		FREE_AND_NULL(tag->properties->name);
		FREE_AND_NULL(tag->properties->value);

		xfree(tag->properties);

		tag->properties = property;
	}

	xfree(tag);
}

/*
 * Append HTML to the stream and turn every tag or text run that is now
 * complete into a node at the tail of the node list.
 *
 * hs   - stream to extend
 * data - bytes to append to hs->buffer, or NULL to signal the end of the
 *        stream; with NULL the last buffered byte is treated as the end of
 *        a pending text run
 * len  - number of bytes in data
 *
 * Scanning resumes at hs->startpos, the offset of the most recent boundary
 * character ('<' or '>') that was processed, or 0 before any was seen.
 *
 *  - An opening tag ("<...>") is parsed with htmlstream_tag_parse() and
 *    appended as a node with text == NULL.
 *  - A closing tag ("</name>") appends nothing. The list is searched
 *    backwards from the tail for a tag node with the same name (compared
 *    case-insensitively over the full name); if found, the nodes from that
 *    tag to the old tail are detached and hung off the preceding node's
 *    ->up link.
 *  - A non-empty text run becomes a text node, or is appended to the tail
 *    node's text when the tail is already a text node.
 *  - "<!--" switches to comment mode, which ends at the next "->".
 *
 * After a node is appended, the callback registered for its tag name (if
 * any) is invoked as cb->func(hs, node, cb->arg).
 *
 * While more data may follow (data != NULL) the function returns early when
 * fewer than five bytes are buffered from a '<' onward, because it cannot
 * yet tell whether a comment starts there; the next call resumes from
 * hs->startpos.
 */
void htmlstream_add(HTMLSTREAM *hs, char *data, int len) {
	int highestlevel, warned = FALSE;
	unsigned int i;
	size_t closelen;
	char *ptr;
	struct htmlstream_node *node = NULL;
	struct HASH_LIST *hl;
	struct htmlstream_callback *cb;

	if (data != NULL) hs->buffer->Add(data ,len);

	for (i = hs->startpos; i < hs->buffer->size; i++) {
		/* i > 0 keeps the look-behind for '-' inside the buffer; a resumed
		   scan can start at offset 0 while still in comment mode */
		if (hs->incomment == TRUE && i > 0 && hs->buffer->data[i] == '>' && hs->buffer->data[i - 1] == '-')
			hs->incomment = FALSE;
		else if (hs->incomment == FALSE && hs->buffer->data[i] == '>' && hs->buffer->data[hs->startpos] == '<') {
			if (hs->buffer->data[hs->startpos + 1] == '/' && i != hs->startpos + 1) {
				/* number of bytes between "</" and ">" */
				closelen = i - hs->startpos - 2;

				for (highestlevel = 0, node = hs->tail; node; node = node->prev) {
					if (node->level > highestlevel) highestlevel = node->level;
					if (highestlevel > HTML_NESTING_LIMIT) {
						/* nesting is limited because it can exhaust
						   memory when the tree is freed using a
						   recursive function, or when the tree is
						   displayed with the htmltree URL command */
						if (warned == FALSE) {
							putlog(MMLOG_WARN, "HTML nesting limit reached");
							warned = TRUE;
						}

						break;
					}

					/* the length check makes this an exact name match;
					   strncasecmp() alone would accept any open tag whose
					   name merely starts with the closing name (</b>
					   closing <body>), and an empty name would match
					   everything */
					if (node->tag != NULL && strlen(node->tag->name) == closelen && !strncasecmp(node->tag->name, &hs->buffer->data[hs->startpos + 2], closelen)) {
						if (node->prev != NULL && node->prev->up == NULL) {
							/* this tag closes a previous tag,
							   bump this part of the tree up to the next
							   level. */
							hs->tail = node->prev;
							node->prev->level = highestlevel + 1;
							node->prev->next = NULL;
							node->prev->up = node;
							node->down = node->prev;
							node->prev = NULL;
						}

						break;
					}
				}

				/* closing tags never produce a node of their own */
				node = NULL;
			} else {
				node = (htmlstream_node*)xmalloc(sizeof(struct htmlstream_node));
				node->text = NULL;
				/* may be NULL when no tag name could be parsed */
				node->tag = htmlstream_tag_parse(&hs->buffer->data[hs->startpos], i - hs->startpos + 1);
			}

			hs->startpos = i;
		} else if (hs->incomment == FALSE && (hs->buffer->data[i] == '<' || (data == NULL && i == hs->buffer->size - 1))) {
			/* text area has ended, either because we see a starting tag
			   or the stream has ended. */

			if (data != NULL) {
				/* wait for more data until "<!--" can be told apart from
				   an ordinary tag; the size < 4 test stops "size - 4"
				   from wrapping around, which would let strncmp() read
				   past the buffered bytes */
				if (hs->buffer->size < 4 || i >= hs->buffer->size - 4) return;
				if (!strncmp(&hs->buffer->data[i], "<!--", 4)) {
					hs->incomment = TRUE;
					continue;
				}
			}

			if (i != hs->startpos && !isempty(&hs->buffer->data[hs->startpos + 1], i - hs->startpos - 1)) {
				if (hs->tail != NULL && hs->tail->text != NULL) {
					/* this is possible if the last tag was a closing
					   tag that didn't match anything */
					ptr = xstrndup(&hs->buffer->data[hs->startpos + 1], i - hs->startpos - 1);
					hs->tail->text = string_append(hs->tail->text, ptr);
					xfree(ptr);
				} else  {
					node = (htmlstream_node*)xmalloc(sizeof(struct htmlstream_node));
					node->tag = NULL;

					if (hs->startpos == 0)
						/* startpos is the beginning of the buffer, so there's
						   no > there */
						node->text = xstrndup(&hs->buffer->data[hs->startpos], i - hs->startpos);
					else
						node->text = xstrndup(&hs->buffer->data[hs->startpos + 1], i - hs->startpos - 1);
				}
			}

			hs->startpos = i;
		}

		if (node != NULL) {
			/* link the freshly created node in at the tail */
			node->next = NULL;
			node->down = node->up = NULL;
			node->level = 0;

			if (hs->tree == NULL) {
				hs->tree = hs->tail = node;
				node->prev = NULL;
			} else {
				hs->tail->next = node;
				node->prev = hs->tail;
				hs->tail = node;
			}

			/* notify whoever registered for this tag name */
			if (hs->callbacks != NULL && node->tag != NULL) {
				hl = hash_search(hs->callbacks, node->tag->name);
				if (hl != NULL) {
					cb = (htmlstream_callback*)hl->data;
					cb->func(hs, node, cb->arg);
				}
			}

			node = NULL;
		}
	}
}

/*
 * Parse the text of one opening tag into a name and a list of
 * name/value properties.
 *
 * tag - start of the tag text; index 0 is never examined (htmlstream_add()
 *       passes the position of the opening '<'), which lets offset 0 double
 *       as the "not set" marker for the start/end variables below
 * len - number of bytes of tag to scan
 *
 * Tokens are separated by whitespace (space, tab, LF, CR, FF), '=', '>' or
 * a double quote. Delimiters inside a double-quoted region are ignored, so
 * quoted values may contain them. The first token is the tag name; after
 * that, tokens are consumed in pairs as property name and property value,
 * and a property is only emitted once a value token has been seen.
 *
 * Returns a newly allocated tag to be released with htmlstream_tag_free(),
 * or NULL when tag is NULL, len is below 2, or no tag name was found.
 */
struct htmlstream_tag *htmlstream_tag_parse(char *tag, int len) {
	int i, indquote = FALSE;
	int startname = 0, endname = 0, startvalue = 0, endvalue = 0;
	char c;
	struct htmlstream_tag *ret;
	struct htmlstream_tag_property *property, *tail = NULL;

	if (tag == NULL || len < 2) return NULL;

	ret = (htmlstream_tag*)xmalloc(sizeof(struct htmlstream_tag));
	ret->name = NULL;
	ret->properties = NULL;

	for (i = 1; i < len; i++) {
		c = tag[i];

		/* tags are frequently wrapped across lines or indented with tabs,
		   so every HTML whitespace character separates tokens, not just
		   a plain space */
		if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '=' || c == '>' || c == '\"') {
			if (c == '\"') {
				indquote = !indquote;

				/* opening quote: the value starts at the next byte */
				if (indquote == TRUE) continue;
			} else if (indquote == TRUE) continue;

			/* a delimiter closes whichever token is currently open */
			if (startname != 0 && endname == 0)
				endname = i;
			if (startvalue != 0 && endvalue == 0)
				endvalue = i;

			if (ret->name == NULL && endname != 0) {
				/* first completed token is the tag name */
				ret->name = xstrndup(tag + startname, endname - startname);
				startname = endname = 0;
			} else if (endvalue != 0) {
				/* a name/value pair is complete; append it to the list */
				property = (htmlstream_tag_property*)xmalloc(sizeof(struct htmlstream_tag_property));
				property->name = xstrndup(tag + startname, endname - startname);
				property->value = xstrndup(tag + startvalue, endvalue - startvalue);
				property->next = NULL;

				if (tail == NULL)
					ret->properties = tail = property;
				else {
					tail->next = property;
					tail = tail->next;
				}

				startname = endname = startvalue = endvalue = 0;
			}

			continue;
		}

		/* ordinary character: it opens a name, or a value once a name
		   has been completed */
		if (startname == 0)
			startname = i;
		else if (startvalue == 0 && endname != 0)
			startvalue = i;
	}

	if (ret->name == NULL) {
		htmlstream_tag_free(ret);
		ret = NULL;
	}

	return ret;
}
		
/*
 * Register a callback to be invoked by htmlstream_add() whenever a node
 * with the given tag name is appended to the stream.
 *
 * hs   - stream to register on; its callback table is created on first use
 * tag  - tag name used as the lookup key; a copy made with xstrdup() is
 *        stored, so the caller keeps ownership of tag
 * func - callback, invoked as func(hs, node, arg); passed as void* and cast
 *        to void (*)(HTMLSTREAM*, htmlstream_node*, void*)
 * arg  - opaque pointer handed back to func unchanged
 *
 * Returns -1 if a callback is already registered for tag, TRUE otherwise.
 *
 * NOTE(review): the key copy and the callback record are handed to the hash
 * table and presumably released by hash_destroy() - confirm against the
 * hash implementation.
 */
int htmlstream_callback_add(HTMLSTREAM *hs, const char *tag, void *func, void *arg) {
	struct htmlstream_callback *cb;

	if (hs->callbacks == NULL)
		hs->callbacks = hash_create(HSTREAM_HASH_SIZE);
	else if (hash_search(hs->callbacks, tag))
		return -1;	/* only one callback per tag name */

	cb = (htmlstream_callback*)xmalloc(sizeof(struct htmlstream_callback));
	cb->func = (void (*)(HTMLSTREAM*, htmlstream_node*, void*))func;
	cb->arg = arg;

	hash_insert(hs->callbacks, xstrdup(tag), cb);

	return TRUE;
}
