/*
	SRG - Squid Report Generator
	Copyright 2005 University of Waikato

	This file is part of SRG.

	SRG is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 2 of the License, or
	(at your option) any later version.

	SRG is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with SRG; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

*/

#include "srg.h"

#define STATE_INIT 0
#define STATE_COLON 1
#define STATE_PROTOCOL 2
#define STATE_PASSPORT 3
#define STATE_USERSITE 4
#define STATE_SITE 5
#define STATE_PORT 6
#define STATE_END 7

/* Duplicates 'value' into '*field' using strdup().
 * Returns 0 on success or -1 if the allocation failed. */
static int parseURLDup(char **field, const char *value) {

	*field = strdup(value);
	return (*field != NULL) ? 0 : -1;
}

/* Parses the specified URL and fills the structure with the different parts.
 *
 * The scanner walks a private copy of URL one character at a time and uses
 * the ':' '/' and '@' separators to split it into protocol, user, password,
 * site, port and location. Forms such as 'site', 'site:port',
 * 'site/location', 'user:password@site:port/location' and
 * 'protocol://site:port/location' are accepted.
 *
 * URL - NUL terminated string to parse (not modified)
 * req - structure to fill in; every field is reset to NULL first
 *
 * Returns 0 on success. Parts that were present are stored as strdup()
 * copies which should be released with freeURL(); location is always a
 * valid (possibly empty) string and parts that were absent stay NULL.
 *
 * Returns non-zero if URL or req is NULL, the URL is malformed, or memory
 * could not be allocated. In that case nothing is left allocated and every
 * field of req (when req is not NULL) is NULL.
 */
int parseURL(const char*URL, url_request *req) {

	char *start = NULL;
	char *tmp = NULL;
	/* t1 / t2 mark the start of the first / second pending token */
	char *t1 = NULL;
	char *t2 = NULL;
	unsigned int state = STATE_INIT;

	if (!req)
		return -1;

	/* Initialise the return structure */
	req->protocol = NULL;
	req->user = NULL;
	req->password = NULL;
	req->site = NULL;
	req->port = NULL;
	req->location = NULL;

	if (!URL)
		return -1;

	/* Work on a private copy, separators get overwritten with '\0' */
	start = strdup(URL);
	if (!start)
		return -1;
	tmp = start;

	/* Once the location is captured there is nothing left to examine */
	while (*tmp != '\0' && state != STATE_END) {
		switch (state) {
		case STATE_INIT:
			if (*tmp == ':') {
				/* Cannot Start with : */
				if (!t1)
					goto fail;
				*tmp = '\0';
				t1 = start;
				state = STATE_COLON;
			} else if (*tmp == '/') {
				/* Cannot Start with / */
				if (!t1)
					goto fail;
				*tmp = '\0';
				if (parseURLDup(&req->site, start) != 0)
					goto fail;
				t1 = NULL;
				/* Get the location also */
				*tmp = '/';
				if (parseURLDup(&req->location, tmp) != 0)
					goto fail;
				state = STATE_END;
			} else {
				/* Only used as a "seen a character" marker */
				t1 = tmp;
			}
			break;
		case STATE_COLON:
			if (*tmp == '/') {
				state = STATE_PROTOCOL;
			} else if (*tmp == '@') {
				/* user:@ (empty password!) */
				*tmp = '\0';
				if (parseURLDup(&req->user, start) != 0)
					goto fail;
				if (parseURLDup(&req->password, "") != 0)
					goto fail;
				t1 = NULL;
				state = STATE_SITE;
			} else {
				/* No Protocol, could be password or port */
				t2 = tmp;
				state = STATE_PASSPORT;
			}
			break;
		case STATE_PROTOCOL:
			if (*tmp != '/') {
				/* :/ is not a valid string in a URL */
				goto fail;
			}
			/* Valid protocol found */
			if (parseURLDup(&req->protocol, start) != 0)
				goto fail;
			t1 = NULL;
			/* Could now have a user or a site */
			state = STATE_USERSITE;
			break;
		case STATE_USERSITE:
			if (*tmp == ':') {
				/* Cannot Start with : */
				if (!t1)
					goto fail;
				*tmp = '\0';
				state = STATE_PASSPORT;
			} else if (*tmp == '/') {
				/* /// is not valid in a URL */
				if (!t1)
					goto fail;
				/* No User / Pass, No Port */
				*tmp = '\0';
				if (parseURLDup(&req->site, t1) != 0)
					goto fail;
				/* Get the location also */
				*tmp = '/';
				if (parseURLDup(&req->location, tmp) != 0)
					goto fail;
				state = STATE_END;
			} else if (!t1) {
				/* Set starting pointer */
				t1 = tmp;
			}
			break;
		case STATE_PASSPORT:
			if (*tmp == '@') {
				/* We have user/pass */
				*tmp = '\0';
				if (parseURLDup(&req->user, t1) != 0)
					goto fail;
				/* No second token means an empty password */
				if (parseURLDup(&req->password, t2 ? t2 : "") != 0)
					goto fail;
				t1 = NULL;
				t2 = NULL;
				state = STATE_SITE;
			} else if (*tmp == '/') {
				/* We have site / port */
				if (!t2) {
					/* :/ is not valid in a URL */
					goto fail;
				}
				*tmp = '\0';
				if (parseURLDup(&req->site, t1) != 0)
					goto fail;
				if (parseURLDup(&req->port, t2) != 0)
					goto fail;
				t1 = NULL;
				t2 = NULL;
				/* Get the location also */
				*tmp = '/';
				if (parseURLDup(&req->location, tmp) != 0)
					goto fail;
				state = STATE_END;
			} else if (!t2) {
				/* Set starting pointer */
				t2 = tmp;
			}
			break;
		case STATE_SITE:
			if (*tmp == ':') {
				/* Site / Port found */
				if (!t1) {
					/* Cannot have 0 length site */
					goto fail;
				}
				*tmp = '\0';
				if (parseURLDup(&req->site, t1) != 0)
					goto fail;
				t1 = NULL;
				state = STATE_PORT;
			} else if (*tmp == '/') {
				/* Site / Location found */
				if (!t1) {
					/* Cannot have 0 length site */
					goto fail;
				}
				*tmp = '\0';
				if (parseURLDup(&req->site, t1) != 0)
					goto fail;
				t1 = NULL;
				/* Get the location also */
				*tmp = '/';
				if (parseURLDup(&req->location, tmp) != 0)
					goto fail;
				state = STATE_END;
			} else if (!t1) {
				/* Set starting pointer */
				t1 = tmp;
			}
			break;
		case STATE_PORT:
			if (*tmp == '/') {
				/* Port / Location found */
				if (!t1) {
					/* Cannot have 0 length port */
					goto fail;
				}
				*tmp = '\0';
				if (parseURLDup(&req->port, t1) != 0)
					goto fail;
				t1 = NULL;
				/* And get the location also */
				*tmp = '/';
				if (parseURLDup(&req->location, tmp) != 0)
					goto fail;
				state = STATE_END;
			} else if (!t1) {
				/* Set starting pointer */
				t1 = tmp;
			}
			break;
		default:
			break;
		}
		/* Move to next character */
		tmp++;
	}

	/* Ending cases */
	switch (state) {
	case STATE_INIT:
		/* No : found!, assume form of 'www.google.com',
		 * assign site only
		 */
		if (parseURLDup(&req->site, start) != 0)
			goto fail;
		break;
	case STATE_COLON:
		/* INVALID: Cannot end URL with a : */
		goto fail;
	case STATE_PROTOCOL:
		/* INVALID: Cannot end URL with :/ */
		goto fail;
	case STATE_PASSPORT:
		/* We have a URL of the form 'www.google.com:80'.
		 * A trailing ':' with nothing after it (e.g. 'http://host:')
		 * leaves t2 unset and is rejected like any other empty port.
		 */
		if (!t1 || !t2)
			goto fail;
		if (parseURLDup(&req->site, t1) != 0)
			goto fail;
		if (parseURLDup(&req->port, t2) != 0)
			goto fail;
		break;
	case STATE_USERSITE:
	case STATE_SITE:
		/* Assume that we have found a site */
		if (!t1) {
			/* Must be > 0 length */
			goto fail;
		}
		if (parseURLDup(&req->site, t1) != 0)
			goto fail;
		break;
	case STATE_PORT:
		/* Got port but no location, site is already set */
		if (!t1) {
			/* Cannot have 0 length port */
			goto fail;
		}
		if (parseURLDup(&req->port, t1) != 0)
			goto fail;
		break;
	case STATE_END:
		/* All is Well :) */
		break;
	}

	/* Always return a valid string for the location */
	if (!req->location && parseURLDup(&req->location, "") != 0)
		goto fail;

	free(start);

	return 0;

fail:
	/* Release the working copy and anything already stored in req so
	 * that a failed parse never leaks and never leaves stale pointers */
	free(start);
	free(req->protocol);
	req->protocol = NULL;
	free(req->user);
	req->user = NULL;
	free(req->password);
	req->password = NULL;
	free(req->site);
	req->site = NULL;
	free(req->port);
	req->port = NULL;
	free(req->location);
	req->location = NULL;

	return -1;
}

/* Builds a newly allocated string from the parts held in req, laid out as
 *
 *   [protocol://][user:password@]site[:port][location]
 *
 * Each bracketed piece is emitted only when the matching field(s) are not
 * NULL; user and password are only emitted together. A NULL site is
 * rendered as an empty string.
 *
 * Returns the string, which the caller must free(), or NULL if req is NULL
 * or the string could not be allocated.
 */
char *asprintURL(url_request *req) {

	char *buffer = NULL;
	int have_auth;
	int rv;

	if (!req)
		return NULL;

	/* User and password are only printed as a pair */
	have_auth = (req->user != NULL && req->password != NULL);

	/* One format covers every combination: absent parts and their
	 * separators are replaced with empty strings */
	rv = asprintf(&buffer, "%s%s%s%s%s%s%s%s%s%s",
			req->protocol ? req->protocol : "",
			req->protocol ? "://" : "",
			have_auth ? req->user : "",
			have_auth ? ":" : "",
			have_auth ? req->password : "",
			have_auth ? "@" : "",
			req->site ? req->site : "",
			req->port ? ":" : "",
			req->port ? req->port : "",
			req->location ? req->location : "");

	/* The buffer contents are not reliable when asprintf fails */
	if (rv < 0)
		return NULL;

	return buffer;

}

/* Releases every string held by req and resets the fields to NULL, so
 * that calling freeURL() again on the same structure is harmless.
 * The structure itself is not freed. A NULL req is ignored.
 */
void freeURL(url_request *req) {

	if (!req)
		return;

	/* free(NULL) is a no-op, so no per-field checks are needed */
	free(req->protocol);
	req->protocol = NULL;
	free(req->user);
	req->user = NULL;
	free(req->password);
	req->password = NULL;
	free(req->site);
	req->site = NULL;
	free(req->port);
	req->port = NULL;
	free(req->location);
	req->location = NULL;

}

#ifdef TEST
#include <assert.h>
void testURL(const char *URL, bool is_invalid);

/* Test driver: feeds a fixed list of URLs to testURL() in order, then
 * exits with status 0. Any failed expectation aborts via assert(). */
int main(int argc, char **argv) {

	/* URL to parse paired with whether the parser should reject it */
	static const struct {
		const char *url;
		bool is_invalid;
	} cases[] = {
		{ "www.google.com", false },
		{ "www.google.com:80", false },
		{ "http://www.google.com/", false },
		{ "http://www.google.com/index.html", false },
		{ "http://www.google.com:80/", false },
		{ "http://www.google.com:80/index.html", false },
		{ "www.google.com:80/index.html", false },
		{ "www.google.com/index.html", false },
		{ "www.google.com/", false },
		{ "www.google.com:80/", false },
		{ "matt:@www.google.com/index.html", false },
		{ "matt:matt@www.google.com:80/", false },
		{ ":matt@www.google.com:80/", true },
		{ "ftp://www.google.com/", false },
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(cases) / sizeof(cases[0]); idx++) {
		testURL(cases[idx].url, cases[idx].is_invalid);
	}

	exit(0);
}

/* Tests that the specified URL is correctly parsed.
 *
 * URL        - the URL to hand to parseURL()
 * is_invalid - true if parseURL() is expected to reject the URL
 *
 * For a URL that is accepted, the parsed parts are printed back with
 * asprintURL() and must match the original (ignoring case). Any broken
 * expectation aborts via assert().
 */
void testURL(const char *URL, bool is_invalid) {

	url_request result;
	char *buffer=NULL;

	if (parseURL(URL, &result)!=0) {
		/* Invalid URL, cannot parse: only OK if that was expected */
		assert(is_invalid);
		/* Release anything stored before the parser gave up */
		freeURL(&result);
		return;
	}

	/* The parser accepted the URL, so it must not be an invalid one */
	assert(!is_invalid);

	/* Check that the parsed string matches the original */
	buffer = asprintURL(&result);
	assert(buffer != NULL);

	int rv = strcasecmp(buffer, URL);
	assert(rv==0);
	(void)rv;	/* keeps NDEBUG builds free of unused warnings */

	free(buffer);
	freeURL(&result);

	return;

}

#endif

