/*
   Name: $RCSfile: mod_index.c,v $
   Author: Alan Moran
   $Date: 2005/11/26 15:04:17 $
   $Revision: 1.7 $
   $Id: mod_index.c,v 1.7 2005/11/26 15:04:17 a_j_moran Exp $

   Legal Notice:

   This program is free software; you can redistribute it and/or
   modify it under the terms of the license contained in the
   COPYING file that comes with this distribution.

 */

/**
   @file

   @brief Functions to support the index module.

   Indexing is concerned with the generation of index.html files in
   directories where there is no conscious linkage with the rest of the website
   (e.g. in directories where material is archived etc.)  This process involves
   parsing each file in the directory in order to extract the title and a
   summary (or digest) of its contents.  This information is used to construct
   a list in which links to the relevant documents appear.  The process is not
   recursive and replaces any existing index file.

   Which directories are subject to indexing is determined by the presence of
   &lt;dir&gt; elements within the &lt;digest&gt; child of the &lt;generators&gt; element of the rapple
   configuration file.  Each &lt;dir&gt; element bears a "title" attribute
   used to furnish a title and header for the generated index file and a value
   which is the absolute path to the directory for which the index file is to
   be generated.

 */

#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include "globals.h"
#include "regex.h"
#include "mime.h"
#include "mod_index.h"

/* Base directory published through the module interface; built in
   rpl_mod_index_configure() and released in rpl_mod_index_cleanup(). */
static rpl_str_t rpl_index_base_dir;

/* Base directory under which generated index files are written; built in
   rpl_mod_index_init() and released in rpl_mod_index_cleanup(). */
static rpl_str_t out_dir;

/* Non-zero when at least one directory has been configured for indexing. */
static int enable_indexing;

/* Directories configured for indexing and, walked in lock-step with them,
   the titles used for their generated index pages. */
static rpl_list *dg_dirs_list;
static rpl_list *dg_dirs_titles_list;

/**
   Configure the index module.

   Builds the module's base directory (the datastore base directory followed
   by RPL_DS_TOUT_DIR) and installs the init, process and cleanup entry
   points of this module in the supplied interface.

   @param fns pointer to module interface to be configured.
 */
void
rpl_mod_index_configure(rpl_mod_fns *fns)
{
	/* the base directory is kept in a file-scope variable so that it can be
	   released again in rpl_mod_index_cleanup() */
	fns->basedir = rpl_index_base_dir =
		rpl_str_concat(rpl_cfg_get_ds_basedir(), "/", RPL_DS_TOUT_DIR, RPL_STR_EOC);

	/* module entry points */
	fns->cleanup = rpl_mod_index_cleanup;
	fns->process = rpl_mod_index_process;
	fns->init = rpl_mod_index_init;
}

/**
   Search the list of directories configured for indexing to find one that matches
   rel_dir.  Note that directories in the config file are relative to the website
   root.

   The directory list and the title list are walked in lock-step, so the n-th
   title belongs to the n-th directory.  The search stops as soon as either
   list is exhausted; a directory without a corresponding title entry is
   therefore treated as "not configured" instead of dereferencing a NULL
   title node.

   @param rel_dir directory to look up (must not be NULL).

   @return the title to be used for the index page if a match is found, RPL_STR_NUL otherwise.
 */
static rpl_str_t
rpl_index_do_index(rpl_c_str_t rel_dir)
{
	rpl_str_list_node *dir_node, *title_node;

	assert(rel_dir != NULL);

	/* scan directories configured for indexing looking for a match */
	dir_node = rpl_list_first (dg_dirs_list);
	title_node = rpl_list_first (dg_dirs_titles_list);
	while (dir_node != NULL && title_node != NULL)
	{
		if (strcmp(rel_dir, dir_node->str) == 0)
			return title_node->str;
		dir_node = rpl_list_next (&dir_node->node);
		title_node = rpl_list_next (&title_node->node);
	}

	return RPL_STR_NUL;
}

/**
   Replaces unadorned &'s with &amp; - these arise during processing when
   &amp; entities in the source are interpreted.  During later phases of
   processing this causes parsers to fail (having encountered an & and
   expecting to see an entity definition follow)

   An ampersand is treated as unadorned when it is immediately followed by a
   space, a tab or a line break.  The replacement is repeated until
   rpl_regex_replace() returns NULL, and *ctnt is then pointed at the last
   string it produced (or left untouched if nothing matched).

   NOTE(review): the whitespace following the ampersand is part of the
   pattern, so it is presumably replaced together with the ampersand -
   confirm that this is intended.

   NOTE(review): the strings superseded on each pass (including the one
   originally passed in) are not released here.  If rpl_regex_replace()
   returns a freshly allocated string this leaks them - confirm its ownership
   contract before adding a free.

   @param ctnt pointer to string to be parsed (neither ctnt nor *ctnt may be NULL).
 */
static void
rpl_index_escape_ampersands(rpl_str_t *ctnt)
{
	rpl_regex_t *rep;
	rpl_str_t sp, rpl_ctnt;

	/* check the handle itself before looking through it */
	assert(ctnt != NULL);
	assert(*ctnt != NULL);

	rep = rpl_regex_create("(&[ \t]|&\r?\n)", RPL_REGEX_FLAG_MULTILINE);
	sp = *ctnt;
	while((rpl_ctnt = rpl_regex_replace(rep, sp, "&amp;")) != NULL)
		sp = rpl_ctnt;
	*ctnt = sp;

	rpl_regex_destroy(rep);
}

/**
   Perform index initialization.

   Builds the base output directory, fetches the configured directory names
   and their titles from the configuration, and enables indexing only when at
   least one directory has been configured.

   @return RPL_WK_OK always.
 */
rpl_wk_status 
rpl_mod_index_init()
{
	/* set the base output dir */
	out_dir = rpl_str_concat(rpl_cfg_get_ds_basedir(), "/", RPL_DS_TOUT_DIR, RPL_STR_EOC);

	/* acquire target list of directories from config file */
	dg_dirs_list = rpl_cfg_get_dg_dir_names();
	dg_dirs_titles_list = rpl_cfg_get_dg_dir_titles();

	/* assigned unconditionally so that a flag set by an earlier
	   initialization cannot survive an empty directory list */
	enable_indexing = (rpl_list_count (dg_dirs_list) > 0) ? 1 : 0;

	return RPL_WK_OK;
}

/**
   @param filename name of file to be registered.
   @param st_buf stat of file.
 */
rpl_wk_status 
rpl_mod_index_process(rpl_c_str_t filename, struct stat statbuf)
{
	rpl_wk_status status = RPL_WK_OK;
	rpl_web_asset *wa;
	rpl_str_t msg, rdp, fp, idx_path, dgst_filename, rel_path, out_filename;
	rpl_str_t idx_html, title, list_html;
    struct dirent *dirp;
    DIR *dp;
    struct stat sub_statbuf;

    assert(filename != NULL);

	msg = rpl_str_concat(rpl_message_get("WK_PROCESSING", RPL_EOM), "index ", filename, RPL_STR_EOC);
	rpl_log_info(msg);
	rpl_me_free(msg);

	/* if not configured for indexing then don't even bother processing dirs */
	if(enable_indexing)
	{
		if(S_ISREG(statbuf.st_mode))
		{
			/* processing directories to be indexed rather than files that are to be
			   included in directory indexes negates the need maintain state 
			   information during processing. */
		} else if(S_ISDIR(statbuf.st_mode)) {

			if(rpl_fs_resolve_paths(filename, rpl_index_base_dir, &rdp, &fp))
				return RPL_WK_ERR;
			idx_path = rpl_reg_create_key(rdp, fp);

			/* dirs configured for indexing are stated relative to the website root */
			if((title = rpl_index_do_index(idx_path)) != RPL_STR_NUL)
			{
				/* TODO: contributor is the process owner */
				wa = (rpl_web_asset *)rpl_me_malloc(sizeof(rpl_web_asset));
				rpl_wa_init(wa);

				/* set mime type based on filename extension */
				rpl_wa_set_mime_type("text/html", wa);
				rpl_wa_set_rel_dir(idx_path, wa);
				rpl_wa_set_filename(RPL_INDEX_INDEXNM, wa);
				rpl_wa_set_key(wa);

				/* digest each file in the directory */
				idx_html = rpl_str_concat(RPL_INDEX_HTML_ST_1, title, 
						RPL_INDEX_HTML_ST_2, title, RPL_INDEX_HTML_ST_3, RPL_STR_EOC);

				/* read and digest directory contents */
				dp=opendir(filename);
				while ((dirp = readdir (dp)) != NULL) {
					/* ignore the usual suspects (incl. the generated index if it already exists!) */
					if (strcmp (dirp->d_name, ".") == 0 || 
							strcmp (dirp->d_name, "..") == 0 || 
							strcmp(dirp->d_name, RPL_INDEX_INDEXNM) == 0)
						continue;
					dgst_filename = rpl_str_concat(filename, "/", dirp->d_name, RPL_STR_EOC);
					/* perform digest parsing (but check first that we are not dealing with another directory */
					if(stat(dgst_filename,&sub_statbuf) == -1) {
						msg = rpl_message_get("DIGEST_INDEX_FAILED", filename, " (", strerror(errno), ")", RPL_EOM);
						rpl_log_error(msg);
						rpl_me_free(dgst_filename);
						return RPL_WK_ERR;
					}
					/* only attempt to digest assets that are regular transformable files */
					if(S_ISREG(sub_statbuf.st_mode) && rpl_mime_is_transformable(rpl_mime_get_type(dgst_filename))) {
						rpl_digest_parse(dgst_filename);
						rel_path = rpl_str_concat(idx_path, "/", dirp->d_name, RPL_STR_EOC);
						list_html = rpl_str_concat("<li><b>",
								rpl_html_create_hyperlink(rel_path, rpl_digest_get_title()),
								"</b><br /><i>",
								rpl_digest_get_summary(), "...",
								rpl_html_create_more_hyperlink(rel_path),
								"</i></li>", 
								RPL_STR_EOC);
						idx_html = rpl_str_concat(idx_html, list_html, RPL_STR_EOC);
						rpl_me_free(list_html);
						rpl_me_free(rel_path);
					}
					rpl_me_free(dgst_filename);
				}

				idx_html = rpl_str_concat(idx_html, RPL_INDEX_HTML_END, RPL_STR_EOC);

				/* output generated index file */
				out_filename = rpl_str_concat(out_dir, "/", idx_path, "/", RPL_INDEX_INDEXNM, RPL_STR_EOC);
				rpl_index_escape_ampersands(&idx_html);
				rpl_fs_str2f(idx_html, out_filename);

				/* tidy up */
				if (closedir (dp) < 0) {
					msg = rpl_message_get("DIGEST_DIR_CLOSE_FAILED", filename, RPL_EOM);
					fprintf(stderr, rpl_str_concat("Fatal: ", msg, RPL_STR_EOC));
					rpl_log_fatal(msg);
				}

				/* register newly generated asset */
				rpl_reg_insert(wa);

				rpl_me_free(idx_html);
				rpl_me_free(out_filename);
			}
			
			if(strlen(rdp) > 0)
				rpl_me_free(rdp);
			if(strlen(fp) > 0)
				rpl_me_free(fp);
			rpl_me_free(idx_path);
		}
	}

	return status; 
}

/**
   Release resources held during processing.

   Frees the two directory strings built by rpl_mod_index_configure() and
   rpl_mod_index_init().

   @return RPL_WK_OK always.
 */
rpl_wk_status 
rpl_mod_index_cleanup()
{
	/* the two strings are independent of each other */
	rpl_me_free(rpl_index_base_dir);
	rpl_me_free(out_dir);

	return RPL_WK_OK;
}

