/*
 * Bickley - a meta data management framework.
 * Copyright © 2008, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU Lesser General Public License,
 * version 2.1, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <string.h>
#include <regex.h>

#include <glib.h>
#include <gio/gio.h>

#include "video-metadata.h"
#include "metadata-defines.h"

/*
 * Patterns tried against a video file's basename.  They are compiled with
 * REG_EXTENDED in parse_filename().  In every pattern capture group 1 is the
 * text that precedes the season/episode or year marker.
 */

/* Matches name.s2e3...: group 2 is the season number, group 3 the episode
   number */
#define TV_REGEX "(.*)\\.?[Ss]+([0-9]+)[._ ]*[Ee]+[Pp]*([0-9]+)"

/* Matches name.702...: group 2 is a 3 or 4 digit run that tv_regex2_parse()
   splits into a season and a two digit episode */
#define TV_REGEX2 "(.*)\\.+([0-9]{3,4})"

/* Matches name.2x02...: group 2 is the season number, group 3 the episode
   number */
#define TV_REGEX3 "(.*)\\.?([0-9]+)x([0-9]+)"

/* Matches name(1998): group 2 is the four digit year */
#define MOVIE_REGEX "(.*)\\.?\\(([0-9]{4,4})\\)"
/* Matches name[1998]: group 2 is the four digit year */
#define MOVIE_REGEX2 "(.*)\\.?\\[([0-9]{4,4})\\]"

typedef struct _VideoRegex VideoRegex;

/*
 * MatchFunc:
 * @rx: the table entry whose compiled expression should be run.
 * @line: the string to match against.
 * @metadata: hash table that receives the extracted values.
 *
 * Returns TRUE when @line matched and @metadata was filled in, FALSE
 * otherwise.
 */
typedef gboolean (*MatchFunc) (VideoRegex *rx,
                               const char *line,
                               GHashTable *metadata);

/* One filename pattern together with the function that interprets it */
struct _VideoRegex {
    char *expression; /* Regular expression source text */
    regex_t *rx;      /* Compiled form; NULL until parse_filename() has
                         compiled @expression */
    int matches;      /* Number of regmatch_t slots handed to regexec() */
    MatchFunc func;   /* Parser that runs @rx and stores the results */
};

/*
 * get_match:
 * @line: the string that was handed to regexec().
 * @match: one of the match records regexec() filled in for @line.
 *
 * Returns a newly allocated copy of the part of @line delimited by
 * @match.rm_so and @match.rm_eo.
 */
static char *
get_match (const char *line,
           regmatch_t  match)
{
    const char *start = line + match.rm_so;

    return g_strndup (start, match.rm_eo - match.rm_so);
}

/* Blacklisted words: get_title() cuts the title off where one of these
   words is found, so the word and everything after it are ignored.
   The list is NULL terminated. */
const char *blacklist[] = {
    "720p", "1080p",
    "ws", "WS",
    "proper", "PROPER",
    "repack", "real.repack",
    "hdtv", "HDTV",
    "pdtv", "PDTV",
    "notv", "NOTV",
    "dsr", "DSR",
    "DVDRip",
    "divx", "DIVX",
    "Xvid", "xvid",
    NULL
};

/*
 * find_blacklisted_word:
 * @line: the string to search.
 *
 * Returns a pointer to the earliest position in @line at which any entry of
 * blacklist[] starts, comparing without regard to ASCII case, or NULL when
 * no entry occurs in @line.  This is a plain substring search; no word
 * boundaries are required.
 */
static const char *
find_blacklisted_word (const char *line)
{
    const char *pos;

    /* Walk the string once and test every word at each position, so the
       result is the first occurrence in the string rather than the first
       entry in the table */
    for (pos = line; *pos != '\0'; pos++) {
        int i;

        for (i = 0; blacklist[i]; i++) {
            if (g_ascii_strncasecmp (pos, blacklist[i],
                                     strlen (blacklist[i])) == 0) {
                return pos;
            }
        }
    }

    return NULL;
}

/*
 * get_title:
 * @line: the text that follows the season/episode marker.
 *
 * Returns a newly allocated copy of @line truncated just before the first
 * blacklisted word it contains, or NULL if @line contains none of them.
 * The result may be an empty string when @line starts with such a word.
 */
static char *
get_title (const char *line)
{
    const char *end;

    end = find_blacklisted_word (line);
    if (end == NULL) {
        return NULL;
    }

    return g_strndup (line, end - line);
}

/*
 * is_conjunction_or_preposition:
 * @s: pointer to the first character of a word inside a space separated
 *     string.
 *
 * Returns TRUE if the word starting at @s is one of the short conjunctions
 * or prepositions that title case leaves in lower case.  The comparison
 * ignores ASCII case and only succeeds on a whole word: the listed word must
 * be followed by a space or the end of the string, so "order" or "forever"
 * are not mistaken for "or" and "for".
 */
static gboolean
is_conjunction_or_preposition (const char *s)
{
    int i;
    static const char * const con_pre[] = {
        "and", "or", "nor", "but", "yet", "so",
        "of", "on", "to", "in", "for", "with",
        NULL
    };

    for (i = 0; con_pre[i]; i++) {
        size_t len = strlen (con_pre[i]);

        /* s[len] is only read once the first len characters matched, so it
           is at worst the terminating nul of @s */
        if (g_ascii_strncasecmp (con_pre[i], s, len) == 0 &&
            (s[len] == '\0' || s[len] == ' ')) {
            return TRUE;
        }
    }

    return FALSE;
}

/*
 * capitalise_string:
 * @string: nul terminated string, modified in place.
 *
 * Upper-cases the first character of @string and the first character after
 * each space, except where the word after the space is reported by
 * is_conjunction_or_preposition().  The string is printed with g_print()
 * before and after the conversion.
 *
 * Returns @string itself.
 */
static char *
capitalise_string (char *string)
{
    char *space;

    g_print ("Converted %s -> ", string);

    /* The leading character is always converted */
    if (string[0] != '\0') {
        string[0] = g_ascii_toupper (string[0]);
    }

    for (space = strchr (string, ' ');
         space != NULL;
         space = strchr (space + 1, ' ')) {
        char *word = space + 1;

        /* Trailing space, nothing follows it */
        if (*word == '\0') {
            continue;
        }

        /* See http://en.wikipedia.org/wiki/Title_case#Headings_and_publication_titles */
        if (!is_conjunction_or_preposition (word)) {
            *word = g_ascii_toupper (*word);
        }
    }

    g_print ("%s\n", string);
    return string;
}

/*
 * sanitise_string:
 * @string: nul terminated string to clean up.
 *
 * Returns a newly allocated copy of @string.  If @string starts with one of
 * the blacklisted prefixes, that prefix is left out of the copy.
 */
static char *
sanitise_string (const char *string)
{
    static char *blacklisted_prefix[] = {
        "tpz-", NULL
    };
    char **prefix;

    for (prefix = blacklisted_prefix; *prefix != NULL; prefix++) {
        if (g_str_has_prefix (string, *prefix)) {
            /* Copy everything that follows the prefix */
            return g_strdup (string + strlen (*prefix));
        }
    }

    return g_strdup (string);
}

/*
 * tv_regex_parse:
 * @rx: table entry holding a compiled expression with three capture groups:
 *      series name, season and episode.
 * @line: the file name to parse.
 * @metadata: hash table that receives the results.
 *
 * Runs @rx against @line.  On a match the series name (separators turned
 * into spaces and surrounding whitespace removed), the season and the
 * episode are inserted into @metadata.  If get_title() finds an episode
 * title after the episode number, and it is not empty once separators are
 * stripped, it is lower-cased, sanitised, title-cased and inserted as well.
 * The inserted strings are newly allocated and are not freed here.
 *
 * Returns TRUE if @line matched, FALSE otherwise.
 */
static gboolean
tv_regex_parse (VideoRegex *rx,
                const char *line,
                GHashTable *metadata)
{
    regmatch_t *matches;
    char *name, *season, *episode, *title;

    /* FIXME: Might be nice to slice this? */
    matches = g_new (regmatch_t, rx->matches);
    if (regexec (rx->rx, line, rx->matches, matches, 0) != 0) {
        g_free (matches);
        return FALSE;
    }

    /* The name group is greedy and swallows the separator in front of the
       season marker, so strip the whitespace that separator turns into */
    name = get_match (line, matches[1]);
    name = g_strstrip (g_strdelimit (name, "._", ' '));

    season = get_match (line, matches[2]);
    episode = get_match (line, matches[3]);
    title = get_title (line + matches[3].rm_eo);
    g_free (matches);

    g_hash_table_insert (metadata, METADATA_VIDEO_SERIES_NAME, name);
    g_hash_table_insert (metadata, METADATA_VIDEO_SEASON, season);
    g_hash_table_insert (metadata, METADATA_VIDEO_EPISODE, episode);

    if (title) {
        title = g_strstrip (g_strdelimit (title, "._", ' '));

        /* Nothing but separators between the episode number and the first
           blacklisted word: there is no title to record */
        if (*title != '\0') {
            char *lower, *clean;

            lower = g_utf8_strdown (title, -1);

            /* Sanitise before capitalising: the blacklisted prefixes are
               lower case and would no longer match afterwards */
            clean = sanitise_string (lower);
            g_free (lower);

            g_hash_table_insert (metadata, METADATA_VIDEO_TITLE,
                                 capitalise_string (clean));
        }

        g_free (title);
    }

    return TRUE;
}

/*
 * tv_regex2_parse:
 * @rx: table entry holding a compiled expression with two capture groups:
 *      series name and a combined season/episode number.
 * @line: the file name to parse.
 * @metadata: hash table that receives the results.
 *
 * Runs @rx against @line.  On a match the series name (separators turned
 * into spaces and surrounding whitespace removed) is inserted into
 * @metadata, and the combined number is split: its last two digits become
 * the episode and the digits before them the season.  If get_title() finds
 * an episode title after the number, and it is not empty once separators are
 * stripped, it is lower-cased, sanitised, title-cased and inserted as well.
 * The inserted strings are newly allocated and are not freed here.
 *
 * Returns TRUE if @line matched, FALSE otherwise.
 */
static gboolean
tv_regex2_parse (VideoRegex *rx,
                 const char *line,
                 GHashTable *metadata)
{
    regmatch_t *matches;
    char *name, *season, *episode, *se, *title;
    int selen;

    /* FIXME: Might be nice to slice this? */
    matches = g_new (regmatch_t, rx->matches);
    if (regexec (rx->rx, line, rx->matches, matches, 0) != 0) {
        g_free (matches);
        return FALSE;
    }

    /* The name group is greedy and can end in a separator, so strip the
       whitespace that separator turns into */
    name = get_match (line, matches[1]);
    name = g_strstrip (g_strdelimit (name, "._", ' '));

    /* se represents either SSEE or SEE */
    se = get_match (line, matches[2]);

    selen = strlen (se);
    episode = g_strndup (se + selen - 2, 2);
    season = g_strndup (se, selen - 2);

    g_free (se);

    title = get_title (line + matches[2].rm_eo);
    g_free (matches);

    g_hash_table_insert (metadata, METADATA_VIDEO_SERIES_NAME, name);
    g_hash_table_insert (metadata, METADATA_VIDEO_SEASON, season);
    g_hash_table_insert (metadata, METADATA_VIDEO_EPISODE, episode);

    if (title) {
        title = g_strstrip (g_strdelimit (title, "._", ' '));

        /* Nothing but separators between the number and the first
           blacklisted word: there is no title to record */
        if (*title != '\0') {
            char *lower, *clean;

            lower = g_utf8_strdown (title, -1);

            /* Sanitise before capitalising: the blacklisted prefixes are
               lower case and would no longer match afterwards */
            clean = sanitise_string (lower);
            g_free (lower);

            g_hash_table_insert (metadata, METADATA_VIDEO_TITLE,
                                 capitalise_string (clean));
        }

        g_free (title);
    }

    return TRUE;
}

/*
 * movie_regex_parse:
 * @rx: table entry holding a compiled expression with two capture groups:
 *      movie name and year.
 * @line: the file name to parse.
 * @metadata: hash table that receives the results.
 *
 * Runs @rx against @line.  On a match the name (separators turned into
 * spaces and surrounding whitespace removed) is inserted into @metadata as
 * the title and the second group as the year.  The inserted strings are
 * newly allocated and are not freed here.
 *
 * Returns TRUE if @line matched, FALSE otherwise.
 */
static gboolean
movie_regex_parse (VideoRegex *rx,
                   const char *line,
                   GHashTable *metadata)
{
    regmatch_t *matches;
    char *name, *year;

    /* FIXME: Might be nice to slice this? */
    matches = g_new (regmatch_t, rx->matches);
    if (regexec (rx->rx, line, rx->matches, matches, 0) != 0) {
        g_free (matches);
        return FALSE;
    }

    /* The name group is greedy and keeps whatever separates the name from
       the year ("Name (1998)", "Name.(1998)"), so strip it */
    name = get_match (line, matches[1]);
    name = g_strstrip (g_strdelimit (name, "._", ' '));

    year = get_match (line, matches[2]);
    g_free (matches);

    g_hash_table_insert (metadata, METADATA_VIDEO_TITLE, name);
    g_hash_table_insert (metadata, METADATA_VIDEO_YEAR, year);

    return TRUE;
}

/*
 * Patterns in the order parse_filename() tries them; the first parser that
 * returns TRUE wins.  Each row is: expression, compiled regex (filled in by
 * parse_filename() on first use), number of regmatch_t slots (whole match
 * plus capture groups), parser.  The row with a NULL expression terminates
 * the table.
 */
static VideoRegex rxes[] = {
    {TV_REGEX, NULL, 4, tv_regex_parse},
    {TV_REGEX2, NULL, 3, tv_regex2_parse},
    {TV_REGEX3, NULL, 4, tv_regex_parse},
    {MOVIE_REGEX, NULL, 3, movie_regex_parse},
    {MOVIE_REGEX2, NULL, 3, movie_regex_parse},
    {NULL, NULL, 0, NULL}
};

/*
 * remove_extension:
 * @base: a file's basename.
 *
 * Builds a display title from @base: everything from the last '.' on is
 * dropped, '.' and '_' become spaces, surrounding whitespace is removed and
 * the text is lower-cased, sanitised and finally title-cased.  A '.' in the
 * very first position is not treated as the start of an extension, so a
 * name such as ".avi" does not collapse to an empty title.
 *
 * Returns a newly allocated string.
 */
static char *
remove_extension (const char *base)
{
    const char *ext;
    char *name, *lower, *clean;

    ext = strrchr (base, '.');
    if (ext != NULL && ext != base) {
        name = g_strndup (base, ext - base);
    } else {
        name = g_strdup (base);
    }

    name = g_strstrip (g_strdelimit (name, "._", ' '));
    lower = g_utf8_strdown (name, -1);
    g_free (name);

    /* Sanitise before capitalising: the blacklisted prefixes are lower case
       and would no longer match once the first letter is upper-cased */
    clean = sanitise_string (lower);
    g_free (lower);

    return capitalise_string (clean);
}

/*
 * parse_filename:
 * @base: a file's basename.
 * @metadata: hash table that receives the extracted values.
 *
 * Tries every entry of rxes[] in turn, compiling its expression the first
 * time it is needed, and stops at the first parser that accepts @base.  An
 * entry whose expression fails to compile is skipped.  When no parser
 * accepts @base the title is derived from the name without its extension.
 *
 * Returns TRUE if one of the parsers handled @base, FALSE if the fallback
 * title was used.
 */
static gboolean
parse_filename (const char *base,
                GHashTable *metadata)
{
    VideoRegex *entry;
    char *fallback;

    for (entry = rxes; entry->expression != NULL; entry++) {
        if (entry->rx == NULL) {
            regex_t *compiled = g_new0 (regex_t, 1);

            if (regcomp (compiled, entry->expression, REG_EXTENDED) != 0) {
                g_free (compiled);
                continue;
            }

            entry->rx = compiled;
        }

        if (entry->func (entry, base, metadata) == TRUE) {
            return TRUE;
        }
    }

    /* None of the regexes succeeded, just get the title from the
       filename - extension */
    fallback = remove_extension (base);
    g_print ("Gave up with %s\n", fallback);
    g_hash_table_insert (metadata, METADATA_VIDEO_TITLE, fallback);

    return FALSE;
}

/*
 * bkl_task_video_get_metadata:
 * @file: the file to describe.
 * @info: not used by this function.
 * @mimetype: the file's mime type.
 * @metadata: hash table that receives the extracted values.
 *
 * For mime types that start with "video/" or contain "vnd.rn-realmedia",
 * derives metadata from the basename of @file via parse_filename().
 *
 * Returns FALSE if @mimetype is not handled or @file has no basename, TRUE
 * otherwise (whether a pattern matched or only the fallback title was
 * stored).
 */
gboolean
bkl_task_video_get_metadata (GFile      *file,
                             GFileInfo  *info,
                             const char *mimetype,
                             GHashTable *metadata)
{
    char *base;

    if (!g_str_has_prefix (mimetype, "video/") &&
        !g_strrstr (mimetype, "vnd.rn-realmedia")) {
        return FALSE;
    }

    base = g_file_get_basename (file);
    if (base == NULL) {
        /* No name to parse, so no metadata can be produced */
        return FALSE;
    }

    /* Both outcomes of parse_filename() store a title, so its result does
       not change the return value; base must be released either way */
    parse_filename (base, metadata);
    g_free (base);

    return TRUE;
}
