#!/usr/bin/python


import logging
import os
import re
import sys
import time
import urllib
import UserDict

# Debian distribution under test. It is passed to piuparts with -d and is
# also used to build PACKAGES_URL and the name of the base tarball.
DISTRO = "sid"

# Names of the log subdirectories. test_package() writes a log into "new"
# while a test runs and then moves it to "pass" or "fail".
SUBDIRS_PASSED = ["pass"]
SUBDIRS_FAILED = ["fail", "bugged"]
SUBDIRS_ALL = SUBDIRS_PASSED + SUBDIRS_FAILED + ["new"]

# main() hands this to test_packages(), which stops once this many packages
# have failed.
MAX_FAILURES = 100

# Packages index that lists the packages to be tested.
PACKAGES_URL = "http://liw.iki.fi/debian/dists/" + DISTRO + \
               "/main/binary-i386/Packages"


# Everything that precedes the generated package list on the "failed" page.
# The backslash after the opening quotes keeps the literal from starting with
# a newline: an XML declaration is only valid as the very first bytes of the
# document.
failed_html_prefix = """\
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html 
     PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
    <title>Piuparts reports: failed packages</title>
    <link rel="stylesheet" href="piuparts.css" type="text/css"/>
</head>
<body>
<div class="intro">
    <h1>Piuparts reports</h1>
</div>
<div class="main">
<p>This page contains log files from <a
href="http://packages.debian.org/piuparts/">piuparts</a>,
a package installation, upgrading, and removal testing suite. 
Packages in Debian's <strong>testing</strong> distribution are tested
with piuparts, and log files of the failures are listed below.</p>

<p>Piuparts is run by Lars Wirzenius (<a
href="mailto:liw@iki.fi">liw@iki.fi</a>).</p>

<dl class="packagelist">
"""


# Closing markup written after the package list; it closes the <dl>, the
# "main" <div>, <body> and <html> elements opened by failed_html_prefix.
failed_html_suffix = """
</dl>
</div>
</body>
</html>
"""


def setup_logging(log_level, log_file_name):
    """Configure the root logger.

    Messages at log_level and above are written to standard output. If
    log_file_name is non-empty they are also appended to that file, with
    a timestamp and the level name in front of each message.

    """
    logger = logging.getLogger()
    logger.setLevel(log_level)

    handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(handler)

    if log_file_name:
        # The formatter has to be created here; referring to an undefined
        # name made every call with a log file fail with NameError.
        formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        handler = logging.FileHandler(log_file_name)
        handler.setFormatter(formatter)
        logger.addHandler(handler)


class DependencySyntaxError(Exception):

    """Syntax error in package dependency declaration

    The message records where the error was found (as reported by the
    cursor's get_position()), the description given by the parser, and
    up to ten characters of the input starting at that position.

    """

    def __init__(self, msg, cursor):
        self._msg = "Error: %s: %s (text: '%s')" % \
                    (cursor.get_position(), msg, cursor.get_text(10))
        # Initialise the base class so that the message is also available
        # through the standard 'args' attribute.
        Exception.__init__(self, self._msg)

    def __str__(self):
        return self._msg

    def __repr__(self):
        return self._msg


class Cursor:

    """An input string together with a current position inside it

    The position starts at the first character and only moves forward.

    """

    def __init__(self, input):
        self._input = input
        self._pos = 0
        self._len = len(input)

    def skip_whitespace(self):
        """Advance past any white space at the current position"""
        while True:
            current = self.get_char()
            if current is None or not current.isspace():
                return
            self.next()

    def at_end(self):
        """Skip white space, then tell whether the input is exhausted"""
        self.skip_whitespace()
        return not self._pos < self._len

    def next(self):
        """Step one character forward; do nothing at the end of input"""
        if self._pos >= self._len:
            return
        self._pos += 1

    def get_char(self):
        """Return the character at the current position, or None at end"""
        if self._pos < self._len:
            return self._input[self._pos]
        return None

    def get_text(self, length):
        """Return at most length characters starting at the position"""
        if self._pos >= self._len:
            return ""
        stop = self._pos + length
        return self._input[self._pos:stop]

    def match(self, regexp):
        """Try a compiled regular expression at the current position

        Returns the match object, or None. On success the position moves
        past the matched text.

        """
        remaining = self._input[self._pos:]
        found = regexp.match(remaining)
        if found:
            self._pos += len(found.group())
        return found

    def match_literal(self, literal):
        """Try a literal string at the current position

        Returns True and moves past the literal when the input continues
        with it; otherwise returns False and leaves the position alone.

        """
        width = len(literal)
        matched = self.get_text(width) == literal
        if matched:
            self._pos += width
        return matched

    def get_position(self):
        """Describe the current position as a string"""
        return "col %d" % (self._pos,)


class SimpleDependency:

    """A dependency on one package

    Holds the package name plus the optional version relation (operator
    and version) and architecture restriction exactly as they were given
    to the constructor.

    """

    def __init__(self, name, operator, version, arch):
        self.name, self.operator = name, operator
        self.version, self.arch = version, arch

    def __repr__(self):
        fields = (self.name, self.operator, self.version, self.arch)
        return "<DEP: %s, %s, %s, %s>" % fields


class DependencyParser:

    """Parse Debian package relationship strings

    Debian packages have a rich language for expressing their
    relationships. See the Debian Policy Manual, chapter 7 ("Declaring
    relationships between packages"). This Python module implements a
    parser for strings expressing such relationships.

    Syntax of dependency fields (Pre-Depends, Depends, Recommends,
    Suggests, Conflicts, Provides, Replaces, Enhances, Build-Depends,
    Build-Depends-Indep, Build-Conflicts, Build-Conflicts-Indep), in a
    BNF-like form:

        depends-field ::= EMPTY | dependency ("," dependency)*
        dependency ::= possible-dependency ("|" possible-dependency)*
        possible-dependency ::= package-name version-dependency?
                                arch-restriction?
        version-dependency ::= "(" relative-operator version-number ")"
        relative-operator ::= "<<" | "<=" | "=" | ">=" | ">>"
        version-number ::= epoch? upstream-version debian-revision?
        arch-restriction ::= "[" arch-name arch-name* "]" |
                              "[" "!" arch-name ("!" arch-name)* "]"
        package-name ::= alphanumeric name-char name-char*
        epoch ::= integer ":"
        upstream-version ::= alphanumeric version-char*
            -- policy says "should start with digit", but not all packages do
        debian-revision ::= "-" debian-version-char debian-version-char*
        arch-name ::= alphanumeric alphanumeric*
        EMPTY ::= ""
        integer ::= digit digit*
        alphanumeric ::=
            "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
            "u" | "v" | "w" | "x" | "y" | "z" | digit
        digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
        name-char ::= alphanumeric | "+" | "-" | "."
        version-char ::= alphanumeric | "." | "+" | "-" | ":" | "~"
        debian-version-char ::= alphanumeric | "." | "+" | "~"

    White space can occur between any tokens except inside package-name,
    version-number, or arch-name. Some of the headers restrict the syntax
    somewhat, e.g., Provides does not allow version-dependency, but this is
    not included in the syntax for simplicity.

    The regular expressions below are slightly more permissive than the
    grammar: version numbers and architecture names may also contain
    upper case letters, and architecture names may contain "-".

    The whole input is parsed by the constructor, which therefore raises
    DependencySyntaxError for malformed input. The result is available
    from get_dependencies().

    """

    def __init__(self, cursor):
        self._cursor = cursor
        self._list = self._parse_dependencies()

    def get_dependencies(self):
        """Return the parsed field

        The result is a list with one item per comma-separated dependency;
        each item is the list of SimpleDependency alternatives that were
        separated by "|".

        """
        return self._list

    def _parse_dependencies(self):
        """Parse a comma-separated sequence of dependencies"""
        dependencies = []
        dep = self._parse_dependency()
        while dep:
            dependencies.append(dep)
            self._cursor.skip_whitespace()
            if self._cursor.at_end():
                break
            if not self._cursor.match_literal(","):
                raise DependencySyntaxError("Expected comma", self._cursor)
            dep = self._parse_dependency()
        return dependencies

    def _parse_dependency(self):
        """Parse one dependency: alternatives separated by "|" """
        alternatives = []
        dep = self._parse_possible_dependency()
        while dep:
            alternatives.append(dep)
            self._cursor.skip_whitespace()
            if self._cursor.at_end() or not self._cursor.match_literal("|"):
                break
            dep = self._parse_possible_dependency()
        return alternatives

    def _parse_possible_dependency(self):
        """Parse a single alternative; return None at end of input"""
        name = self._parse_package_name()
        if not name:
            return None
        (op, version) = self._parse_version_dependency()
        arch = self._parse_arch_restriction()
        return SimpleDependency(name, op, version, arch)

    # Package names are at least two characters long.
    _name_pat = re.compile(r"[a-z0-9][a-z0-9+.-]+")

    def _parse_package_name(self):
        """Return the package name at the cursor, or None at end of input"""
        self._cursor.skip_whitespace()
        if self._cursor.at_end():
            return None
        m = self._cursor.match(self._name_pat)
        if not m:
            raise DependencySyntaxError("Expected a package name",
                                        self._cursor)
        return m.group()

    _op_pat = re.compile(r"(<<|<=|=|>=|>>)")
    # "~" is accepted in both version parts; without it a relation such as
    # "(>= 1.0~rc1)" was rejected with "Expected ')'".
    _version_pat = re.compile(r"(?P<epoch>\d+:)?" +
                              r"(?P<upstream>[a-zA-Z0-9][a-zA-Z0-9.+:~-]*)" +
                              r"(?P<debian>-[a-zA-Z0-9.+~]+)?")

    def _parse_version_dependency(self):
        """Parse an optional "(operator version)" part

        Returns the pair (operator, version) as strings, or (None, None)
        when the input does not continue with "(".

        """
        self._cursor.skip_whitespace()
        if self._cursor.get_char() == "(":
            self._cursor.next()

            self._cursor.skip_whitespace()
            opm = self._cursor.match(self._op_pat)
            if not opm:
                raise DependencySyntaxError("Expected a version relation " +
                                            "operator", self._cursor)

            self._cursor.skip_whitespace()
            verm = self._cursor.match(self._version_pat)
            if not verm:
                raise DependencySyntaxError("Expected a version number",
                                            self._cursor)

            self._cursor.skip_whitespace()
            if self._cursor.get_char() != ")":
                raise DependencySyntaxError("Expected ')'", self._cursor)
            self._cursor.next()

            return opm.group(), verm.group()
        else:
            return None, None

    _arch_pat = re.compile(r"!?[a-zA-Z0-9-]+")

    def _parse_arch_restriction(self):
        """Parse an optional "[arch ...]" part

        Returns the list of architecture names (each keeps its leading
        "!" if it had one), or None when the input does not continue
        with "[".

        """
        self._cursor.skip_whitespace()
        if self._cursor.get_char() == "[":
            self._cursor.next()

            arches = []
            while True:
                self._cursor.skip_whitespace()
                if self._cursor.get_char() == "]":
                    self._cursor.next()
                    break
                # At end of input the pattern cannot match, so a missing
                # "]" ends the loop with an exception.
                m = self._cursor.match(self._arch_pat)
                if not m:
                    raise DependencySyntaxError("Expected architecture name",
                                                self._cursor)
                arches.append(m.group())

            return arches
        else:
            return None


def rfc822_like_header_parse(input):
    """Read one block of RFC 822 style headers from a file-like object

    Lines are read with readline() up to and including the first empty
    line (or up to end of file). A line that begins with white space is
    joined to the header before it. Returns the list of raw header
    strings, line endings included.

    """
    collected = []
    blank_lines = ("\r\n", "\n")
    while True:
        current = input.readline()
        if not current or current in blank_lines:
            return collected
        is_continuation = bool(collected) and current[0].isspace()
        if is_continuation:
            collected[-1] = collected[-1] + current
        else:
            collected.append(current)


class Package(UserDict.UserDict):

    """One package description, as a dictionary of its header fields

    The constructor takes the raw header strings of one stanza (as
    returned by rfc822_like_header_parse) and stores each of them under
    the field name, with white space stripped from name and value. The
    raw strings are kept in the 'headers' attribute.

    """

    def __init__(self, headers):
        UserDict.UserDict.__init__(self)
        self.headers = headers
        for header in headers:
            name, value = header.split(":", 1)
            self[name.strip()] = value.strip()
        # Cache of parsed dependency fields, keyed by header name.
        self._parsed_deps = {}

    def dump(self, output_file):
        """Write the original header text to output_file"""
        output_file.write("".join(self.headers))

    def parse_dependencies(self, header_name):
        """Return the package names listed in a dependency field

        Only the first alternative of each "a | b" group is returned.
        The field must be present. Results are cached per header name.

        """
        if header_name in self._parsed_deps:
            depends = self._parsed_deps[header_name]
        else:
            parser = DependencyParser(Cursor(self[header_name]))
            depends = parser.get_dependencies()
            depends = [alternatives[0].name for alternatives in depends]
            self._parsed_deps[header_name] = depends
        return depends

    def dependencies(self):
        """Return the names from Depends and Pre-Depends as a new list"""
        names = []
        for header in ["Depends", "Pre-Depends"]:
            if header in self:
                names += self.parse_dependencies(header)
        return names

    def is_testable(self):
        """Are we at all? Essential/required/important ones aren't."""
        # Only "Essential: yes" marks a package as essential; an explicit
        # "Essential: no" used to be mistaken for it because any non-empty
        # value counted.
        essential = self.get("Essential", "").lower() == "yes"
        return not (essential or
                    self.get("Priority", "") in ["required", "important"])


def read_packages_file(input):
    """Read package stanzas from a file-like object

    Header blocks are read one after another and each is turned into a
    Package. Reading stops at the first block that yields no headers.
    Returns the list of Package objects in file order.

    """
    result = []
    stanza = rfc822_like_header_parse(input)
    while stanza:
        result.append(Package(stanza))
        stanza = rfc822_like_header_parse(input)
    return result


def log_name(package):
    """Return the log file base name for a package: name_version.log"""
    template = "%(Package)s_%(Version)s.log"
    return template % package


def log_exists(basename, subdirs):
    """Tell whether a file called basename exists in any of subdirs"""
    candidates = [os.path.join(directory, basename) for directory in subdirs]
    for path in candidates:
        if os.path.exists(path):
            return True
    return False


def log_for_any_version_exists(package_name, subdirs):
    """Tell whether any of subdirs holds a log for some version of a package

    A directory entry counts when the text before its first underscore
    equals package_name. Every directory in subdirs must exist.

    """
    for directory in subdirs:
        for entry in os.listdir(directory):
            if "_" not in entry:
                continue
            if entry.split("_", 1)[0] == package_name:
                return True
    return False


def piuparts_command(package):
    """Return the shell command line that runs piuparts on a package

    The command names the distribution (DISTRO), the mirror, the base
    tarball and, last, the package's name.

    """
    pieces = [
        "python piuparts.py ",
        "-d ", DISTRO, " ",
        "-m http://liw.iki.fi/debian ",
        "-b ", DISTRO, ".tar.gz ",
        "-a ",
        package["Package"],
    ]
    return "".join(pieces)
    

def get_recursive_dependencies(packages, package):
    """Return the names of everything a package depends on, transitively

    packages maps package names to package objects; package is the one
    whose dependencies are wanted. Names are returned in breadth-first
    order of discovery, each one once. Names that are not in packages
    are included but not followed further. The package's own name is
    left out, even if it is reachable through a dependency cycle.

    """
    deps = []
    # Dictionary used as a set, so that the membership test does not have
    # to scan the result list.
    seen = {}
    # Work on a copy and walk it by index: nothing is ever removed from
    # the front, so the queue is not copied on every step.
    pending = list(package.dependencies())
    position = 0
    while position < len(pending):
        dep = pending[position]
        position += 1
        if dep in seen:
            continue
        seen[dep] = True
        deps.append(dep)
        if dep in packages:
            pending.extend(packages[dep].dependencies())

    # Break circular dependencies
    if package["Package"] in seen:
        deps.remove(package["Package"])

    return deps


def test_package(packages, package):
    """Run piuparts on one package unless there is a reason not to

    packages maps package names to Package objects; package is the one
    to test. Nothing is run, and a string naming the reason is returned,
    in these cases:

        "ignoring-important-required-essential"
            the package is not testable
        "already-tested"
            a log for this exact version exists among the passed logs
        "already-failed"
            a log for any version exists among the failed logs
        "unknown-dependency"
            a (recursive) dependency is not in packages
        "dependency-failed"
            a dependency has a log among the failed logs
        "dependency-untested"
            a testable dependency has no log among the passed logs

    Otherwise the piuparts command is run with its output captured in a
    log file, which is first written to the "new" directory and then
    moved to "pass" or "fail". The return value is then "pass" or
    "fail".

    """
    if not package.is_testable():
        logging.debug("Skipping essential, required, or important package: " +
                      package["Package"])
        return "ignoring-important-required-essential"

    output_name = log_name(package)
    if log_exists(output_name, SUBDIRS_PASSED):
        logging.debug("Package already tested: %s" % package["Package"])
        return "already-tested"
    if log_for_any_version_exists(package["Package"], SUBDIRS_FAILED):
        logging.debug("Package already failed: %s" % package["Package"])
        return "already-failed"

    for dep in get_recursive_dependencies(packages, package):
        if dep not in packages:
            logging.debug("Unknown dependency: %s for: %s" %
                          (dep, package["Package"]))
            return "unknown-dependency"
        if log_exists(log_name(packages[dep]), SUBDIRS_FAILED):
            logging.debug("Dependency failed: %s for: %s" %
                          (dep, package["Package"]))
            return "dependency-failed"
        if packages[dep].is_testable() and \
           not log_exists(log_name(packages[dep]), SUBDIRS_PASSED):
            logging.debug("Dependency is untested: %s for: %s" %
                          (dep, package["Package"]))
            # NOTE(review): looks like leftover debugging output for one
            # particular package -- confirm whether it is still wanted.
            if package["Package"] == "perl-modules":
                logging.debug("XXX " + log_name(packages[dep]))
            return "dependency-untested"

    logging.info("Opening log file %s" % output_name)
    new_name = os.path.join("new", output_name)
    output = open(new_name, "w")
    # The try/finally blocks make sure the log file and the pipe are
    # closed even if the run is interrupted by an exception.
    try:
        output.write(time.strftime("Start: %Y-%m-%d %H:%M:%S UTC\n",
                                   time.gmtime()))
        output.write("\n")
        package.dump(output)
        output.write("\n")

        command = piuparts_command(package)
        output.write("Executing: %s\n" % command)
        # The braces and redirection send the command's standard error
        # into the same pipe as its standard output.
        f = os.popen("{ %s; } 2>&1" % command, "r")
        try:
            for line in f:
                output.write(line)
        finally:
            status = f.close()
        # close() returns None when the command exited with status zero.
        if status is None:
            status = 0

        output.write("\n")
        output.write(time.strftime("End: %Y-%m-%d %H:%M:%S UTC\n",
                                   time.gmtime()))
    finally:
        output.close()

    if not os.WIFEXITED(status) or os.WEXITSTATUS(status) != 0:
        subdir = "fail"
    else:
        subdir = "pass"
    os.rename(new_name, os.path.join(subdir, output_name))
    logging.debug("Done with %s" % output_name)
    return subdir


def test_packages(max):
    """Test every package listed at PACKAGES_URL

    The Packages file is fetched and every package that has a Version
    field is considered. The packages are gone through repeatedly, in
    name order, calling test_package() for each one that has not yet
    passed or failed, because a package can only be tested after its
    dependencies have passed. The loop ends when nothing is left, when a
    whole round completes without any package being tested, or when max
    packages have failed.

    """
    logging.debug("Fetching %s" % PACKAGES_URL)
    packages_file = urllib.urlopen(PACKAGES_URL)
    packages = {}
    try:
        for package in read_packages_file(packages_file):
            if "Version" in package:
                packages[package["Package"]] = package
    finally:
        packages_file.close()

    for name in ["new", "pass", "fail", "bugged"]:
        if not os.path.exists(name):
            os.mkdir(name)

    unprocessed = list(packages.keys())
    unprocessed.sort()

    fail_count = 0

    while True:
        logging.info("----------------------------")
        logging.info("Iteration starts, %d unprocessed packages" %
                     len(unprocessed))
        count_by_result = {}
        still_not_done = []
        for index, name in enumerate(unprocessed):
            result = test_package(packages, packages[name])
            count_by_result[result] = count_by_result.get(result, 0) + 1
            if result not in ["pass", "fail"]:
                still_not_done.append(name)
            elif result == "fail":
                logging.info("  FAILED!")
                fail_count += 1
                if fail_count >= max:
                    # The packages after this one were not looked at in
                    # this round, so they are still to be done.
                    still_not_done.extend(unprocessed[index + 1:])
                    break
        done = len(unprocessed) - len(still_not_done)
        logging.info("%d done this iteration" % done)
        logging.info("Results by category:")
        results = list(count_by_result.keys())
        results.sort()
        for result in results:
            logging.info("  %s: %d" % (result, count_by_result[result]))

        # Update the list before deciding whether to stop, so that the
        # summary below reports what is really left.
        unprocessed = still_not_done
        if not unprocessed or done == 0 or fail_count >= max:
            break

    if len(unprocessed) == 0:
        logging.info("Phew, finished at last.")
    else:
        logging.info("Quitting, but %d remaining" % len(unprocessed))


def parse_log(f):
    """Read the package description stored in an open log file

    Everything up to and including the first blank line is skipped; the
    header block that follows is returned as a Package.

    """
    # This relies heavily on the format of the log files.
    line = f.readline()
    while line.strip():
        line = f.readline()

    return Package(rfc822_like_header_parse(f))


def listdir(dir):
    """Return the entries of a directory, each prefixed with the directory"""
    paths = []
    for base in os.listdir(dir):
        paths.append(os.path.join(dir, base))
    return paths


def collect_packages(subdir):
    """Return the packages whose log files are in a directory

    Each file in subdir is parsed with parse_log(). The path of the log
    file is stored in the resulting package under the key
    " log-file-name" (the leading space is part of the key).

    """
    packages = []
    for log in listdir(subdir):
        f = open(log, "r")
        # Close the file even when the log cannot be parsed.
        try:
            p = parse_log(f)
        finally:
            f.close()
        p[" log-file-name"] = log
        packages.append(p)
    return packages


def make_package_list(output, packages):
    """Write packages as items of an HTML definition list

    output is a writable file-like object. Each package must provide the
    keys "Maintainer", "Package", "Version" and " log-file-name". There
    is one <dt> per maintainer, in sorted order, followed by one <dd>
    per package name with a link to the log file of each version.
    Versions are ordered as plain strings; equal versions keep their
    input order.

    All values are escaped, since Maintainer fields contain "<" and ">"
    around the e-mail address.

    """
    from xml.sax.saxutils import escape

    by_maint = {}
    for p in packages:
        by_maint.setdefault(p["Maintainer"], []).append(p)

    maintainers = list(by_maint.keys())
    maintainers.sort()
    for m in maintainers:
        by_package = {}
        for p in by_maint[m]:
            by_package.setdefault(p["Package"], []).append(p)
        names = list(by_package.keys())
        names.sort()
        output.write("<dt>%s</dt>\n" % escape(m))
        for name in names:
            # The index breaks ties between equal versions, so that the
            # package objects themselves are never compared.
            versions = []
            for index, p in enumerate(by_package[name]):
                versions.append((p["Version"], index, p))
            versions.sort()
            output.write("<dd>%s:" % escape(name))
            for version, index, p in versions:
                # The attribute is delimited by single quotes, so those
                # have to be escaped as well.
                href = escape(p[" log-file-name"], {"'": "&#39;"})
                output.write(" <a href='%s'>%s</a>" %
                             (href, escape(version)))
            output.write("</dd>\n\n")


def make_html_page(output_basename, prefix, suffix, subdir):
    """Write an HTML page listing the packages with logs in subdir

    The page is written to output_basename + ".html" and consists of
    prefix, the package list, and suffix.

    """
    packages = collect_packages(subdir)
    f = open(output_basename + ".html", "w")
    # Close the file explicitly so that the page is completely written
    # by the time this function returns, also when an error occurs.
    try:
        f.write(prefix)
        make_package_list(f, packages)
        f.write(suffix)
    finally:
        f.close()


def make_html_pages():
    """Write failed.html, listing the packages with logs in "fail" """
    make_html_page(output_basename="failed",
                   prefix=failed_html_prefix,
                   suffix=failed_html_suffix,
                   subdir="fail")


def main():
    """Set up debug logging to standard output and run the tests"""
    setup_logging(logging.DEBUG, None)
    logging.info("run-piuparts starting up.")
    test_packages(MAX_FAILURES)
    # Generating the HTML pages is switched off.
    generate_html_pages = False
    if generate_html_pages:
        make_html_pages()


# Run the tests only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
