package Lire::ReportParser;

use strict;

use vars qw( $SELF $LRML_NS %LRML_ELEMENTS %LRML_PCDATA_ELEMENTS
             %DBK_ELEMENTS %DBK_PCDATA_ELEMENTS );

use Carp;
use XML::Parser;

use Lire::Logger;
use Lire::ReportSpec;
use Lire::Report::TableInfo;

BEGIN {
    # No parser instance is active until parse() or parsefile() is called.
    $SELF = undef;

    # Namespace URI in which the Lire report elements are declared.
    $LRML_NS = "http://www.logreport.org/LRML/";
}

##
## The whole XML::Parser machinery. We are using a global $SELF to
## hold the current instance because of the limitation in the
## XML::Parser interface which can't associate instance data and
## always act on the "global" level. This is needed to transform this
## static scoping in a dynamic object-oriented interface. As a
## consequence, only one object can parse at a time.
##
## $SELF will be set when parse() or parsefile() is called. This is
## also where the XML::Parser handlers are set.
##
# XML::Parser 'Init' handler: called with the expat object before the
# document is parsed. It fills the element lookup tables, records the
# expat object in the current instance ($SELF) and invokes its
# parse_start() method.
sub Init {
    my ($expat) = @_;

    # Cache the namespace-qualified name of every Lire element
    my @lrml_names = qw( report section subreport missing-subreport
                         title description hostname date timespan
                         table entry name value group image file
                         notes note
                         table-info group-info column-info group-summary
                       );
    %LRML_ELEMENTS =
      map { ( $_ => $expat->generate_ns_name( $_, $LRML_NS ) ) } @lrml_names;

    # Lire elements which are allowed to contain character data
    my @lrml_pcdata_names = qw( title hostname date timespan name value file );
    %LRML_PCDATA_ELEMENTS = map { ( $_ => 1 ) } @lrml_pcdata_names;

    # DocBook elements the current instance asked to be notified about
    my $dbk_names        = $SELF->known_dbk_elements();
    %DBK_ELEMENTS        = map { ( $_ => 1 ) } @$dbk_names;
    my $dbk_pcdata_names = $SELF->known_dbk_pcdata_elements();
    %DBK_PCDATA_ELEMENTS = map { ( $_ => 1 ) } @$dbk_pcdata_names;

    $SELF->{'lrp_expat'} = $expat;
    $SELF->parse_start();
}

# XML::Parser 'Final' handler: hands control to the parse_end() method
# of the current instance ($SELF) and returns whatever it returns.
sub Final {
    return $SELF->parse_end();
}

# XML::Parser 'Start' handler: dispatches an element's start tag to the
# current parser instance ($SELF).
#
# Parameters: the expat object, the element name and the attribute
# name/value pairs.
#
# Nothing is dispatched while the instance is skipping. An element in
# the LRML namespace must be a known one; it is passed to
# element_start() and, unless that method returns true, to
# <name>_start(). An element in any other namespace is only accepted
# inside a description or note element: element_start() is called
# first, then dbk_<name>_start() when the element is listed in
# %DBK_ELEMENTS. Exceptions raised by the callbacks are re-thrown
# through error().
sub Start {
    my ( $expat, $name, %attr ) = @_;

    return if $SELF->{'lrp_skipping'};

    my $ns = $expat->namespace($name);
    $ns ||= ""; # Remove warning
    if ( $ns eq $LRML_NS ) {
        # This is one of our elements
        $SELF->error( "unknown element: $name" )
          unless exists $LRML_ELEMENTS{$name};

        my $sub = $name . "_start";
        $sub =~ s/-/_/g;    # Hyphens aren't allowed in method names

        eval {
            # This return only leaves the eval block: it prevents the
            # specialized method from being called.
            return if $SELF->element_start( @_[1 .. $#_ ] );
            $SELF->$sub( %attr );
        };
        $SELF->error( $@ ) if $@;
    } elsif ( $expat->within_element( $LRML_ELEMENTS{'description'} ) ||
              $expat->within_element( $LRML_ELEMENTS{'note'} ) )
    {
        # If we are in lire:description or lire:note, this is probably
        # a DocBook element
        return if eval { $SELF->element_start( @_[1 .. $#_ ] ) };
        $SELF->error( $@ ) if $@;

        if ( exists $DBK_ELEMENTS{$name} ) {
            my $sub = "dbk_" . $name . "_start";
            $sub =~ s/-/_/g;    # Hyphens aren't allowed in method names

            eval {
                $SELF->$sub( %attr );
            };
            $SELF->error( $@ ) if $@;
        } elsif ( %DBK_ELEMENTS ) {
            # Output warning only if there are known DocBook elements
            $SELF->warning( "unknown DocBook element: $name" );
        }
    } else {
        $SELF->error( "unknown element: $name" );
    }
}

# XML::Parser 'End' handler: dispatches an element's end tag to the
# current parser instance ($SELF).
#
# While skipping, no method is invoked; skipping stops once the depth
# recorded by skip() is reached again. Otherwise the dispatch mirrors
# the Start handler: element_end() first, then <name>_end() for Lire
# elements or dbk_<name>_end() for known DocBook elements found inside
# a description or note element.
sub End {
    my ( $expat, $name ) = @_;

    if ( $SELF->{'lrp_skipping'} ) {
        if ( $expat->depth == $SELF->{'lrp_skip_level'} ) {
            $SELF->{'lrp_skipping'} = 0;
        }
        return;
    }

    # Elements without a namespace are compared as the empty string
    my $ns = $expat->namespace($name) || "";

    if ( $ns eq $LRML_NS ) {
        # This is one of our elements
        $SELF->error( "unknown element: $name" )
          unless exists $LRML_ELEMENTS{$name};

        # Hyphens aren't allowed in method names
        ( my $handler = $name . "_end" ) =~ s/-/_/g;

        eval {
            return if $SELF->element_end( $name );
            $SELF->$handler();
        };
        $SELF->error( $@ ) if $@;
    } elsif ( $expat->within_element( $LRML_ELEMENTS{'description'} ) ||
              $expat->within_element( $LRML_ELEMENTS{'note'} ) )
    {
        # Inside lire:description or lire:note, this is probably a
        # DocBook element
        return if eval { $SELF->element_end( $name ) };
        $SELF->error( $@ ) if $@;

        if ( exists $DBK_ELEMENTS{$name} ) {
            # Hyphens aren't allowed in method names
            ( my $handler = "dbk_" . $name . "_end" ) =~ s/-/_/g;

            eval { $SELF->$handler() };
            $SELF->error( $@ ) if $@;
        } elsif ( %DBK_ELEMENTS ) {
            # Output warning only if there are known DocBook elements
            $SELF->warning( "unknown DocBook element: $name" );
        }
    } else {
        $SELF->error( "unknown element: $name" );
    }
}

# XML::Parser 'Char' handler: dispatches character data to the current
# parser instance ($SELF).
#
# In a Lire element declared to contain character data, pcdata() is
# called and then <name>_char() unless pcdata() returned true. In other
# Lire elements, only white space is accepted and it is reported through
# ignorable_ws(). Inside a description or note element, the text of
# other elements goes to pcdata() when no DocBook PCDATA element is
# known, to dbk_<name>_char() when the element is a known PCDATA one,
# and to ignorable_ws() otherwise.
sub Char {
    my ( $expat, $str ) = @_;

    return if $SELF->{'lrp_skipping'};

    my $name = $expat->current_element;

    # Elements without a namespace are compared as the empty string
    my $ns = $expat->namespace($name) || "";

    if ( $ns eq $LRML_NS ) {
        # This is one of our elements
        $SELF->error( "character in unknown element: $name" )
          unless exists $LRML_ELEMENTS{$name};

        if ( $LRML_PCDATA_ELEMENTS{$name} ) {
            # Hyphens aren't allowed in method names
            ( my $handler = $name . "_char" ) =~ s/-/_/g;

            eval {
                return if $SELF->pcdata( $str );
                $SELF->$handler( $str );
            };
            $SELF->error( $@ ) if $@;
        } elsif ( $str !~ /^\s*$/ ) {
            $SELF->error( "encountered non-whitespace character data in non-#PCDATA element: $str" );
        } else {
            eval { $SELF->ignorable_ws( $str ) };
            $SELF->error( $@ ) if $@;
        }
    } elsif ( $expat->within_element( $LRML_ELEMENTS{'description'} ) ||
              $expat->within_element( $LRML_ELEMENTS{'note'} ) )
    {
        # Inside lire:description or lire:note, this is probably a
        # DocBook element
        if ( ! %DBK_PCDATA_ELEMENTS ) {
            # Nothing is known about DocBook
            eval { $SELF->pcdata( $str ) };
            $SELF->error( $@ ) if $@;
        } elsif ( exists $DBK_PCDATA_ELEMENTS{$name} ) {
            # Hyphens aren't allowed in method names
            ( my $handler = "dbk_" . $name . "_char" ) =~ s/-/_/g;

            eval { $SELF->$handler( $str ) };
            $SELF->error( $@ ) if $@;
        } else {
            eval { $SELF->ignorable_ws( $str ) };
            $SELF->error( $@ ) if $@;
        }
    }
}

=pod

=head1 NAME

Lire::ReportParser - Event-driven API to manipulate Lire XML Reports

=head1 SYNOPSIS

    package MyParser;

    use base qw/ Lire::ReportParser /;

    sub parse_end {
        return "Finished";
    }

    package main;

    my $parser = new MyParser;
    my $result = eval { $parser->parsefile( "report.xml" ) };
    croak "Error parsing report.xml: $@" if $@;
    print $result, "\n";

=head1 DESCRIPTION

The Lire::ReportParser module offers an event-driven interface to Lire
XML reports. Its primary purpose is to write custom handlers for the
Lire XML Report format.

=head1 USAGE

You create an instance of a subclass of Lire::ReportParser and use
either one of the parse() or parsefile() methods to process the XML
reports. You'll probably never use the Lire::ReportParser module
directly; you'll likely use one subclass which actually does something when
processing the document.

=head2 new( %args )


    my $parser = new Lire::ReportParser::ChartWriter( 'format' => "png" );

The new() method takes parameters in the form of 'key' => value pairs. The
available parameters are specific to each processor. There are no
generic parameters.

=head2 parse( $fh )

This method parses the Lire XML report and returns a processor-specific
result. It will die() if an error is encountered. Error should be
trapped using an eval() block. The parameter is an open file handle.

=cut

sub parse {
    my ( $self, $fh ) = @_;

    my $parser = XML::Parser->new( 'Handlers'   => {
                                                    'Init'  => \&Init,
                                                    'Final' => \&Final,
                                                    'Start' => \&Start,
                                                    'End'   => \&End,
                                                    'Char'  => \&Char,
                                                   },
                                   'Namespaces' => 1,
                                   'NoLWP'      => 1,
                                 );

    # The handlers find the instance through the global $SELF. Using
    # local() guarantees that the previous value is restored when this
    # method is left, even when the parse die()s, and lets a handler
    # start another parse without losing the outer instance.
    local $SELF = $self;

    # Scalar assignment: the Final handler is always called in scalar
    # context, whatever the caller's context is.
    my $value = $parser->parse( $fh );

    return $value;
}

=pod

=head2 parsefile( $filename )

This method parses the Lire XML report and returns a processor-specific
result. It will die() if an error is encountered. Error should be
trapped using an eval() block. The parameter is the name of a file
which contains a Lire XML Report.

=cut

sub parsefile {
    my ( $self, $file ) = @_;

    my $parser = XML::Parser->new( 'Handlers'   => {
                                                    'Init'  => \&Init,
                                                    'Final' => \&Final,
                                                    'Start' => \&Start,
                                                    'End'   => \&End,
                                                    'Char'  => \&Char,
                                                   },
                                   'Namespaces' => 1,
                                   'NoLWP'      => 1,
                                 );

    # The handlers find the instance through the global $SELF. Using
    # local() guarantees that the previous value is restored when this
    # method is left, even when the parse die()s, and lets a handler
    # start another parse without losing the outer instance.
    local $SELF = $self;

    # Scalar assignment: the Final handler is always called in scalar
    # context, whatever the caller's context is.
    my $value = $parser->parsefile( $file );

    return $value;
}


=pod

=head1 WRITING AN XML REPORT PROCESSOR

Using Lire::ReportParser, one can write an XML report processor.

The programming model is similar to the expat event-based interface or
the well-known SAX model. The principal difference with those models
is that this module offers hooks specifically tailored for Lire's XML
reports. For example, instead of having one generic element-start
event, you have methods for each specific type of element, making it
easy to hook on only the elements you're interested in. It also
offers some functions that make it easy to determine the context
(always a difficulty in event-based programming).

If you are uncomfortable with that kind of programming, there is also
an object-oriented API available to the XML reports. That API is more
similar to DOM type of programming. Its principal drawback is that it's
less performant since it has to parse the whole XML document in memory
to build an object representation. But if you need to "navigate" the
document, it's a lot better than the event-based API.

The main way of using that API to write a custom XML report handler is
by subclassing the Lire::ReportParser module and overriding the
functions related to the elements you are interested in.

There are 3 categories of methods you can override.

=over 4

=item Customization Methods

Those are methods that customize the way the Lire::ReportParser will
operate. The most important one is the new() "constructor" and the one
that determines which DocBook elements will trigger events.

=item Generic element methods

Those are methods that are invoked on each element before the more
specific or higher ones and can be used to hook before the other events
are synthesized.

=item High-level methods

Those are methods that are called on high-level report elements and
remove the burden of tracking the XML processing from the processor.
For example, to process a value, you only have to override the
handle_value() method instead of hooking on the value_start(),
value_char() and value_end() method.

There will also be methods called I<element_name>_start() and
I<element_name>_end() that will be invoked as each XML element is
parsed. Elements that contains character data (as opposed to containing
only other XML elements) will trigger a I<element_name>_char() method.

=back

The module also provides some utility methods and context-determining
methods that you'll probably want to use.

=head1 NOTES ON NAMESPACE, ELEMENT NAME AND DOCBOOK

Elements in the Lire XML Report Markup Language are defined in the
http://www.logreport.org/LRML/ namespace (which is usually mapped to
the C<lire> prefix). THE METHODS DO NOT CONTAIN THE PREFIX. You
should override report_start() and not lire:report_start().

When an element name contains an hyphen C<->, this is mapped to an
underscore in the method's name. For example, the C<missing-subreport>
element will invoke the missing_subreport_start() and
missing_subreport_end() methods.

The name of the methods used to process the DocBook elements starts by
C<dbk_>: dbk_para_start(), dbk_para_end(), dbk_para_char(), etc.

Most Lire XML Reports contain some form of DocBook markup (in the
description or note elements). By default, no specialized methods will
be invoked for those elements (only the generic ones). (There is a
customization method that can be used to trigger specialized events.)

For processors that are only interested in a plain text representation
of the description or notes, there is a
Lire::ReportParser::DocBookFormatter module from which they can
inherit to receive the appropriate plain text descriptions in a
handle_description() and handle_note() method without having to
process DocBook themselves.

=head1 UTILITY METHODS

This section documents the utility methods offered by the
Lire::ReportParser interface.

=cut

=pod

=head2 original_string( )

This method returns the string that triggered the event. That's
exactly the string as it appears in the source document.

=cut

sub original_string {
    my $self = shift;

    # Delegated to the expat object recorded when the parse started
    return $self->{'lrp_expat'}->original_string;
}

=pod

=head2 depth( )

Returns the number of opened and not yet closed elements. In
I<element_name>_start() and I<element_name>_end() methods, the
just-opened (or just to be closed element) isn't counted.

=cut

sub depth {
    my $self = shift;

    # Delegated to the expat object recorded when the parse started
    return $self->{'lrp_expat'}->depth;
}

=pod

=head2 in_element( $element_name )

This returns true if the innermost currently opened element has the
same name as $element_name. $element_name is a string. If the element
you are looking is a Lire XML Report Element, prefix it with C<lire:>.

Example:

    if ( $self->in_element( "lire:report") ) {
        # Parent element is a Lire report element.
    } elsif ( $self->in_element( "listitem" ) ) {
        # We are in a DocBook listitem element
    }

=cut

sub in_element {
    my ( $self, $element_name ) = @_;

    # Names prefixed by lire: are translated to the namespace-qualified
    # name cached in %LRML_ELEMENTS; other names are used as is.
    my $target = $element_name =~ /^lire:(.*)/
      ? $LRML_ELEMENTS{$1}
      : $element_name;

    return $self->{'lrp_expat'}->in_element( $target );
}

=pod

=head2 within_element( $element_name )

This returns the number of times an element is opened in the current
element ancestor. Like for the in_element(), the element's name should
starts with C<lire:> if it's a Lire XML Report element, otherwise it
looks for DocBook elements. Also, when called from *_start() or
*_end() methods, the "current" element isn't counted.

=cut

sub within_element {
    my ( $self, $element_name ) = @_;

    # Names prefixed by lire: are translated to the namespace-qualified
    # name cached in %LRML_ELEMENTS; other names are used as is.
    my $target = $element_name =~ /^lire:(.*)/
      ? $LRML_ELEMENTS{$1}
      : $element_name;

    return $self->{'lrp_expat'}->within_element( $target );
}

=pod

=head2 is_lire_element( $name )

The element_start() and element_end() methods receive as parameter the
name of the element currently being processed. The element name
doesn't contain the namespace prefix; you should use this method to
test whether the name is a Lire XML Report element or a DocBook
element. This method will return true if the element name was declared
in the proper Lire namespace.

=cut

sub is_lire_element {
    my ( $self, $name ) = @_;

    # Elements without a namespace are compared as the empty string
    my $ns = $self->expat->namespace($name) || "";

    return $ns eq $LRML_NS;
}

=pod

=head2 skip( )

No element methods will be invoked until the current element is
closed. This has the effect of restarting processing after the current
element close.

=cut

sub skip {
    my $self = shift;

    # The End handler stops skipping once this depth is reached again
    $self->{'lrp_skipping'}   = 1;
    $self->{'lrp_skip_level'} = $self->{'lrp_expat'}->depth;
}

=pod

=head2 error( $msg )

Aborts the current parse with the $msg error message. Use this method
instead of die()-ing.

=cut

sub error {
    my ( $self, $msg ) = @_;

    # Strip the "at <file> line <n>" suffixes added by perl: the only
    # location reported is the line in the XML document.
    $msg =~ s/( at.*?line \d+\n*)//gm;

    my $xml_line = $self->{'lrp_expat'}->current_line;

    die join( "", $msg, " at line ", $xml_line, "\n" );
}

=pod

=head2 warning( $msg )

Prints the $msg message.

=cut

sub warning {
    my ( $self, $msg ) = @_;

    # The line in the XML document where the problem was encountered
    my $xml_line = $self->{'lrp_expat'}->current_line;

    lr_warn( join( " at line ", $msg, $xml_line ) );
}

=pod

=head2 expat( )

This method gives access to the underlying XML::Parser::Expat instance
which is used to do the actual XML processing. This can be used for more
specialized processing.

=cut

sub expat {
    my ($self) = @_;

    # Set by the Init handler when a parse starts
    return $self->{'lrp_expat'};
}

=pod

=head1 CUSTOMIZATION METHODS

This section documents the methods that can be overridden to control
some of the behavior of the Lire::ReportParser instance.

=head2 new( [%args] )

The subclass should call its parent new method. No argument are
defined by the ReportParser base class. Of course, subclasses are free
to define arguments.

The object created is an hash reference. Subclasses are free to use
the hash to store some of their state. All keys used by the
Lire::ReportParser module are prefixed by C<lrp_>.

=cut

sub new {
    my ( $proto, %args ) = @_;

    # Can be called on a class name or on an existing instance
    my $class = ref( $proto ) || $proto;

    # No argument is used by the base class; the parser starts in
    # non-skipping mode.
    return bless { 'lrp_skipping' => 0 }, $class;
}

=pod

=head2 known_dbk_elements( )

This method is called during initialization to let the subclass tell
which DocBook elements it is interested to receive events for. If you
don't override this, no specialized method will be called when
encountering DocBook elements.

The method should return a reference to an array containing the
DocBook elements for which it wants a method to be invoked.

Example:

    sub known_dbk_elements {
        return [ qw/para listitem/ ];
    }

=cut

sub known_dbk_elements {
    # The base class isn't interested in any DocBook element
    my @dbk_elements = ();

    return \@dbk_elements;
}

=pod

=head2 known_dbk_pcdata_elements( )

This method is called during initialization to let the subclass tell
in which DocBook elements containing PCDATA (that is, elements that can
contain character data instead of or in addition to other elements) the
processor is interested to receive events for. If you don't override
this, no specialized method will be called when encountering DocBook
elements.

The method should return a reference to an array containing the
DocBook elements containing PCDATA for which it wants a method to be
invoked. This should be a subset of the elements returned by the
known_dbk_elements() method.

Example:

    sub known_dbk_pcdata_elements {
        return [ "para" ];
    }

=cut

sub known_dbk_pcdata_elements {
    # The base class isn't interested in any DocBook PCDATA element
    my @dbk_pcdata_elements = ();

    return \@dbk_pcdata_elements;
}

=pod

=head2 parse_start( )

This methods is invoked once before the document is parsed. It can be
used for any initialization the processor has to do.

=cut

sub parse_start {
    # Hook for subclasses: nothing to initialize in the base class
    return;
}

=pod

=head2 parse_end( )

This methods is invoked once after all the XML report was processed.
The value that this method returns will be returned by the parse() or
parsefile() method (whichever was used to start the parsing).

=cut

sub parse_end {
    # Hook for subclasses: the base class has no result to return
    return;
}

=pod

=head1 GENERIC METHODS

This section documents the methods that are called before the
specialized methods or the high-level methods are called. In each of
those methods, you should return 0 if you want the normal processing to
continue. Returning a true value means that the Lire::ReportParser
instance shouldn't invoke the higher-level methods.

=head2 element_start( $name, %attr )

This method is called each time an element start tag is encountered.
The $name parameter contains the element name (use is_lire_element()
to test for proper namespace). The remaining parameters are the
attribute names and their values that are set in the element.

If this method returns true, higher-level methods won't be invoked.

=cut

sub element_start {
    # Default implementation: let the normal processing continue
    return 0;
}

=pod

=head2 element_end( $name )

This method is called each time an element end tag is encountered.
(For empty elements, this method is called just after the
element_start() method.) The $name parameter contains the element name
(use is_lire_element() to test for proper namespace).

If this method returns true, higher-level methods won't be invoked.

=cut

sub element_end {
    # Default implementation: let the normal processing continue
    return 0;
}

=pod

=head2 pcdata( $text )

This method is called each time character data is encountered in
elements that are declared to contain parsed character data. The $text
parameter contains the character data.

When no DocBook elements were declared known, all DocBook elements
will be considered to contain character data.

If this method returns true, higher-level methods won't be invoked.

=cut

sub pcdata {
    # Default implementation: let the normal processing continue
    return 0;
}

=pod

=head2 ignorable_ws( $text )

This method is called each time character data is encountered in
elements that B<are not> declared to contain parsed character data.
The $text parameter contains the white space data that should be
ignored.

The whitespace is reported so that formatting information can be
preserved by the processor.

Since no higher-level methods are invoked on ignorable white space,
the return value of this method is ignored.

=cut

sub ignorable_ws {
    # Default implementation: the white space is simply dropped
    return 0;
}

=pod

=head1 HIGH-LEVEL EVENT METHODS

For each element defined, an I<element_name>_start() and an
I<element_name>_end() method are invoked. For elements that contain
character data, an I<element_name>_char() method will also be invoked,
although you probably want to hook onto the easier
handle_I<element_name>() methods in these cases.

When you override any of those methods (except the
handle_I<element_name>() ones), you B<must> invoke the parent method
also:

    sub subreport_start {
        my $self = shift;
        my ( $name, %attr ) = @_;

        $self->SUPER::subreport_start( @_ );

        # Processor specific handling.
    }

=head2 report_start( %attr )

Called when the report element start tag is encountered.

The only defined attribute is C<version>. The current version is 2.0,
but older 1.0 report can still be parsed.

=cut

sub report_start {
    my ( $self, %attr ) = @_;

    $self->error( "missing version attribute on root element\n" )
      unless defined $attr{'version'};

    # Forget the mode selected by a report previously parsed with the
    # same instance: a 2.0 report must not be processed as a 1.0 one.
    delete $self->{'compatibility_mode'};

    if ( $attr{'version'} eq '1.0' ) {
        # table_start() will generate the table information itself
        $self->{'compatibility_mode'} = 1;
    } elsif ( $attr{'version'} ne '2.0' ) {
        $self->error( "unsupported report's version: $attr{'version'}\n" );
    }

    $self->{'lrp_subreport_count'} = 0;
}

=pod

=head2 report_end( )

Called when the report element end tag is encountered.

=cut

sub report_end {
    # Nothing to do in the base class
    return;
}

# Starts collecting the content of a title element.
sub title_start {
    my $self = shift;

    $self->{'lrp_curr_title'} = "";
}

# Appends character data to the title being collected.
sub title_char {
    my ( $self, $text ) = @_;

    $self->{'lrp_curr_title'} .= $text;
}

# Passes the collected title to handle_title().
sub title_end {
    my $self = shift;

    # Each run of white space is collapsed into one space
    $self->{'lrp_curr_title'} =~ s/\s+/ /g;

    return $self->handle_title( $self->{'lrp_curr_title'} );
}

=pod

=head2 handle_title( $title )

Method invoked after the C<title> element was processed. The $title
parameter contains the content of the element. This can be a report's,
subreport's, section's or image's title. You'll need to use the
in_element() method to determine the context.

=cut

sub handle_title {
    # Hook for subclasses: the base class ignores the title
    my ( $parser, $text ) = @_;
}

=pod

=head2 description_start( %attr )

Called when the start tag of the C<description> element is
encountered.

The description can be related to a section, the report or a
subreport. You'll probably need to use the in_element() method to
determine the context.

Remember that if you want to process the content of the description
element, you'll probably want to enable some DocBook elements.

=cut

sub description_start {
    # Nothing to do in the base class
    return;
}

=pod

=head2 description_end( )

Called when the C<description> closing tag is encountered.

=cut


sub description_end {
    # Nothing to do in the base class
    return;
}

=pod

=head2 notes_start()

Called when the start tag of the C<notes> element is
encountered.

The notes can be related to the report, the section or a
subreport. You'll probably need to use the in_element() method to
determine the context.

=cut

sub notes_start {
    # Nothing to do in the base class
    return;
}

=pod

=head2 notes_end( )

Called when the C<notes> closing tag is encountered.

=cut

sub notes_end {
    # Nothing to do in the base class
    return;
}

=pod

=head2 note_start( %attr )

Called when the start tag of the C<note> element is
encountered.

The note can be associated to a section, the report or a subreport.
You'll probably need to use the in_element() method to determine the
context.

Remember that if you want to process the content of the note element,
you'll probably want to enable some DocBook elements.

=cut

sub note_start {
    # Nothing to do in the base class
    return;
}

=pod

=head2 note_end( )

Called when the C<note> closing tag is encountered.

=cut

sub note_end {
    # Nothing to do in the base class
    return;
}

# Starts collecting the content of a hostname element.
sub hostname_start {
    my $self = shift;

    $self->{'lrp_curr_hostname'} = "";
}

# Appends character data to the hostname being collected.
sub hostname_char {
    my ( $self, $text ) = @_;

    $self->{'lrp_curr_hostname'} .= $text;
}

# Passes the collected hostname to handle_hostname().
sub hostname_end {
    my $self = shift;

    # Each run of white space is collapsed into one space
    $self->{'lrp_curr_hostname'} =~ s/\s+/ /g;

    return $self->handle_hostname( $self->{'lrp_curr_hostname'} );
}

=pod

=head2 handle_hostname( $hostname )

Called after the C<hostname> element was parsed. The $hostname
parameter contains the content of the element. The hostname can be
related to the overall report or only to a specific subreport. You'll
want to use the in_element() method to determine the context.

=cut

sub handle_hostname {
    # Hook for subclasses: the base class ignores the hostname
    my ( $parser, $host ) = @_;
}

=pod

=head2 image_start( %attr )

Called when the C<image>'s opening tag is encountered. The C<format>
attribute will contains the image's format.

=cut

sub image_start {
    # Hook for subclasses: the base class ignores the attributes
    my ( $parser, %attributes ) = @_;
}

=pod

=head2 image_end( )

Called when the C<image>'s closing tag is encountered.

=cut

sub image_end {
    my ( $self ) = @_;

    # Nothing to do in the base class. An image element has no hostname:
    # handle_hostname() is only invoked by hostname_end().
    return;
}

# Starts collecting the content of a file element.
sub file_start {
    my $self = shift;

    $self->{'lrp_curr_file'} = "";
}

# Appends character data to the file name being collected.
sub file_char {
    my ( $self, $text ) = @_;

    $self->{'lrp_curr_file'} .= $text;
}

# Passes the collected file name, unmodified, to handle_file().
sub file_end {
    my $self = shift;

    return $self->handle_file( $self->{'lrp_curr_file'} );
}

=pod

=head2 handle_file( $filename )

Called after the C<file> element was parsed. This element is
present when a chart was generated for the current subreport. The
$filename parameter contains the content of the element.

=cut

sub handle_file {
    # Hook for subclasses: the base class ignores the file name
    my ( $parser, $path ) = @_;
}

# Starts collecting a date element: the formatted date comes from the
# element's content, the other value from its time attribute.
sub date_start {
    my ( $self, %attr ) = @_;

    $self->{'lrp_curr_date'} = "";
    $self->{'lrp_curr_date_time'} = $attr{'time'};
}

# Appends character data to the date being collected.
sub date_char {
    my ( $self, $text ) = @_;

    $self->{'lrp_curr_date'} .= $text;
}

# Records the collected date as the report's or the subreport's date,
# then passes it to handle_date().
sub date_end {
    my $self = shift;

    my ( $date, $epoch ) = @{$self}{ 'lrp_curr_date', 'lrp_curr_date_time' };

    # A date directly under the report element belongs to the report,
    # any other one is stored as the subreport's date.
    my $slot = $self->in_element( "lire:report" )
      ? 'lrp_report_date'
      : 'lrp_subreport_date';
    $self->{$slot} = { 'date' => $date, 'time' => $epoch };

    return $self->handle_date( $self->{'lrp_curr_date'},
                               $self->{'lrp_curr_date_time'} );
}

=pod

=head2 handle_date( $date, $date_epoch )

Called after the C<date> element was parsed. The formatted date is
available in the $date parameter, the date in number of seconds since
the epoch is available in the $date_epoch parameter.

This can be the report's or a subreport's date, you'll need to use the
in_element() method to determine the appropriate context.

=cut

sub handle_date {
    # Hook for subclasses: the base class ignores the date
    return;
}

# Starts collecting a timespan element: the formatted timespan comes
# from the element's content, the boundaries from its start and end
# attributes.
sub timespan_start {
    my ( $self, %attr ) = @_;

    $self->{'lrp_curr_timespan'} = "";
    $self->{'lrp_curr_timespan_start'} = $attr{'start'};
    $self->{'lrp_curr_timespan_end'} = $attr{'end'};
}

# Appends character data to the timespan being collected.
sub timespan_char {
    my ( $self, $text ) = @_;

    $self->{'lrp_curr_timespan'} .= $text;
}

# Records the collected timespan as the report's or the subreport's
# timespan, then passes it to handle_timespan().
sub timespan_end {
    my $self = shift;

    my ( $timespan, $start, $end ) =
      @{$self}{ 'lrp_curr_timespan', 'lrp_curr_timespan_start',
                'lrp_curr_timespan_end' };

    # A timespan directly under the report element belongs to the
    # report, any other one is stored as the subreport's timespan.
    my $slot = $self->in_element( "lire:report" )
      ? 'lrp_report_timespan'
      : 'lrp_subreport_timespan';
    $self->{$slot} = { 'timespan' => $timespan,
                       'start'    => $start,
                       'end'      => $end,
                     };

    return $self->handle_timespan( $self->{'lrp_curr_timespan'},
                                   $self->{'lrp_curr_timespan_start'},
                                   $self->{'lrp_curr_timespan_end'} );
}

=pod

=head2 handle_timespan( $timespan, $epoch_start, $epoch_end )

Called after the C<timespan> element was parsed. The formatted
timespan is available in the $timespan parameter, the starting and
ending dates of the timespan are available as number of seconds since
the epoch in the $epoch_start and $epoch_end parameters.

This can be the timespan of the report or the subreport, you'll need
to use the in_element() method to determine the appropriate context.

=cut

sub handle_timespan {
    # Hook for subclasses: the base class ignores the timespan
    return;
}


=pod

=head2 section_start( )

Called when the opening tag of a C<section> element is encountered.

=cut

sub section_start {
    my $self = shift;

    # No subreport was seen yet in this section
    $self->{'lrp_section_subreport_count'} = 0;
}

=pod

=head2 section_end( )

Called when the closing tag of a C<section> element is encountered.

=cut

sub section_end {
    my $self = shift;

    # The per-section subreport counter is reset when the section closes
    $self->{'lrp_section_subreport_count'} = 0;
}

=pod

=head2 missing_subreport_start( %attr )

Called when the opening tag of a C<missing-subreport> element is
encountered. The C<superservice> attribute contains the superservice's
of the subreport, the C<type> attribute contains the report
specification ID and the C<reason> attribute will contains the reason
why the subreport is missing.

=cut

sub missing_subreport_start {
    # Nothing to do in the base class
    return;
}

=pod

=head2 missing_subreport_end( )

Called when the closing tag of a C<missing-subreport> element is
encountered.

=cut

sub missing_subreport_end {
    # Nothing to do in the base class
    return;
}

=pod

=head2 subreport_start( %attr )

Called when the opening tag of the C<subreport> element is
encountered. The C<superservice> attribute contains the subreport's
superservice and the C<type> attribute contains the ID of the report
specification that was used to generate that subreport.

=cut

sub subreport_start {
    my ( $self, %attr ) = @_;

    # One more subreport in the current section
    $self->{'lrp_section_subreport_count'}++;

    # Fresh per-subreport state: empty stacks and a copy of the
    # element's attributes.
    @{$self}{ 'lrp_group_stack', 'lrp_entry_stack' } = ( [], [] );
    $self->{'lrp_subreport'} = { %attr };
}

=pod

=head2 subreport_end( )

Called when the C<subreport>'s closing tag is encountered.

=cut

sub subreport_end {
    my ( $self ) = @_;

    # Release everything that subreport_start() created for this
    # subreport, including the entry stack.
    delete $self->{'lrp_group_stack'};
    delete $self->{'lrp_entry_stack'};
    delete $self->{'lrp_subreport'};

    # The table information is only valid inside the subreport
    $self->{'lrp_curr_table_info'}        = undef;
    $self->{'lrp_curr_group_info'}        = undef;

    # One more subreport processed in this report
    $self->{'lrp_subreport_count'}++;
}

=pod

=head2 table_start( %attr )

Called when the opening tag of the C<table> element is encountered.
The C<charttype> attribute contains the chart's type that should be
created for that data. The C<show> attribute contains the maximum
number of entries that should be displayed (there may more entries
than this number).

=cut

sub table_start {
    my ( $self,  %attr ) = @_;

    # Remember the chart type so that current_charttype() can report it.
    $self->{'lrp_subreport'}{'charttype'}   = $attr{'charttype'};

    # The table itself is the outermost group: it goes at the bottom of
    # the group stack with its own entry counter and 'show' limit.
    my $group = { 'entry_count' => 0,
                  'show'        => $attr{'show'},
                };

    push @{$self->{'lrp_group_stack'}}, $group;

    if ( $self->{'compatibility_mode'} ) {
        # We need to generate TableInfo based on the current subreport
        # type
        my $super = $self->current_superservice;
        my $type  = $self->current_type;
        my $spec = eval { Lire::ReportSpec->load( $super, $type ) };
        if ( $@ ) {
            $self->warning( "can't process 1.0 report for which the report specification is unavailable: $@\n" );
            $self->skip;
            return;
        }

        # Attach the generated layout to the group pushed above. The
        # group is addressed directly instead of through an accessor:
        # this used to call current_group(), while the rest of the file
        # only knows lrp_current_group().
        $self->{'lrp_curr_table_info'} = $spec->create_table_info;
        $group->{'group_info'} = $self->{'lrp_curr_table_info'};
    }
}

=pod

=head2 table_end( )

Called when the C<table>'s closing tag is encountered.

=cut

sub table_end {
    # Default implementation does nothing.
    return;
}

=pod

=head2 table_info_start( %attr )

Called when the C<table-info>'s opening tag is encountered.

There should be no reason for subclasses to override this method. The
Lire::ReportParser takes care of parsing the C<table-info> content and
offers that information through a Lire::Report::TableInfo object which
is accessible through the current_table_info() method. Another
advantage of using the current_table_info() method to access that
information, is that the table information will be available in 1.0
report that didn't contain that information.

=cut

sub table_info_start {
    my ( $self,  %attr ) = @_;

    # Start a fresh table description. The constructor is called with
    # the arrow form: indirect object syntax ("new Class") is parsed
    # according to what happens to be in scope at compile time.
    $self->{'lrp_curr_table_info'} = Lire::Report::TableInfo->new;

    # The table info is the bottom of the stack of open group infos;
    # group_info_start() and column_info_start() work on its top.
    $self->{'lrp_curr_group_info'} = [ $self->{'lrp_curr_table_info'} ];
}

=pod

=head2 table_info_end()

Called when the C<table-info>'s closing tag is encountered. See
table_info_start() documentation for important comments.

=cut

sub table_info_end {
    my $self = shift;

    # The table description is complete: hand it to the innermost open
    # group so that entries pushed later can find their layout.
    $self->lrp_current_group->{'group_info'} = $self->current_table_info;
}

=pod

=head2 group_info_start( %attr )

Called when the C<group-info>'s opening tag is encountered. See
table_info_start() documentation for important comments.

=cut

sub group_info_start {
    my $self = shift;
    my %attr = @_;

    # The new group info is created by, and nested in, the info that is
    # currently on top of the stack.
    my $open_infos = $self->{'lrp_curr_group_info'};
    my $child      = $open_infos->[-1]->create_group_info( $attr{'name'} );

    push @$open_infos, $child;
}

=pod

=head2 group_info_end()

Called when the C<group-info>'s closing tag is encountered. See
table_info_start() documentation for important comments.

=cut

sub group_info_end {
    my $self = shift;

    # Leave the group info that group_info_start() pushed.
    return pop @{ $self->{'lrp_curr_group_info'} };
}

=pod

=head2 column_info_start( %attr )

Called when the C<column-info>'s opening tag is encountered. See
table_info_start() documentation for important comments.

=cut

sub column_info_start {
    my $self = shift;
    my %attr = @_;

    # The column belongs to the group info on top of the stack.
    my $parent = $self->{'lrp_curr_group_info'}[-1];
    my $column =
      $parent->create_column_info( @attr{ 'name', 'class', 'type', 'label' } );

    # Copy the layout attributes onto the column. Each setter is named
    # after its attribute, with dashes replaced by underscores.
    foreach my $hint ( qw( max-chars avg-chars col-start col-end ) ) {
        ( my $setter = $hint ) =~ tr/-/_/;
        $column->$setter( $attr{$hint} );
    }

    return $column->col_width( $attr{'col-width'} );
}

=pod

=head2 column_info_end()

Called when the C<column-info>'s closing tag is encountered. See
table_info_start() documentation for important comments.

=cut

sub column_info_end {
    # Nothing to do: column_info_start() did all of the work.
    return;
}

=pod

=head2 group_summary_start( %attr )

Called when the C<group-summary>'s opening tag is encountered.

=cut

sub group_summary_start {
    # No processing is done here: the parser instance and the element's
    # attributes are unpacked but not used any further.
    my ( $self, %attr ) = @_;
}

=pod

=head2 group_summary_end()

Called when the C<group-summary>'s closing tag is encountered.

=cut

sub group_summary_end {
    # No processing is done here: the parser instance is unpacked but
    # not used any further.
    my ( $self ) = @_;
}

=pod

=head2 group_start( %attr )

Called when the opening tag of the C<group> element is encountered.
C<group> elements introduce a kind of nested table. The C<show>
attribute contains the maximum number of entries that should be
displayed, although more entries may be present in the report.

=cut

sub group_start {
    my $self = shift;
    my %attr = @_;

    # A group is one more child of the entry that contains it; its
    # position among the children selects its group info.
    my $parent_entry = $self->lrp_current_entry;
    my $position     = ++$parent_entry->{'child_idx'};
    my $group_info   = $parent_entry->{'group_info'}->info_by_index( $position );

    push @{ $self->{'lrp_group_stack'} }, {
        'entry_count' => 0,
        'show'        => $attr{'show'},
        'group_info'  => $group_info,
    };
}

=pod

=head2 group_end( )

Called when the C<group>'s closing tag is encountered.

=cut

sub group_end {
    my $self = shift;

    # Leave the group that group_start() pushed.
    return pop @{ $self->{'lrp_group_stack'} };
}

=pod

=head2 entry_start( %attr )

Called when the opening tag of an C<entry> element is encountered.

=cut

sub entry_start {
    my $self = shift;
    my %attr = @_;

    # Count the entry in the group that contains it.
    my $group = $self->lrp_current_group;
    $group->{'entry_count'}++;

    # The entry record holds the element's attributes plus bookkeeping:
    # no child seen yet, and the layout inherited from the group.
    my %entry = (
        %attr,
        'child_idx'  => -1,
        'group_info' => $group->{'group_info'},
    );

    push @{ $self->{'lrp_entry_stack'} }, \%entry;
}

=pod

=head2 entry_end( )

Called when the C<entry>'s closing tag is encountered.

=cut

sub entry_end {
    my $self = shift;

    # Leave the entry that entry_start() pushed.
    return pop @{ $self->{'lrp_entry_stack'} };
}

sub name_start {
    my $self = shift;
    my %attr = @_;

    # A name is one more child of the current entry.
    my $entry = $self->lrp_current_entry;
    $entry->{'child_idx'}++;

    # An explicit 'col' attribute names the column; otherwise the
    # column is deduced from the position among the entry's children.
    my $col_info;
    if ( $attr{'col'} ) {
        $col_info = $self->current_table_info->column_info_by_name( $attr{'col'} );
    } else {
        $col_info = $entry->{'group_info'}->info_by_index( $entry->{'child_idx'} );
    }

    # The content is accumulated by name_char().
    $self->{'lrp_curr_name'} = {
        %attr,
        'content'  => "",
        'col_info' => $col_info,
    };
}

sub name_char {
    my $self = shift;
    my $text = shift;

    # Character data may arrive in several pieces: append each one.
    $self->{'lrp_curr_name'}{'content'} .= $text;
}

sub name_end {
    my $self = shift;

    # Without an explicit 'value', the displayed content is the value.
    my $name = $self->{'lrp_curr_name'};
    if ( !defined $name->{'value'} ) {
        $name->{'value'} = $name->{'content'};
    }

    return $self->handle_name( $name );
}

=pod

=head2 handle_name( $name_rec )

Called after a C<name> element was parsed. The $name_rec parameter is
a hash reference which contains the different values of the name
datum. Keys that are defined in this hash:

=over 4

=item content

That's the actual content of the name element. This contains the name
in a format suitable for display.

=item value

This contains the unformatted value of the name. For example, when the
name is a time string, this attribute will contain the time in
seconds since epoch.

=item range

For some names, the actual content expresses a range (time, size, etc.).
This attribute contains the length of the range.

=item col_info

The Lire::ColumnInfo object describing the column in which this
name appears.

=back

=cut

sub handle_name {
    # Default implementation does nothing with the parsed name record.
    return;
}

sub value_start {
    my $self = shift;
    my %attr = @_;

    # A value inside a group-summary is not one of the entry's
    # children, so it must not advance the entry's child index.
    my $entry = $self->lrp_current_entry;
    if ( !$self->within_element( "lire:group-summary" ) ) {
        $entry->{'child_idx'}++;
    }

    # An explicit 'col' attribute names the column; otherwise the
    # column is deduced from the position among the entry's children.
    my $col_info;
    if ( $attr{'col'} ) {
        $col_info = $self->current_table_info->column_info_by_name( $attr{'col'} );
    } else {
        $col_info = $entry->{'group_info'}->info_by_index( $entry->{'child_idx'} );
    }

    # The content is accumulated by value_char().
    $self->{'lrp_curr_value'} = {
        %attr,
        'content'  => "",
        'col_info' => $col_info,
    };
}

sub value_char {
    my $self = shift;
    my $text = shift;

    # Character data may arrive in several pieces: append each one.
    $self->{'lrp_curr_value'}{'content'} .= $text;
}

sub value_end {
    my $self = shift;

    # Without an explicit 'value', the displayed content is the value.
    my $value = $self->{'lrp_curr_value'};
    if ( !defined $value->{'value'} ) {
        $value->{'value'} = $value->{'content'};
    }

    # Values of a group-summary are reported through their own handler.
    my $handler = $self->within_element( "lire:group-summary" )
      ? 'handle_summary_value'
      : 'handle_value';

    return $self->$handler( $value );
}

=pod

=head2 handle_value( $value_rec )

Called after a C<value> element was parsed. The $value_rec parameter is
a hash reference which contains the different values of the value
datum. Keys that are defined in this hash:

=over 4

=item content

That's the actual content of the value element. This contains the
value in a format suitable for display.

=item value

This contains the unformatted value. For example, when bytes are
displayed using "1M" or "1.1G", this will contain the value in bytes.

=item total

This is used by values that represent an average. It contains the
total which makes up the average.

=item n

This is used by values that represent an average. It contains the
count by which the total was divided to compute the average.

=item col_info

The Lire::ColumnInfo object describing the column in which this
value appears.

=back

=cut

sub handle_value {
    # Default implementation does nothing with the parsed value record.
    return;
}

=pod

=head2 handle_summary_value( $value_rec )

Called after a C<value> element located in the group-summary element
was parsed. The $value_rec parameter is identical to the one passed to
the handle_value() method.

=cut

sub handle_summary_value {
    # Default implementation does nothing with the parsed value record.
    return;
}

=pod

=head1 CONTEXT METHODS

Finally, here are a bunch of additional methods that can be used to query
some context information when processing elements.

=cut

=pod

=head2 current_subreport_count( )

Returns the number of subreports that are present to date in the
report. That number is equal to the number of processed C<subreport>
elements, i.e. the current subreport isn't counted until the closing
tag was processed.

=cut

sub current_subreport_count {
    my $self = shift;

    # This counter is incremented by subreport_end().
    return $self->{'lrp_subreport_count'};
}

=pod

=head2 current_section_subreport_count( )

Returns the number of subreports that are present to date in the
section. That number is equal to the number of C<subreport> elements
opened so far in the section, i.e. the current subreport is counted
as soon as its opening tag was processed.

=cut

sub current_section_subreport_count {
    my $self = shift;

    # This counter is incremented by subreport_start() and reset when
    # a section ends.
    return $self->{'lrp_section_subreport_count'};
}

=pod

=head2 current_date( )

Returns the content of the C<date> element that applies to the current
element. This will either be the current subreport's date or the
default one taken from the C<report> element.

The date is returned as a hash reference which will contain the
formatted date in the C<date> key and the date in seconds since epoch
in the C<time> key.

=cut

sub current_date {
    my $self = shift;

    # The subreport's own date wins over the report-wide one.
    my $date = $self->{'lrp_subreport_date'};
    return $date ? $date : $self->{'lrp_report_date'};
}


=pod

=head2 current_timespan( )

Returns the content of the C<timespan> element that applies to the
current element. This will either be the current subreport's timespan
or the default one taken from the C<report> element.

The timespan is returned as a hash reference which will contain the
formatted timespan in the C<timespan> key. The starting and ending
date of the timespan are available as seconds since epoch in the
C<start> and C<end> keys.

=cut

sub current_timespan {
    my $self = shift;

    # The subreport's own timespan wins over the report-wide one.
    my $timespan = $self->{'lrp_subreport_timespan'};
    return $timespan ? $timespan : $self->{'lrp_report_timespan'};
}

=pod

=head2 current_superservice( )

Useful in C<subreport> context, it returns the superservice of the
current subreport.

=cut

sub current_superservice {
    my ( $self ) = @_;

    # Read from the attributes saved by subreport_start().
    return $self->{'lrp_subreport'}->{'superservice'};
}

=pod

=head2 current_type( )

Useful in C<subreport> context, it returns the ID of the report
specification that was used to generate the current subreport.

=cut

sub current_type {
    my ( $self ) = @_;

    # Read from the attributes saved by subreport_start().
    return $self->{'lrp_subreport'}->{'type'};
}

=pod

=head2 current_charttype( )

Useful in C<table> context, it returns the type of chart that should
be generated for the current table.

=cut

sub current_charttype {
    my ( $self ) = @_;

    # table_start() stores the chart type next to the subreport's
    # attributes.
    return $self->{'lrp_subreport'}->{'charttype'};
}

=pod

=head2 current_table_info()

Useful when processing C<group> and C<entry>, this returns a
Lire::Report::TableInfo object which describes the layout of the
current table.

=cut

sub current_table_info {
    my $self = shift;

    # Set by table_info_start(), or by table_start() in compatibility
    # mode; blanked by subreport_end().
    return $self->{'lrp_curr_table_info'};
}

#------------------------------------------------------------------------
# Methods lrp_current_group()
#
# Returns the hash reference containing the information on the current
# group
sub lrp_current_group {
    my $self = shift;

    # The innermost open group is the last one on the stack.
    return $self->{'lrp_group_stack'}[-1];
}

#------------------------------------------------------------------------
# Methods lrp_current_entry()
#
# Returns the hash reference containing the information on the current
# entry
sub lrp_current_entry {
    my $self = shift;

    # The innermost open entry is the last one on the stack.
    return $self->{'lrp_entry_stack'}[-1];
}

=pod

=head2 current_group_level( )

Useful in C<table> context, it returns the number of nested group
elements. This will be zero in the C<table> element and is incremented
by one for every C<group> element opened afterwards.

=cut

sub current_group_level {
    my ( $self ) = @_;

    # The table is the first group on the stack, so the nesting level
    # is the index of the last element.
    my $last_index = $#{ $self->{'lrp_group_stack'} };
    return $last_index;
}

=pod

=head2 current_group_entry_show( )

Useful in C<table> and C<group> context, it returns the maximum number
of entries that should be displayed.

=cut

sub current_group_entry_show {
    my $self = shift;

    # The 'show' limit was recorded when the table or group was opened.
    return $self->lrp_current_group->{'show'};
}

=pod

=head2 show_current_entry( )

Useful in C<entry> context, this can be used to test whether or not
the current C<entry> should be displayed based on the current entry
index and the parent's C<show> attribute.

=cut

sub show_current_entry {
    my $self = $_[0];

    my $group = $self->lrp_current_group;
    my $limit = $group->{'show'};

    # Without a limit every entry is shown; otherwise only the entries
    # counted so far up to, and including, the limit.
    return !defined $limit || $group->{'entry_count'} <= $limit;
}

=pod

=head2 current_table_entry_count( )

Useful in C<table> context, it returns the number of entries that were
processed so far. This only reports the entries in the C<table>
element, not counting the one in the nested C<group>.

=cut

sub current_table_entry_count {
    my ( $self ) = @_;

    # The table is always the first group on the stack.
    return $self->{'lrp_group_stack'}->[0]->{'entry_count'};
}

# keep perl happy
1;

__END__

=pod

=head1 LIMITATIONS

WARNING: This module isn't multi-thread safe. You can't process more
than one file concurrently.

=head1 SEE ALSO

Lire::Report(3pm)
Lire::ReportParser::DocBookFormatter(3pm)
Lire::ReportParser::ReportBuilder(3pm)
Lire::ReportParser::AsciiWriter(3pm)
Lire::ReportParser::ChartWriter(3pm)
Lire::ReportParser::PloticusChartWriter(3pm)
Lire::ReportParser::ExcelWriter(3pm)
Lire::Report::TableInfo(3pm)

=head1 VERSION

$Id: ReportParser.pm,v 1.37 2004/03/26 00:27:34 wsourdeau Exp $

=head1 COPYRIGHT

Copyright (C) 2001-2002 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=head1 AUTHOR

Francis J. Lacoste <flacoste@logreport.org>

=cut

