package Zim::Repository::Files;

use strict;
use vars qw/$CODESET/;
use POSIX qw(strftime);
use Encode;
use File::Spec;
use File::MimeInfo;
use Zim::Repository;

our $VERSION = '0.18';
our @ISA = qw/Zim::Repository/;

*CODESET = \$Zim::CODESET;
$CODESET ||= 'utf8';

=head1 NAME

Zim::Repository::Files - A file system based repository

=head1 DESCRIPTION

This module implements a file system based repository for zim.
See L<Zim::Repository> for the interface documentation.

=head1 METHODS

=over 4

=item C<new(PARENT, NAMESPACE, DIR)>

Simple constructor. DIR is the root directory of the repository.
NAMESPACE is the namespace that maps to that directory.

=cut

sub init { # called by new
	my $self = shift;
	
	unless (defined $self->{dir}) {
		# no dir given, find one based on namespace
		die "Zim::Repository::Files needs a directory to initialize\n"
			unless length $self->{parent}{dir};
		my @parts = grep length($_), split /:+/, $self->{namespace};
		$self->{dir} = Zim::File->resolve_file(
			{is_dir => 1}, $$self{parent}{dir}, @parts );
	}
	else { # check dir relative to parent
		$self->{dir} = Zim::File->abs_path(
				$self->{dir}, $self->{parent}{dir} );
	}
	$self->{config}{read_only} = (-w $self->{dir}) ? 0 : 1;
	$self->{cdir} = $self->{dir} . '/_changes';
	$self->{format} ||= 'wiki';
	$self->{ext} = ($self->{format} eq 'html')     ? 'html' :
	               ($self->{format} eq 'txt2tags') ? 't2t'  : 'txt' ;
		# FIXME HACK FIXME - this belongs in a Formats.pm
	
	$self->{cache} = Zim::File->new($self->{dir}, '.zim.cache');
	
	# Check version of cache
	unless ($self->{config}{read_only}) {
		my $line = '';
		if ($self->{cache}->exists) {
			my $fh = $self->{cache}->open();
			$line = <$fh>;
			$fh->close;
		}
		$self->{cache}->write("zim: version $VERSION\n")
			unless $line =~ m/zim: version $VERSION/;
	}
	
	return $self;
}

=item C<list_pages(NAMESPACE)>

Returns a list of pages. If possible it uses a cache.

=cut

# Directory structure:
#
# a.txt
# a/b.txt
# a/c.txt
# a/c/d.txt
#
# Page structure:
#
# a
# |__ b
# |__ c
#     |_d
#
# Cache:
#
# a: mtime /
# a:b mtime > links
# a:c: mtime /
# a:c: mtime > links
# a:c:d mtime > links

sub list_pages {
	my ($self, $namespace) = @_;
	
	my $dir = $self->dir($namespace);
	return () unless -d $dir;

	my $mtime = (stat $dir)[9];
	my ($cache_mtime, @pages);
	my $re = qr/^\Q$namespace\E(?:([^:\s]+:?) \d+ >| (\d+) \/)/;
	for ($self->{cache}->grep($re, 'lines')) {
		$_ =~ $re or next;
		if (defined $1 and length $1) { push @pages, $1 }
		else { # namespace itself - check index time
			$cache_mtime = $2;
			#warn "Found cache mtime $cache_mtime for $namespace (mtime is $mtime)\n";
			return $self->_cache_dir($namespace, $dir)
				unless $cache_mtime == $mtime ;
		}
	}
	#warn "Did not find cache mtime for $namespace\n" unless $cache_mtime;
	return $self->_cache_dir($namespace, $dir) unless $cache_mtime;
	return @pages;
}

sub _flush_cache {
	my $self = shift;
	$self->{cache}->remove if $self->{cache}->exists;
}

sub _cache_dir { # FIXME Can this be optimized ??
	my ($self, $namespace, $dir) = @_;
	warn "# Indexing $namespace\n";
	
	my @pages =
		grep defined($_),
		map {
			my $item = "$dir/$_";
			s/([^[:alnum:]_\.\-\(\)])/sprintf("%%%02X",ord($1))/eg;
			(-d $item)		? [$_.':' => $item] :
			(s/\.$$self{ext}$//)	? [$_ => $item]     : undef ;
		} 
		grep /^[[:alnum:]]/, Zim::File->list_dir($dir);
	#use Data::Dumper; warn Dumper \@pages;
	
	@pages = sort {lc($$a[0]) cmp lc($$b[0])} @pages;
	for (0 .. $#pages-1) { # cut doubles due to directories
		$pages[$_] = undef if $pages[$_+1][0] eq $pages[$_][0].':' ;
	}
	@pages = grep defined($_), @pages;
	#use Data::Dumper; warn Dumper \@pages;

	return map {$$_[0]} @pages if $self->{config}{read_only};
	
	my %items = ();
	my $index = '';
	for ($self->{cache}->read) {
		if (/^\Q$namespace\E(?:([^:\s]+:?) \d+ >| (\d+) \/)/) {
			$items{$1} = $_ if defined $1 and length $1;
			#warn "Item: $_\n";
		}
		else { $index .= $_ }
	}
	#use Data::Dumper; warn Dumper \%items;

	$index .= $namespace.' '.(stat $dir)[9]." /\n"; # cache mtime
	for my $p (@pages) {
		my ($name, $file) = @$p;
		#warn "Page: >>$$p[1]<< >>$$p[0]<<\n";
		if (exists $items{$name}) {
			$items{$name} =~ / (\d+) /;
			if ($1 == (stat $file)[9]) {
				$index .= $items{$name};
				next;
			}
		}
		#warn "Indexing page: $namespace$name\n";
		$index .= $self->_cache_string($namespace.$name);
	}
	$self->{cache}->write( $index );
	
	return map {$$_[0]} @pages;
}

sub _cache_page {
	my ($self, $page) = @_;
	my $name = $page->name;
	my ($index, $is_dir);
	return if $self->{config}{read_only};
	for ($self->{cache}->read) {
		if (/^\Q$name\E(:?) \d+ >/) { $is_dir = $1 }
		else { $index .= $_ }
	}
	if ($page->{status} eq 'deleted') {
		$self->{cache}->write($index);
	}
	else {
		$self->{cache}->write($index,
			$self->_cache_string($page, $is_dir));
	}
}

sub _cache_string {
	my ($self, $page, $is_dir) = @_;
	unless (ref $page) {
		$is_dir ||= ($page =~ /:$/);
		$page = $self->get_page($page);
	}
	my $mtime = (stat $page->{source}->path)[9] || '0';
	my @links = eval{ $page->list_links };
	my $key = $page->name;
	$key .= ':' if $is_dir;
	return $key . ' ' . $mtime . ' > ' . join(' ', @links) . "\n" ;
}

=item C<list_backlinks(PAGE)>

Returns a list with names of pages that link to this page.

=cut

sub list_backlinks {
	my ($self, $page) = @_;
	my $name = ref($page) ? $page : $page->name;
	my @links;
	# Used the regex: qr/^:\S+ \d+ >.*?\s\Q$name\E\s/
	# here before, but this can become very slow when utf8 is involved
	# so using two stage grep to make it more scalable
	for ($self->{cache}->grep(qr/\s\Q$name\E(\s|$)/, 'lines')) {
		$_ =~ /^(:\S+) \d+ >/ or next;
		my $l = $1;
		$l =~ s/:$//;
		push @links, $l unless $l eq $name;
	}
	return @links;
}

sub _search { # query is a hash ref with options etc
	my ($self, $query, $callback, $ns) = @_;
	$ns ||= $self->{namespace};
	warn "Searching $ns\n";
	
	my $reg = $$query{regex};
	unless ($reg) {
		$reg = quotemeta $$query{string};
		$reg = "\\b".$reg."\\b" if $$query{word};
		$reg = "(?i)".$reg unless $$query{case};
		$reg = qr/$reg/;
		#warn $reg;
		$$query{regex} = $reg;
	}
	
	for ($self->list_pages($ns)) {
		my $p = $ns.$_;
		my $is_dir = ($p =~ s/:$//);
		my $match = ($p =~ $reg) ? 1 : 0 ;
		$match += $self->file($p)->grep($reg, 'count');
		$callback->($match ? [$p, $match] : ());
		$self->_search($query, $callback, $p.':') if $is_dir; # recurs
	}
}

sub _match_word {
	my ($self, $page, $word) = @_;
	my $namespace = $page->namespace;
	$word =~ s/[^\w\.\:\-]/_/g;
	my $seen = 0;
	#warn "looking up \"$word\" in $namespace\n";
	my $re = qr/^\Q$namespace\E(?i)\Q$word\E(_|:?\s)/;
	for ($self->{cache}->grep($re)) {
		$_ =~ $re or next;
		if ($1 eq '_') { return 2 }
		elsif ($seen) { return 2 }
		else { $seen = 1 }
	}
	return $seen;
}

=item C<get_page(PAGE_NAME)>

Returns an object of the type L<Zim::Page>.

=cut

sub get_page {
	my ($self, $name, $source) = @_; # source is a private argument

	my $page = Zim::Page->new($self, $name);
	$source ||= $self->file($name); # case sensitive lookup
	$page->set_source($source);
	$page->set_format($self->{format});
	$page->properties->{base} = $source->dir;
	
	unless ($source->exists) {
		$page->{parse_tree} = $self->_template($page);
		$page->status('new');
	}
	$page->properties->{read_only} = $self->{config}{read_only};

	return $page;
}

=item C<resolve_case(\@LINK, \@PAGE)>

See L<Zim::Repository>.

=cut

sub resolve_case {
	my ($self, $link, $page) = @_;
	my $match;
	if ($page and @$page) {
		#warn "resolve_case: @$link @ @$page\n";
		my $anchor = shift @$link;
		for (reverse  -1 .. $#$page) {
			my $t = ':'.join(':', @$page[0..$_], $anchor);
			#warn "\ttrying: $t\n";
			## FIXME FIXME optimize the two below together
			my $file = $self->file($t, 1);
			my $dir = $self->dir($t, 1);
			next unless -f $file or -d $dir;
			$match = join ':', $t, @$link;
			last;
		}
	}
	else { $match = ':' . join ':', @$link } # absolute

	return undef unless $match;
	my $file = $self->file($match, 1);
	return $self->pagename($file);
}

#sub resolve_page {
#	my ($self, $name) = @_;
#	my $source = $self->file($name, 1); # case tolerant lookup
#	#warn "Resolved $name => $source\n";
#	$name = $self->pagename($source->path);
#	return defined($name) ? $self->get_page($name, $source) : undef;
#}

sub _template {
	# FIXME make template configurable
	my ($self, $page) = @_;
	$page->name =~ /([^:]+):*$/;
	my $title = ucfirst($1);
	$title =~ s/_/ /g;
	my $format = $self->root->{date_format} || '%A %d/%m/%Y';
	my $date = Encode::decode($CODESET,
		strftime($format, localtime)   );
	return ['Page', {%{$page->properties}},
			['head1', {}, $title],
			['Para',  {empty_lines => 1}, "Created $date\n"]
	];
}

=item C<copy_page(SOURCE, TARGET, UPDATE_LINKS)>

=cut

sub copy_page {
	my ($self, $old, $new, $update) = @_;
	my $source = $self->file($old);
	my $target = $self->file($new);
	Zim::File->copy($source, $target);
	@$new{'status', 'parse_tree'} = ('', undef);
	if ($update) {
		my ($from, $to) = ($source->name, $target->name);
		$self->get_page($_)->update_links($from => $to)
			for $source->list_backlinks ;
	}
}

=item C<move_page(SOURCE, TARGET)>

=cut

sub move_page {
	my ($self, $old, $new) = @_;
	
	# Move file
	my $source = $self->file($old);
	my $target = $self->file($new);

	die "No such page: $source\n" unless $source->exists;
	#warn "Moving $source to $target\n";
	Zim::File->move($source, $target);

	# update objects
	@$old{'status', 'parse_tree'} = ('deleted', undef);
	@$new{'status', 'parse_tree'} = ('', undef);
	$self->_cache_page($old);
	$self->_cache_page($new);
}

=item C<delete_page(PAGE)>

=cut

sub delete_page {
	my ($self, $page) = @_;

	my $file = $self->file($page);
	my $dir = $file->dir;
	if ($file->exists) { $file->remove }
	else { # border case where empty dir was left for some reason
		$dir = $self->dir($page);
		Zim::File->remove_dir($dir);
	}
	
	@$page{'status', 'parse_tree'} = ('deleted', undef) if ref $page;
	$self->_cache_page($page);
}

=item C<search()>

TODO

=cut

sub search {
	my ($self, $page, $query) = @_;
	
}

=back

=head2 Private methods

=over 4

=item C<file(PAGE, NOCASE)>

Returns a L<Zim::File> object for a page name.

NOCASE is a boolean that triggers a case in-sensitive lookup when true.

=item C<dir(PAGE, NOCASE)>

Returns a dir for a page name. This dir maps to the namespace below this page.

NOCASE is a boolean that triggers a case in-sensitive lookup when true.

=cut

sub file {
	my ($self, $page, $case_tolerant) = @_;
	#warn "Looking up filename for: $page\n";

	if (ref $page) {
		return $page->{source} if defined $page->{source};
		$page = $page->name;
	}

	if ($page.':' eq $self->{namespace}) { # index page sub namespace
		$page = '_index';
	}
	else {
		$page =~ s/^:*\Q$$self{namespace}\E:*//i;
	}

	my @parts =
		map {s/\%([A-Fa-z0-9]{2})/chr(hex($1))/eg; $_}
		grep length($_), split /:+/, $page;

	my $file = $case_tolerant
		? Zim::File->resolve_file({ext => $$self{ext}}, $$self{dir}, @parts)
		: join('/', $$self{dir}, @parts).'.'.$$self{ext} ;

	#warn "\t=> $file\n";
	$file = Zim::File->new($file);
	$file->set_check_mtime(1);
#	$file->signal_connect(write => \&_save_changes, $self);
	return $file;
}

sub dir {
	my ($self, $page, $case_tolerant) = @_;

	if (ref $page) { $page = $page->name } # looking for dir _below_

	if ($page.':' eq $self->{namespace}) { # index page
		return $$self{dir};
	}
	else {
		$page =~ s/^:*\Q$self->{namespace}\E:*//i;
	}
	my @parts =
		map {s/\%([A-Fa-z0-9]{2})/chr(hex($1))/eg; $_}
		grep length($_), split /:+/, $page;

	my $dir = $case_tolerant
		? Zim::File->resolve_file({ext => $$self{ext}, is_dir => 1}, $$self{dir}, @parts)
		: join('/', $$self{dir}, @parts)  ;

	return $dir;
}

=item C<pagename(FILE)>

Returns the page name corresponding to FILE. FILE does not actually
need to exist and can be a directory as well as a file.

=cut

sub pagename {
	my ($self, $file) = @_;
	#warn "looking up pagename for: $file\n";
	$file = File::Spec->abs2rel($file, $self->{dir})
		if File::Spec->file_name_is_absolute($file);
	my @parts =
		map {s/([^[:alnum:]_\.\-\(\)])/sprintf("%%%02X",ord($1))/eg; $_}
		grep length($_), File::Spec->splitdir($file);
	return undef unless @parts;
	$parts[-1] =~ s/\.\Q$$self{ext}\E$//;
	return $self->{namespace} . join ':', @parts;
}

=item C<save_changes(FILE)>

Tries to save changes in the "_changes" dir.
FILE should be a L<Zim::File> object. Returns boolean for succes.
Needs the 'diff' command to work.

=cut

sub save_changes { _save_changes($_[1], $_[1]->path, $_[0]) }

sub _save_changes {
	my ($file, $path, $self) = @_;
	return 0 unless $self->{cdir};
	#warn "saving changes for $file\n";
	my $old = File::Spec->abs2rel($path, $self->{dir});
	$old = File::Spec->catfile($self->{cdir}, $old);
	Zim::File->touch($old);
	my $log = Zim::File->new($old.'.log');
	$log->append("%% $ENV{USER} ".localtime()."\n");
	`diff $old $file >> $log`;
	Zim::File->copy($file, $old);

	return 1;
}


1;

__END__

=back

=head1 BUGS

Please mail the author if you find any bugs.

=head1 AUTHOR

Jaap Karssenberg || Pardus [Larus] E<lt>pardus@cpan.orgE<gt>

Copyright (c) 2005 Jaap G Karssenberg. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

L<Zim>, L<Zim::Page>

=cut
