package Zim::Repository::Files;

use strict;
use vars qw/$CODESET/;
use POSIX qw(strftime);
use Encode;
use IO::File;
use File::Spec;
use File::Copy;
use File::MimeInfo;
use Zim::File;
use Zim::Repository::Base;
use Zim::Page::Text;

our $VERSION = '0.16';
our @ISA = qw/Zim::Repository::Base/;

my $case_tolerant_fs = File::Spec->case_tolerant();

*CODESET = \$Zim::CODESET;
$CODESET ||= 'utf8';

=head1 NAME

Zim::Repository::Files - A file system based repository

=head1 DESCRIPTION

This module implements a file system based repository for zim.
See L<Zim::Repository> for the interface documentation.

=head1 METHODS

=over 4

=item new(NAMESPACE, DIR)

Simple constructor. DIR is the root directory of the repository.
NAMESPACE is the namespace that maps to that directory.

=cut

sub init { # called by new
	my ($self, $dir, $format) = @_;
	
	$dir = Zim::File->abs_path($dir);
	$self->{dir} = $dir;
	$self->{format} = $format || 'wiki';
	$self->{cache} = Zim::File->new($dir, '.zim.cache');
	
	# Check version of cache
	if (-w $self->{cache}->dir) {
		my $line = '';
		if ($self->{cache}->exists) {
			my $fh = $self->{cache}->open();
			$line = <$fh>;
			$fh->close;
		}
		$self->{cache}->write("zim: version $VERSION\n")
			unless $line =~ m/zim: version $VERSION/;
	}
	
	$self->{ext} = ($self->{format} eq 'wiki') ? 'txt' : 'html';
		# FIXME HACK FIXME - this belongs in a Formats.pm
	
	return $self;
}

=item list_pages(NAMESPACE)

=cut

sub list_pages {
	my ($self, $namespace) = @_;
	
	my $dir = $self->dir($namespace);
	return () unless -d $dir;

	my $mtime = (stat $dir)[9];
	#warn "Reading $namespace from cache\n";
	my ($cache_mtime, @pages);
	for ($self->{cache}->read) {
		/^\Q$namespace\E(?:([^:\s]+:?)\s+=>|\s+\*(\d+))/ or next;
		if (length $1) { push @pages, $1 }
		else { # namespace itself - check index time
			$cache_mtime = $2;
			return $self->_cache_dir($namespace, $dir)
				if $cache_mtime < $mtime ;
		}
	}
	return $self->_cache_dir($namespace, $dir) unless $cache_mtime;
	return sort {lc($a) cmp lc($b)} @pages;
}

sub _flush_cache {
	my $self = shift;
	$self->{cache}->remove;
}

sub _cache_dir {
	my ($self, $namespace, $dir) = @_;
	warn "Indexing $namespace\n";
	
	my @pages = grep {defined $_} map {
		my $item = File::Spec->catfile($dir, $_);
		if (-d $item) { s/(_files)?$/:/ }
		else { $_ = undef unless s/\.$$self{ext}$// }
		s/[^:\w\.\-\(\)]/_/g;
		$_;
	} grep /^\w/, Zim::File->list_dir($dir);
	
	@pages = sort {lc($a) cmp lc($b)} @pages;
	for (0 .. $#pages) { # cut doubles due to directories
		$pages[$_] = undef if $pages[$_+1] eq $pages[$_].':' ;
	}
	@pages = grep {defined $_} @pages;
	
	return @pages if $self->{parent}{config}{read_only};
	
	my $index = join '',
		grep {! /^\Q$namespace\E(?:[^:\s]+:?\s|\s+\*\d+)/ }
		$self->{cache}->read;
	$index .= $namespace.' *'.(stat $dir)[9]."\n"; # cache mtime
	for my $p (@pages) {
		# BUG: need "my $p" construct to prevent dletion of items in @pages !??
		$index .= $self->_cache_string($namespace.$p);
	}
	$self->{cache}->write( $index );
	
	return @pages;
}

sub _cache_page {
	my ($self, $page) = @_;
	my $name = $page->name;
	my ($index, $is_dir);
	for ($self->{cache}->read) {
		if (/^\Q$name\E(:?)\s+=>/) { $is_dir = $1 }
		else { $index .= $_ }
	}
	$self->{cache}->write($index, $self->_cache_string($page, $is_dir))
}

sub _cache_string {
	my ($self, $page, $is_dir) = @_;
	unless (ref $page) {
		$is_dir = ($page =~ /:$/);
		$page = $self->get_page($page);
	}
	my @links;
	for (map $$_[1], $page->_list_links) {
		my ($t, $link) = $page->parse_link($_);
		next unless $t eq 'page';
		$link = $page->resolve_link($link);
		push @links, $link->name if $link;
	}
	$self->wipe_array(\@links);
	my $key = $page->name;
	$key .= ':' if $is_dir;
	return $key . ' => ' . join(' ', @links) . "\n" ;
}

sub _list_backlinks {
	my ($self, $page) = @_;
	my $name = $page->name;
	my @links;
	for ($self->{cache}->read) {
		/^(:\S+?):?\s+.*?\=\>.*?\s\Q$name\E\s/ or next;
		push @links, $1;
	}
	return @links;
}

sub _search { # query is a hash ref with options etc
	my ($self, $query, $ns) = @_;
	$ns ||= $self->{namespace};
	warn "Searching: $ns\n";
	
	my $reg = $$query{regex};
	unless ($reg) {
		$reg = quotemeta $$query{string};
		$reg = "\\b".$reg."\\b" if $$query{word};
		$reg = "(?i)".$reg unless $$query{case};
		$reg = qr/$reg/;
		#warn $reg;
		$$query{regex} = $reg;
	}
	
	my @matches;
	for ($self->list_pages($ns)) {
		my $p = $ns.$_;
		my $is_dir = ($p =~ s/:$//);
		my $match = $self->file($p)->grep($reg);
		push @matches, [$p, $match] if $match;
		#warn "$1 matches $match times\n" if $match;
		push @matches, $self->_search($query, $p.':') if $is_dir;
	}
	
	return @matches;
}

sub _match_word {
	my ($self, $page, $word) = @_;
	my $namespace = $page->namespace;
	$word =~ s/[^\w\.\:\-]/_/g;
	my $seen = 0;
	warn "looking up \"$word\" in $namespace\n";
	for ($self->{cache}->read) {
		next unless /^\s*\Q$namespace\E(?i)\Q$word\E(_|:?\s)/;
		if ($1 eq '_') { return 2 }
		elsif ($seen) { return 2 }
		else { $seen = 1 }
	}
	return $seen;
}

=item get_page(PAGE_NAME)

Returns an object of the type L<Zim::Page::Text>.

=cut

sub get_page {
	my ($self, $name, $source) = @_; # source is a private argument

	my $page = Zim::Page::Text->new($self, $name);
	$source ||= $self->file($name); # case sensitive lookup
	$page->set_source($source);
	$page->set_format($self->{format});
	$page->properties->{base} = $source->dir;
	
	unless ($source->exists) {
		$page->{parse_tree} = $self->_template($page);
		$page->status('new');
	}

	return $page;
}

=item C<resolve_page(NAME)>

Like C<get_page()> but with case in-sensitive lookup for a page name.

=cut

sub resolve_page {
	my ($self, $name) = @_;
	my $source = $self->file($name, 1); # case tolerant lookup
	$name = $self->pagename($source->path);
	#warn "Resolved page: $name => $file => $page\n";
	$self->get_page($name, $source);
}

sub _template {
	# FIXME make template configurable
	my ($self, $page) = @_;
	$page->name =~ /([^:]+):*$/;
	my $title = ucfirst($1);
	$title =~ s/_/ /g;
	my $format = $self->root->{date_string} || '%A %d/%m/%Y';
	my $date = Encode::decode($CODESET,
		strftime($format, localtime)   );
	return	['Document', $page->properties,
			['head1', {}, $title],
			['Para',  {empty_lines => 1}, "Created $date\n"]
		];
}

=item copy_page(SOURCE, TARGET)

=cut

sub copy_page {
	my ($self, $old, $new) = @_;
	my $source = $self->file($old);
	my $target = $self->file($new);
	Zim::File->copy($source, $target);
	@$new{'status', 'parse_tree'} = ('', undef);
}

=item move_page(SOURCE, TARGET)

=cut

sub move_page {
	my ($self, $old, $new) = @_;
	
	# Move file
	my $source = $self->file($old);
	my $target = $self->file($new);
	Zim::File->move($source, $target) if $source->exists;

	# Move tree below file
	# FIXME make this optional (?) - think boutupdating links here
	#my $old_tree = $self->dir($old);
	#my $new_tree = $self->dir($new);
	#Zim::File->move($old_tree, $new_tree) if -d $old_tree;
	
	# update objects
	@$old{'status', 'parse_tree'} = ('deleted', undef) if ref $old;
	@$new{'status', 'parse_tree'} = ('', undef)        if ref $new;
}

=item delete_page(PAGE)

=cut

sub delete_page {
	my ($self, $page) = @_;

	my $file = $self->file($page);
	my $dir = $file->dir;
	if ($file->exists) { $file->remove }
	else { # border case where empty dir was left for some reason
		$dir = $self->dir($page);
		Zim::File->remove_dir($dir);
	}
	
	@$page{'status', 'parse_tree'} = ('deleted', undef) if ref $page;
}

=item C<search()>

TODO

=cut

sub search {
	my ($self, $page, $query) = @_;
	
}

=back

=head2 Private methods

=over 4

=item C<file(PAGE, CASE)>

Returns a L<Zim::File> object for a page name.

CASE is a boolean that triggers a case in-sensitive lookup when true.

=item C<dir(PAGE, CASE)>

Returns a dir for a page name. This dir maps to the namespace below this page.

CASE is a boolean that triggers a case in-sensitive lookup when true.

=cut

sub dir {
	my ($self, $page, $case_tolerant) = @_;
	$self->file($page, $case_tolerant, 'DIR');
}

sub file {
	# TODO optimise using File::Glob
	# TODO make file() call dir() instead of the other way
	my ($self, $page, $case_tolerant, $is_dir) = @_;

	if (ref $page) { # page object
		return $page->{source} if defined $page->{source} and ! $is_dir;
		$page = $page->name;
	}
	
	$page =~ s/^:*$self->{namespace}:*//i;
	$page =~ s/:+$//;
	
	my @dirs = split /:+/, $page;
	my $basename = $is_dir ? '' : pop(@dirs) ;

	# Find the right dir
	my $dir = File::Spec->catdir($self->{dir}, @dirs);
	unless (!$case_tolerant_fs and  -d $dir) {
		$dir = $self->{dir};
		for (0 .. $#dirs) {
			my $new_dir = $self->_grep_dir(
				$dir, $dirs[$_], 1, $case_tolerant );
			if (defined $new_dir) { $dir = $new_dir }
			else {
				$dir = File::Spec->catdir($dir, @dirs[$_ .. $#dirs]);
				last;
			}
		}
	}
	
	return $dir if $is_dir;

	# Check the filename
	my $file = $self->_grep_dir($dir, $basename, 0, $case_tolerant);
	if (defined $file) { $file =~ s/(\.$$self{ext})?$/.$$self{ext}/ } # just to be sure
	else { $file = File::Spec->catfile($dir, $basename .'.'.$$self{ext}) }

	#warn "resolved $page to file $file\n";
	return Zim::File->new($file);
}

sub _grep_dir { # find a file or subdir
	my ($self, $dir, $basename, $is_dir, $case_tolerant) = @_;
	return undef unless -d $dir;
	my $extended = $basename . ($is_dir ? '_files' : '.'.$$self{ext});
	my $wrong_type = 0;
	
	unless ($case_tolerant and $case_tolerant_fs) {
		if ($is_dir) {
			my $path = File::Spec->catdir($dir, $basename);
			return $path if -d $path;
			$wrong_type = 1 if -e _;
		}
		else {
			my $path = File::Spec->catfile($dir, $basename);
			return $path if -f $path;
			$wrong_type = 1 if -e _;
		}
	
		my $path = $is_dir
			? File::Spec->catdir($dir, $extended)
			: File::Spec->catfile($dir, $extended) ;
		return $path if $wrong_type or -e $path;
		return undef unless $case_tolerant;
	}
	
	my @check = (lc($basename), lc($extended));
	my $suggestion;
	my $other_type = lc $basename . ($is_dir ? '.'.$$self{ext} : '_files');
	opendir DIR, $dir or die "Could not list dir $dir\n";
	while (my $item = readdir DIR) { 
		# FIXME for case_tolerant_fs it would be nicer to check
		# case sensitive version first in the listing
		eval { $item = Encode::decode_utf8($item, 1) };
		if (grep {$_ eq lc($item)} @check) {
			if ($is_dir) {
				$item = File::Spec->catdir($dir, $item);
				unless (-d $item) {
					$suggestion = $item;
					next;
				}
			}
			else {
				$item = File::Spec->catfile($dir, $item);
				unless (-f $item) {
					$suggestion = $item;
					next;
				}
			}
			closedir DIR;
			return $item;
		}
		elsif (lc($item) eq $other_type) {
			$suggestion = $is_dir
				? File::Spec->catdir($dir, $item)
		       		: File::Spec->catfile($dir, $item) ;
		}
	}
	closedir DIR;

	if ($suggestion) { # for dir a like-named file, for file a like-name dir
		if ($is_dir) { $suggestion =~ s/\.$$self{ext}$//          }
		else         { $suggestion =~ s/(_files)?$/\.$$self{ext}/ }
		return $suggestion;
	}
	
	return undef;
}

=item pagename(FILE)

Returns the page name corresponding to FILE. FILE does not actually
need to exist and can be a directory as well as a file.

=cut

sub pagename {
	my ($self, $file) = @_;
	$file = File::Spec->abs2rel($file, $self->{dir})
		if File::Spec->file_name_is_absolute($file);
	my @parts = map {s/_files$//; $_} grep length($_), File::Spec->splitdir($file);
	$parts[-1] =~ s/\.$$self{ext}$//;
	return $self->{namespace} . join ':', @parts;
}

=item C<get_source(SOURCE, MODE)>

Returns an IO::File object or undef.

=cut

sub get_source {
	my ($self, $source, $mode) = @_;
	
	my $file = $source->path;
	my $cb = undef;
	if ($mode eq 'r') {
		return undef unless -e $file;
		die "File not readable: $file\n" unless -r _;
		$source->{mtime} = $source->stat->{mtime};
	}
	elsif ($mode eq 'w') {
		die "File not writable: $file\n" if -e $file && ! -w _;
		die "File has changed on disk since reading\n"
			if  defined $source->{mtime}
			and $source->{mtime} < $source->stat->{mtime} ;
		$source->make_dir;
		$cb = sub { $source->{mtime} = $source->stat->{mtime} };
	}
	else { die "unknown mode: $mode" }
	
	return $source->open($mode, $cb);
}

# Methods below for filesystem interaction - subtle differences with File::Path

sub _mkdir {
	my ($dir, $file) = @_; # if $file is true $dir is a file
	my ($vol, $dirs) = File::Spec->splitpath($dir, $file ? 0 : 1);
	my @dirs = File::Spec->splitdir($dirs);
	my $path = File::Spec->catpath($vol, shift @dirs);
	mkdir $path or die "Could not create dir $path\n"
		if length $path and ! -d $path;
	while (@dirs) {
		$path = File::Spec->catdir($path, shift @dirs);
		mkdir $path or die "Could not create dir $path\n"
			unless -d $path;
	}
}

sub _rmdir {
	my $dir = shift;
	#print STDERR "rmdir $dir\n";
	rmdir $dir or return; # fails when non-empty
	my ($vol, $dirs) = File::Spec->splitpath($dir, 1);
	my @dirs = File::Spec->splitdir($dirs);
	while (@dirs) {
		pop @dirs;
		$dir = File::Spec->catdir($vol, @dirs);
		#print STDERR "rmdir $dir\n";
		rmdir $dir or last; # fails when non-empty
	}
}

1;

__END__

=back

=head1 BUGS

Please mail the author if you find any bugs.

=head1 AUTHOR

Jaap Karssenberg || Pardus [Larus] E<lt>pardus@cpan.orgE<gt>

Copyright (c) 2005 Jaap G Karssenberg. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

L<Zim>, L<Zim::Page>

=cut
