package Zim;

use strict;
use POSIX qw(strftime);
use File::Spec;
use File::MimeInfo;

our $VERSION = 0.04;

=head1 NAME

Zim - A wiki repository

=head1 SYNOPSIS

	use Zim;
	
	my $zim = Zim->new($root_dir);
	my $page = Zim->load_page('Home');
	while (defined ($_ = $page->read_block)) {
		# render page content
		# ....
	}

=head1 DESCRIPTION

This is developer documentation, for the user manual try
executing C<zim --doc>. For more commandline options see L<zim>(1).

This package contains a set of classes used to represent a
repository of wiki pages. The base repository maps all wiki pages
to text files with a wiki-style markup. But in the interface towards
frontend applications like the L<zim>(1) GUI program an abstract page object
is used to hide the physical filesystem. These page objects contain
the content of their files as a parse tree, so the frontend application
can concentrate at rendring this content in a visualy pleasing way.

=head1 METHODS

=head2 Zim

This class defines the public interface to the repository.
By default it represents filesystem based repositories,
but it can be overloaded for more advanged models.

=head3 Public Methods

The following methods can be used by the GUI.

=over 4

=item new(DIR)

Simple constructor. DIR is the root directory of the repository.

=cut

sub new {
	my ($class, $namespace, $dir) = @_;
	$dir = File::Spec->canonpath($dir);
	my $self = bless {
		namespace => $namespace,
		root      => $dir
	}, $class;
	
	my $conf = File::Spec->catfile($dir, '.zim.config');
	if (-f $conf and -r _) {
		$self->{conffile} = $conf;
		for (split /\n/, Zim::Page::File::_read_file($conf)) {
			/^:(\S+?)=(\S+)/ or next;
			my ($subspace, $mod) = ($1, $2);
			$subspace =~ s/:+$//;
			eval "use $mod;";
			die if $@;
			my $dir = $self->filename($subspace, 'DIR');
			$self->{plugins}{$subspace} = $mod->new($subspace, $dir);
			#print "plug: $mod for $namespace\n";
		}
	}

	return $self;
}

=item list_pages(NAMESPACE)

Lists pages in NAMESPACE. Sub-namespaces have a trailing ':'
in the listing.

=cut

sub list_pages {
	my ($self, $namespace) = @_;
	
	$namespace =~ s/:*$/:/;
	if (my $plug = $self->belongs_to_plugin($namespace)) {
		return $self->{plugins}{$plug}->list_pages($namespace);
	}

	my $dir = $self->filename($namespace, 'DIR');
	#print "list pages $namespace => $dir\n";
	return () unless -d $dir;
	opendir DIR, $dir or die "Could not list dir $dir\n";
	my @items = grep {defined $_} map {
		my $item = File::Spec->catfile($dir, $_);
		if (-d $item) { s/(_files)?$/:/ }
		else {
			if (mimetype($item) =~ /^text/) { s/\.txt$// }
			else { $_ = undef }
		}
		$_;
	} grep {! /^\./} readdir DIR;
	closedir DIR;
	return sort {lc($a) cmp lc($b)} @items;
}

sub belongs_to_plugin {
        my ($self, $page) = @_;
	$page =~ s/^:*$self->{namespace}:*//;
        my ($plug) = grep {$page =~ /^$_:/} keys %{$self->{plugins}};
        return $plug;
}


=item page_exists(PAGE_NAME)

Returns TRUE when there is an existing page under this name.

=cut

sub page_exists {
	my ($self, $name) = @_;
	
	if (my $plug = $self->belongs_to_plugin($name)) {
		return $self->{plugins}{$plug}->page_exists($name);
	}
	
	return -f $self->filename($name);
}

=item load_page(PAGE_NAME)

Returns an object of the type Zim::Page.

=cut

sub load_page {
	my ($self, $name) = @_;

	if (my $plug = $self->belongs_to_plugin($name)) {
		return $self->{plugins}{$plug}->load_page($name);
	}
	
	my $page = Zim::Page::File->new($self, $name);
	#print "page $page => file ".$page->filename."\n";
	if (-f $page->filename) { $page->read_file }
	else {
		$page->push_blocks( $self->_template($name) );
		$page->{status} = 'new';
	}

	return $page;
}

=item save_page(PAGE)

Saves the data for the page object PAGE.

=cut

sub save_page {
	my ($self, $page) = @_;

	if ($page->isa('Zim::Page::File')) {
		$page->write_file;
		$page->{status} = '';
		return 1;
	}
	else {
		warn "Can't save page ".$page->name."\n";
		return 0;
	}
}

=item move_page(SOURCE, TARGET)

Moves a page from SOURCE to TARGET.
SOURCE and TARGET can be either page names or page objects.

=cut

sub move_page {
	my ($self, $source, $target) = @_;

	$source = $self->load_page($source)
		unless ref $source;
	$target = $self->load_page($target)
		unless ref $target;
	
	$source->rewind;
	$target->clear;
	while (defined ($_ = $source->read_block)) {
		$target->push_blocks($_);
	}

	$self->delete_page($source);
	$self->save_page($target);
}

=item delete_page(PAGE)

Deletes a page.
PAGE can be either a page name or a page object.

=cut

sub delete_page {
	my ($self, $page) = @_;

	$page = Zim::Page::File->new($self, $page)
		unless ref $page;

	if ($page->isa('Zim::Page::File')) {
		$page->remove_file
			if -f $page->filename;
		$page->{status} = 'deleted';
	}
	else {
		warn "Can't delete page ".$page->pagename."\n";
	}
}

sub _template {
	# FIXME make template configurable
	my ($self, $name) = @_;
	$name =~ /([^:]+):*$/;
	my $title = $1;
	$title = ucfirst($title) unless $title =~ /[A-Z]/;
	return Zim::Formatter->text2blocks(
		"====== $title ======\n",
		'Created '.strftime('%A %d/%m/%Y %H:%M', localtime)."\n\n"
	);
}

=back

=head3 Private Methods

These methods are specific to filesystem based repositories
and should therefor never be called directly from the GUI.

=over 4

=item filename(NAME, DIR)

Gives the filename corresponding to a page name.
NAME is a string. (This lookup is more difficult then it might seem.)

DIR is a boolean that can be set to tell that NAME is expected to
be a directory. This behaviour is also triggered if the page name
ends with a ':'.

In list context filename, directory and real page name are returned.

=cut

sub filename {
	my ($self, $page, $is_dir) = @_;

	$page =~ s/^:*$self->{namespace}:*//;
	$is_dir++ if $page =~ s/:+$//;
	
	my @dirs = split /:+/, $page;
	my $basename = pop @dirs unless $is_dir;

	# check the dirs
	my @realname;
	my $dir = File::Spec->catdir($self->{root}, @dirs);
	if (-d $dir) { @realname = @dirs }
	else {
		my $path = $self->{root};
		for (0 .. $#dirs) {
			my $casepath = File::Spec->catdir($path, $dirs[$_]);
			if (-e $casepath) { $path = $casepath }
			else {
				$dirs[$_] = lc($dirs[$_]);
				$path = File::Spec->catdir($path, $dirs[$_]);
			}
			push @realname, $dirs[$_];
			$dirs[$_] .= '_files' if -f $path; # existing files
		}
		$dir = File::Spec->catdir($self->{root}, @dirs);
	}

	if ($is_dir) {
		return $dir unless wantarray;
		my $realname = join ':', $self->{namespace}, @realname;
		return $dir, $dir, $realname;
	}

	# check which filename to use
	my $file;
	for ($basename, lc($basename)) {
		$basename = $_;
		$file = File::Spec->catfile($dir, $_);
		last if -f $file;
		$file = "$file.txt";
		last if -f $file;
	}
	push @realname, $basename;
	#print "$page => file: $file, realname: @realname\n";

	return $file unless wantarray;
	my $realname = join ':', $self->{namespace}, @realname;
	return $file, $dir, $realname;
}

=item pagename(FILE)

Returns the page name corresponding to FILE. FILE does not actually
need to exist and can be a directory as well as a file.

=cut

sub pagename {
	my ($self, $file) = @_;
	# $file =~ s/^\Q$self->{root}\E//;
	my @parts = map  {s/_files$//; $_} File::Spec->splitdir($file);
	$parts[-1] =~ s/\.txt$//;
	return join ':', $self->{namespace}, @parts;
}

package Zim::Formatter;

no warnings;

=back

=head2 Zim::Formatter

This is the default parser for Zim.
It uses a wiki-style syntax to format plain text.

=over 4

=item text2blocks(TEXT)

Parses a piece of plain text into an array of data blocks.

=cut

sub text2blocks {
	# split in headers and paragraphs
	# for headers it depends on the source whether they are
	# followed by a empty line or not, for paragraphs (normal,
	# pre, lists, etc.) we always append an extra line break
	#
	# FIXME because undef is returned for empty lines blocks of
	# empty lines disappear
	my ($self, @text) = @_;
	return grep {defined $_} map {
		#print STDERR "#>>$_<<\n";
		(! /\S/)        ? $_                            :
		/^(\t|\s\s)/    ? ($self->parse_pre($_),  ) :
		/\n?^==+\s+\S+/ ? ($self->parse_head($_)       ) :
		                  ($self->parse_block($_), ) ;
	} map {
		split /((?:^\s*\n)+|\n?^==+\s+\S.*\n(?:\s*\n)?)/m,
	} @text;
}

sub parse_head { # parse a header
	my ($self, $head) = @_;
	$head =~ s/^(==+)\s+(.*?)(?:\s+==+|\s*)(\n(?:\s*\n)?)/$2/;
	my $h = 7 - length($1); # =X6 => head1, =X5 => head2 etc.
	my $lb = (length($3) == 1) ? "\n" : "\n\n";
	return ["head$h" => $head], $lb;
}

sub parse_pre { # parse pre formated paragraphs
	my ($self, $pre) = @_;
	my ($indent) = ($pre =~ /^(\s+)/);
	$pre =~ s/^$indent//mg;
	return ['pre' => $pre];
}

our @parser_subs = qw/parse_image parse_link parse_styles/;

sub parse_block { # parse a block of text
	my ($self, @text) = @_;
	for my $sub (@parser_subs) {
		@text = map {ref($_) ? $_ : ($self->$sub($_))} @text;
	}
	return @text;
}

sub parse_image {
	my ($self, $text) = @_;
	my $i = 0;
	return map {
		($i++ % 2) ? [image => $_] : $_
	} split /\{\{(.+?)\}\}/, $text;
}

sub parse_link { # parse links and urls
	my ($self, $text) = @_;
	my $i = 0;
	return map {
		if ($i++ % 2) {
			if (s/^\[\[(.+)\]\]$/$1/) {
				if (/^(.*)\|(.*)$/) { [link => $2, $1] }
				else                { [link => $_]     }
			}
			elsif (/^\w+:\/\//)  { [link => $_ ]             }
			elsif (s/^mailto://) { [link => $_, "mailto:$_"] }
			elsif (/@/)	     { [link => $_, "mailto:$_"] }
			else                 { [link => $_ ]             }
		} else { $_ }
	} split /(
		\[\[.+?\]\]                |
		\b\w+:\/\/\S+[^\s\,\.\;\)]   |
		\b(?:mailto:)?\S+@\S+\.\w+
	)/x, $text;
}

sub parse_styles { # parse blocks of bold, italic and underline
	my ($self, $text) = @_;
	my $i = 0;
	return	map {
		unless ($i++ % 2) { $_ }
		elsif (/^\'\'(.+)\'\'$/) { [pre       => $1] }
		elsif (/^\*\*(.+)\*\*$/) { [bold      => $1] }
		elsif (/^\/\/(.+)\/\/$/) { [italic    => $1] }
		elsif (/^\_\_(.+)\_\_$/) { [underline => $1] }
	} split /(?<!\S)(
		\'\'.+?\'\' |
		\*\*.+?\*\* |
		\/\/.+?\/\/ |
		__.*?__
	)(?!\S)/x, $text;
}

=item blocks2text(BLOCK, BLOCK, ..)

Serialises data blocks into a piece of plain text.

=cut

sub blocks2text { ## unknown tags just fall through as plain text
	my ($self, @blocks) = @_;
	my $text;
	while (@blocks) {
		#use Data::Dumper; print Dumper $blocks[0];
		unless (ref $blocks[0]) {
			$text .= shift @blocks;
			next;
		}
		my ($tag, @node) = @{ shift @blocks };
		if ($tag eq 'link') { 
			@node = (@node > 1) ? (
					($node[1] =~ s/^mailto://)
						? $node[1]
						: "[[$node[1]|$node[0]]]" ) : 
				($node[0] =~ /^\w+:\/\//)
					? ($node[0]) : ("[[$node[0]]]") ;
		}
		elsif ($tag eq 'pre')  {
			for (@node) {
				if (/\n/) {  s/^/    /mg  }
				else      { $_ = "''$_''" }
			}
		}
		elsif ($tag =~ /^head(\d)$/) {
			my $n = 7 - $1;
			@node = map {('='x$n)." $_ ".('='x$n)} @node;
		}
		elsif ($tag eq 'bold')      { $_ = "**$_**"  for @node }
		elsif ($tag eq 'italic')    { $_ = "//$_//"  for @node }
		elsif ($tag eq 'underline') { $_ = "__$_\__" for @node }
		$text .= join '', @node;
	}
	$text =~ s/\n?$/\n/; # unix files should always end with \n
	return $text;
}

package Zim::Page;

=back

=head2 Zim::Page

This class defines the public interface for page objects.

=over 4

=item new(PARENT, NAME)

Simple constructor. PARENT should be a repository object of class Zim.
NAME is the page name for this object.

=cut

sub new {
	my ($class, $parent, $name) = @_;
	$name =~ s/^:+|:+$//g;
	bless {
		zim => $parent,
		pagename => $name,
		iter => 0,
		data => [],
	}, $class ;
}

=item name( )

Returns the name of the page.

=cut

sub name { $_[0]->{pagename} }

=item realname( )

Returns a name for the current page that can be used for comparing
page objects. By default the real name is the same as the page name,
but for example for file based pages where a case insensitive lookup
is done these will differ.

=cut

sub realname { $_[0]->{pagename} }

=item basename( )

Returns the last part of the page name.

=cut

sub basename {
	my $name = $_[0]->{pagename};
	$name =~ s/^(.*:+)//;
	return $name;
}

=item namespace( )

Returns the first part of the page name.
Including a trailing ':'.

=cut

sub namespace {
	my $name = $_[0]->{pagename};
	$name =~ /^(.*:+)/;
	return defined($1) ? $1 : ':';
}

=item status(STRING)

Set or get a status string for this page.
Typical status strings are 'new' and 'deleted'.

=cut

sub status {
	my ($self, $status) = @_;
	$self->{status} = $status if defined $status;
	return $self->{status};
}

=item resolv_link(LINK)

Returns a page name for a page linked from this page.
This method allows you to have relative links.

=cut

sub resolve_link {
	my ($self, $link) = @_;
	$link = ($link =~ s/^\.//) ? $self->name.':'.$link : # sub namespace
		($link !~ /:/) ? $link = $self->namespace.$link : # relative link
		$link ;
	return $link;
}

=item read_block( )

Returns one block of data or undef on the end of the list.
Always use this method to access the data in a page.

=cut

sub read_block {
	my $self = shift;
	if ($self->{iter} >= @{$self->{data}}) {
		$self->{iter} = 0;
		return undef;
	}
	return $self->{data}[ $self->{iter}++ ];
}

=item rewind( )

Resets the iterator for L<read_block( )>.

=cut

sub rewind { $_[0]->{iter} = 0 }

=item push_blocks(BLOCK, BLOCK, ...)

Append more data to this page.

=cut

sub push_blocks {
	my $self = shift;
	push @{$self->{data}}, @_;
}

=item push_block(TAG, CONTENT)

Append one block of data to this page.

=cut

sub push_block {
	my ($self, $tag, @node) = @_;
	if ($tag eq 'normal') {
		push @{$self->{data}}, @node;
	}
	else {
		push @{$self->{data}}, [$tag, @node];
	}
}

=item unshift_block(TAG, CONTENT)

Prepend a block of data to this page.

=cut

sub unshift_block {
	my ($self, $tag, @node) = @_;
	if ($tag eq 'normal') {
		unshift @{$self->{data}}, @node;
	}
	else {
		unshift @{$self->{data}}, [$tag, @node];
	}
}

=item clean( )

Logical cleanup of the data tree.

=cut

sub clean {
	my $self = shift;
	for my $i (1 .. $#{$self->{data}}) {
		if (ref $self->{data}[$i]) {
			if ($self->{data}[$i][0] eq 'normal') {
				my (undef, @text) = @{$self->{data}[$i]};
				$self->{data}[$i] = join '', @text;
			}
			else { next }
		}
		
		next if ref $self->{data}[$i-1];
		$self->{data}[$i] = $self->{data}[$i-1] . $self->{data}[$i];
		$self->{data}[$i-1] = undef;
	}
	@{$self->{data}} = grep defined($_), @{$self->{data}};
}

=item clear( )

Clear all data from this page.

=cut

sub clear {
	$_[0]->{data} = [];
	$_[0]->{iter} = 0;
}

=item save( )

=item delete( )

=item move( )

The methods C<save()>, C<delete()> and C<move()> are aliases
for the methods C<save_page()>, C<delete_page()> and C<move_page()>
in the repository object. See class Zim.

=cut

sub save { $_[0]->{zim}->save_page(@_) }

sub delete { $_[0]->{zim}->delete_page(@_) }

sub move { $_[0]->{zim}->move_page(@_) }

package Zim::Page::File;

=back

=head2 Zim::Page::File;

This class defines a private interface for page objects that
are stored in plain text files. It is used by the class Zim.
These methods should never be called directly from the GUI.

This class derives from Zim::Page.

=over 4

=cut

# Overloaded methods don't need doc

our @ISA = 'Zim::Page';

sub new {
	my ($class, $parent, $name) = @_;
	$name =~ s/^:+|:+$//g;
	die "Can't create $class object without a name!\n"
		unless length $name;
	my $self = bless {
		zim => $parent,
		pagename => $name,
		iter => 0,
		data => [],
	}, $class ;
	@{$self}{qw/filename dirname realname/} =
       		$parent->filename($name);
	return $self;
}

sub realname { return $_[0]->{realname} }

=item filename( )

Returns the filename for the current object.

=cut

sub filename {
	return wantarray 
		? ( @{$_[0]}{qw/filename dirname/} )
		: $_[0]->{filename} ;
}

=item read_file( )

Reads content from the file corresponding to this page and calls
Zim::Formatter to parse the text.

=cut

sub read_file {
	my $self = shift;
	return unless -f $self->{filename};
	$self->{data} = [
		Zim::Formatter->text2blocks( _read_file($self->{filename}) )
	];
}

=item write_file( )

Calls Zim::Formatter to convert the data structure to plain text and
writes this to the corresponding text file.

=cut

sub write_file {
	my $self = shift;
	_mkdir( $self->{dirname} ) unless -d $self->{dirname};
	_write_file( $self->{filename},
		Zim::Formatter->blocks2text( @{ $self->{data} } )
	);
}

=item remove_file( )

Removes the current file (effectively deleting the page) and
also the current directory if it is empty.

=cut

sub remove_file {
	my $self = shift;
	unlink $self->{filename}
		or die "Could not remove file $self->{filename}\n";
	_rmdir($self->{dirname});
}

=back

The following functions are used by the methods. These are not
methods themselfs and should be called without object reference.

=over 4

=item _read_file(FILE)

Returns the contents of a file.

=cut

sub _read_file {
	my $file = shift;
	open TXT, $file or die "Could not read $file\n";
	binmode TXT, ':utf8' unless $] < 5.008;
	my $text = join '', <TXT>;
	close TXT;
	return $text;
}

=item _write_file(FILE, TEXT)

Writes file contents.

=cut

sub _write_file {
	my $file = shift;
	open TXT, ">$file" or die "Could not write $file\n";
	binmode TXT, ':utf8' unless $] < 5.008;
	print TXT @_;
	close TXT or die "Could not write $file\n";
}

=item _mkdir(DIR)

Creates the complete path for DIR.

=cut

sub _mkdir {
	my ($vol, $dirs) = File::Spec->splitpath(shift(@_), 'NO_FILE');
	my @dirs = File::Spec->splitdir($dirs);
	my $path = File::Spec->catpath($vol, shift @dirs);
	mkdir $path or die "Could not create dir $path\n"
		if length $path and ! -d $path;
	while (@dirs) {
		$path = File::Spec->catdir($path, shift @dirs);
		mkdir $path or die "Could not create dir $path\n"
			unless -d $path;
	}
}

=item _rmdir(DIR)

Removes DIR and parent directories if empty.

=cut

sub _rmdir {
	my ($vol, $dirs) = File::Spec->splitpath(shift(@_), 'NO_FILE');
	my @dirs = File::Spec->splitdir($dirs);
	my $path = File::Spec->catpath($vol, shift @dirs);
	rmdir $path; # fails when not empty
	while (@dirs) {
		$path = File::Spec->catdir($path, shift @dirs);
		last unless -d $path;
		rmdir $path; # fails when not empty
	}
}

1;

__END__

=back

=head1 DATA

Zim uses a simple data structure to represent a page with markup.
This data representation is used by the GUI program so that the 
program is independent of the data format used for the actual text
files. (However for some tasks, like the copying and pasting of text,
the GUI program may use Zim::Formatter directly.)

To illustrate the data format the following example gives the 
data structure for a piece of POD documentation:

Text:

	=head1 NAME
	
	Foo - supplement to Bar
	
	=head1 SYNOPSIS
	
		use Foo;
		Foo->bar;
	
	=head1 SEE ALSO
	
	L<Bar>

Parses to:

	[head1 => 'NAME'],
	"\n\nFoo - supplement to Bar\n\n",
	[head1 => 'SYNOPSIS'],
	"\n\n",
	[pre   => "use Foo;\nFoo->bar;\n"], # indenting removed
	"\n",
	[head1 => 'SEE ALSO'],
	"\n\n",
	[link  => 'Bar', 'Bar'],  # link text and link target
	"\n"

FIXME a more complicated structure may be needed when we want to
support for example tables.

=head1 BUGS

Please mail the author if you find any bugs.

=head1 AUTHOR

Jaap Karssenberg || Pardus [Larus] E<lt>pardus@cpan.orgE<gt>

Copyright (c) 2005 Jaap G Karssenberg. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

L<zim>(1)

=cut
