package Zim::Formats::Wiki;

use strict;
no warnings;

our $VERSION = '0.10';

# TODO: some tags can be nested: email links for example
# TODO: header in the middle of a para doesn't get recognized


=head1 NAME

Zim::Formats::Wiki - Wiki text parser

=head1 DESCRIPTION

This is the default parser for Zim.
It uses a wiki-style syntax to format plain text.

FIXME more verbose description

=head1 METHODS

=over 4

=item load_tree(IO, PAGE)

Reads plain text from a filehandle and returns a parse tree.

=cut

sub load_tree { # TODO whitelines between verbatim blocks should be preserved
	my ($class, $io, $page) = @_;

	my @tree;
	my $para = '';
	while (<$io>) {
		if (!/\S/) {
			push @tree, $class->parse_para($para) if $para =~ /\S/;
			$tree[-1][1]{empty_lines}++ if scalar @tree;
			$para = '';
		}
		elsif (/^==+\s+\S+/) {
			push @tree, $class->parse_para($para) if $para =~ /\S/;
			push @tree, $class->parse_head($_);
			$para = '';
		}
		else { $para .= $_ }
	}
	push @tree, $class->parse_para($para) if $para =~ /\S/;
	
	#use Data::Dumper; print STDERR Dumper \@tree;
	return ['Document', {}, @tree];
}

sub parse_head { # parse a header
	my ($class, $head) = @_;
	chomp $head;
	$head =~ s/^(==+)\s+(.*?)(\s+==+|\s*)$/$2/;
	my $level = 7 - length($1); # =X6 => head1, =X5 => head2 etc.
	return ['head'.$level, {}, $head];
}

sub parse_para {
	my ($class, $text) = @_;
	return ( ($text =~ /^(?!\t|\s\s+)/s) # one tab or two spaces
		? ['Para', {}, $class->parse_block($text)]
		: $class->parse_verbatim($text)            );
}

sub parse_verbatim { # parse pre formated paragraphs
	my ($class, $pre) = @_;
	my ($indent) = ($pre =~ /^(\s+)/);
	$pre =~ s/^$indent//mg;
	return ['Verbatim', {}, $pre];
}

our @parser_subs = qw/parse_link parse_styles parse_image/;

sub parse_block { # parse a block of text
	my ($class, @text) = @_;
	for my $sub (@parser_subs) {
		@text = map {ref($_) ? $_ : ($class->$sub($_))} @text;
	}
	return @text;
}

sub parse_image {
	my ($class, $text) = @_;
	my $i = 0;
	return map {
		($i++ % 2) ? ['image', {src => $_}] : $_
	} split /\{\{(.+?)\}\}/, $text;
}

sub parse_link { # parse links and urls
	my ($class, $text) = @_;
	my $i = 0;
	return map {
		if ($i++ % 2) {
			my $text;
			if (s/^\[\[(.+)\|(.+)\]\]$/$1/) { $text = $2 }
			else { s/^\[\[(.+)\]\]$/$1/ }
			
			if (/^mailto:|^\S+\@\S+\.\w+$/) {
				$text = $_ unless length $text;
				$_ =~ s/^(mailto:)?/mailto:/;
			}
			
			['link', {to => $_}, length($text) ? $text : $_ ]
		} else { $_ }
	} split /(
		\[\[.+?\]\]                              |
		\b\w+:\/\/     \S*\[\S+\](?:\S+[\w\/])?  |
		\b\w+:\/\/     \S+[\w\/]                 |
		\bmailto:\S+\@ \S*\[\S+\](?:\S+[\w\/])?  |
		\bmailto:\S+\@ \S+[\w\/]                 |
		\b\S+\@\S+\.\w+\b
	)/x, $text;
		# The host name in an uri can be "[hex:hex:..]" for ipv6
		# but we do not want to match "[http://foo.org]"
		# See rfc3986 for the official -but unpractical- regex
}

sub parse_styles { # parse blocks of bold, italic and underline
	my ($class, $text) = @_;
	my $i = 0;
	return	map {
		unless ($i++ % 2) { $_ }
		elsif (/^\'\'(.+)\'\'$/) { ['verbatim',  {}, $1] }
		elsif (/^\*\*(.+)\*\*$/) { ['bold',      {}, $1] }
		elsif (/^\/\/(.+)\/\/$/) { ['italic',    {}, $1] }
		elsif (/^\_\_(.+)\_\_$/) { ['underline', {}, $1] }
	} split /(
		\'\'.+?\'\' |
		\*\*.+?\*\* |
		(?<!\:)\/\/.+?\/\/ |
		__.+?__
	)/x, $text;
}

=item save_tree(IO, TREE, PAGE)

Serializes the parse tree into a piece of plain text and writes this
to a filehandle.

=cut

sub save_tree {
	# TODO add support for recursive tags
	my ($class, $io, $tree) = @_;

	my $old_fh = select $io;
	eval { $class->_save_tree($tree) };
	select $old_fh;
	die $@ if $@;
}

sub _save_tree {
	my ($class, $tree) = @_;
	
	my ($name, $opt) = splice @$tree, 0, 2;
	die "Invalid parse tree"
		unless length $name and ref($opt) eq 'HASH';

	while (@$tree) {
		my $node = shift @$tree;
		unless (ref $node) {
			print $node;
			next;
		}
		
		my ($tag, $meta, @node) = @$node;
		if ($tag eq 'Para') {
			$class->_save_tree($node);
		}
		elsif ($tag eq 'Verbatim')  {
			s/^/    /mg for @node;
			print @node;
		}
		elsif ($tag =~ /^head(\d)$/) {
			my $n = 7 - $1;
			print ( ('='x$n)." $node[0] ".('='x$n)."\n" );
		}
		elsif ($tag eq 'image') {
			print '{{'.$$meta{src}.'}}';
		}
		elsif ($tag eq 'link') {
			my $to = $$meta{to};
			$to =~ s/^mailto:// unless $node[0] =~ /^mailto:/;
			print (
				($to ne $node[0]) ? "[[$to|$node[0]]]" :
				($to =~ m#^\w+://|^(?:mailto:)?\S+\@\S+\.\w+#)
					? $to : "[[$to]]" );
		}
		# per line markup for remaining tags ...
		elsif ($tag eq 'bold') {
			print map { /\S/ ? "**$_**" : $_} split /(\n)/, $node[0]
		}
		elsif ($tag eq 'italic') {
			print map { /\S/ ? "//$_//" : $_} split /(\n)/, $node[0]
		}
		elsif ($tag eq 'underline') {
			print map { /\S/ ? "__$_\__" : $_} split /(\n)/, $node[0]
		}
		elsif ($tag eq 'verbatim') {
			print map { /\S/ ? "''$_''" : $_} split /(\n)/, $node[0]
		}
		else { die "Unkown tag in wiki parse tree: $tag\n" }
		
		print "\n"x$$meta{empty_lines} if $$meta{empty_lines};
	}
}

1;

__END__

=back

=head1 AUTHOR

Jaap Karssenberg (Pardus) E<lt>pardus@cpan.orgE<gt>

Copyright (c) 2005 Jaap G Karssenberg. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

L<Zim>,
L<Zim::Repository>,
L<Zim::Page>

=cut

