package Zim::Formats::Wiki;

use strict;
no warnings;

our $VERSION = 0.08;

# TODO: some tags can be nested: email links for example
# TODO: header in the middle of a para doesn't get recognized


=head1 NAME

Zim::Formats::Wiki - Wiki text parser

=head1 SYNOPSIS

	use Zim::Formats::Wiki;
	
	my $parsetree = Zim::Formats::Wiki->load_tree($text);
	# ... modify the parse tree ...
	$text = Zim::Formats::Wiki->save_tree($parsetree);

=head1 DESCRIPTION

This is the default parser for Zim.
It uses a wiki-style syntax to format plain text.

FIXME more verbose description

=head1 METHODS

=over 4

=item load_tree(TEXT, PAGE)

Parses a piece of plain text into a parse tree.
Returns the parse tree.

=cut

sub _extension { return 'txt' }

sub load_tree {
	# split in headers and paragraphs
	# for headers it depends on the source whether they are
	# followed by a empty line or not, for paragraphs (normal,
	# pre, lists, etc.) we always append an extra line break
	#
	# FIXME add line count
	# FIXME add empty lines after para in meta data
	my ($class, $text) = @_;
	my @tree = grep {length $_} map {
		#print STDERR "#>>$_<<\n";
		(! /\S/)        ? $_                             :
		/^(\t|\s\s)/    ? ($class->parse_verbatim($_), ) :
		/\n?^==+\s+\S+/ ? ($class->parse_head($_)      ) :
		                  ($class->parse_para($_),     ) ;
	} split /((?:^\s*\n)+|\n?^==+\s+\S.*\n(?:\s*\n)?)/m, $text;
	return ['Document', {}, @tree];
}

sub parse_head { # parse a header
	my ($class, $head) = @_;
	$head =~ s/^(==+)\s+(.*?)(?:\s+==+|\s*)(\n(?:\s*\n)?)/$2/;
	my $h = 7 - length($1); # =X6 => head1, =X5 => head2 etc.
	my $lb = (length($3) == 1) ? "\n" : "\n\n";
	return ["head$h", {}, $head], $lb;
}

sub parse_verbatim { # parse pre formated paragraphs
	my ($class, $pre) = @_;
	my ($indent) = ($pre =~ /^(\s+)/);
	$pre =~ s/^$indent//mg;
	return ['Verbatim', {}, $pre];
}

our @parser_subs = qw/parse_link parse_styles parse_image/;

sub parse_para {
	my ($class, $text) = @_;
	return ['Para', {}, $class->parse_block($text)];
}

sub parse_block { # parse a block of text
	my ($class, @text) = @_;
	for my $sub (@parser_subs) {
		@text = map {ref($_) ? $_ : ($class->$sub($_))} @text;
	}
	return @text;
}

sub parse_image {
	my ($class, $text) = @_;
	my $i = 0;
	return map {
		($i++ % 2) ? ['image', {src => $_}] : $_
	} split /\{\{(.+?)\}\}/, $text;
}

sub parse_link { # parse links and urls
	my ($class, $text) = @_;
	my $i = 0;
	return map {
		if ($i++ % 2) {
			my $text;
			if (s/^\[\[(.+)\|(.+)\]\]$/$1/) { $text = $2 }
			else { s/^\[\[(.+)\]\]$/$1/ }
			
			if (s/^mailto:// or /\@\S+\.\w+$/) {
				$text = $_ unless length $text;
				$_ = "mailto:$_";
			}
			
			['link', {to => $_}, length($text) ? $text : $_ ]
		} else { $_ }
	} split /(
		\[\[.+?\]\]                |
		\b\w+:\/\/\S+[^\s\,\.\;\)] |
		\b(?:mailto:)?\S+\@\S+\.\w+
	)/x, $text;
}

sub parse_styles { # parse blocks of bold, italic and underline
	my ($class, $text) = @_;
	my $i = 0;
	return	map {
		unless ($i++ % 2) { $_ }
		elsif (/^\'\'(.+)\'\'$/) { ['verbatim',  {}, $1] }
		elsif (/^\*\*(.+)\*\*$/) { ['bold',      {}, $1] }
		elsif (/^\/\/(.+)\/\/$/) { ['italic',    {}, $1] }
		elsif (/^\_\_(.+)\_\_$/) { ['underline', {}, $1] }
	} split /(?<!\S)(
		\'\'.+?\'\' |
		\*\*.+?\*\* |
		\/\/.+?\/\/ |
		__.*?__
	)(?![^\s\,\.\;\)])/x, $text;
}

=item save_tree(TREE, PAGE)

Serializes the parse tree into a piece of plain text.
Returns the text.

=cut

sub save_tree { ## unknown tags just fall through as plain text
	# TODO add support for recursive tags
	my ($class, $tree) = @_;
	my ($name, $opt) = splice @$tree, 0, 2;
	die "Invalid parse tree"
		unless length $name and ref($opt) eq 'HASH';

	my $text;
	while (@$tree) {
		my $node = shift @$tree;
		unless (ref $node) {
			$text .= $node;
			next;
		}
		my ($tag, $meta, @node) = @$node;
		if ($tag eq 'Para') {
			$text .= $class->save_tree($node);
		}
		elsif ($tag eq 'Verbatim')  {
			s/^/    /mg for @node;
			$text .= join '', @node;
		}
		elsif ($tag =~ /^head(\d)$/) {
			my $n = 7 - $1;
			$text .= ('='x$n)." $node[0] ".('='x$n);
		}
		elsif ($tag eq 'image') {
			$text .= '{{'.$$meta{src}.'}}';
		}
		elsif ($tag eq 'link') {
			my $to = $$meta{to};
			$to =~ s/^mailto://;
			if ($to ne $node[0]) { $text .= "[[$to|$node[0]]]" }
			elsif ($to =~ m#^\w+://#) { $text .= $to }
			else { $text .= "[[$to]]" }
		}
		elsif ($tag eq 'bold')      { $text .= "**$node[0]**"  }
		elsif ($tag eq 'italic')    { $text .= "//$node[0]//"  }
		elsif ($tag eq 'underline') { $text .= "__$node[0]\__" }
		elsif ($tag eq 'verbatim')  { $text .= "''$node[0]''"  }
	}
	$text =~ s/\n?$/\n/; # unix files should always end with \n
	return $text;
}

1;

__END__

=back

=head1 AUTHOR

Jaap Karssenberg (Pardus) E<lt>pardus@cpan.orgE<gt>

Copyright (c) 2005 Jaap G Karssenberg. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 SEE ALSO

L<Zim>,
L<Zim::Repository>,
L<Zim::Page>

=cut

