source: cgi-wiki-formatter-usemod/trunk/lib/CGI/Wiki/Formatter/UseMod.pm @ 130

Last change on this file since 130 was 130, checked in by kake, 18 years ago

Macro doc clarification.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 15.3 KB
Line 
1package CGI::Wiki::Formatter::UseMod;
2
3use strict;
4
5use vars qw( $VERSION @_links_found );
6$VERSION = '0.11';
7
8use URI::Escape;
9use Text::WikiFormat as => 'wikiformat';
10use HTML::PullParser;
11use URI::Find::Delimited;
12
13=head1 NAME
14
15CGI::Wiki::Formatter::UseMod - UseModWiki-style formatting for CGI::Wiki
16
17=head1 DESCRIPTION
18
19A formatter backend for L<CGI::Wiki> that supports UseMod-style formatting.
20
21=head1 SYNOPSIS
22
23  use CGI::Wiki::Formatter::UseMod;
24
25  # Instantiate - see below for parameter details.
26  my $formatter = CGI::Wiki::Formatter::UseMod->new( %config );
27
28  # Format some text.
29  my $cooked = $formatter->format($raw);
30
31  # Find out which other nodes that text would link to.
32  my @links_to = $formatter->find_internal_links($raw);
33
34=head1 METHODS
35
36=over 4
37
38=item B<new>
39
40  my $formatter = CGI::Wiki::Formatter::UseMod->new(
41                 extended_links      => 0, # $FreeLinks
42                 implicit_links      => 1, # $WikiLinks
43                 force_ucfirst_nodes => 1, # $FreeUpper
44                 use_headings        => 1, # $UseHeadings
45                 allowed_tags        => [qw(b i)], # defaults to none
46                 macros              => {},
47                 node_prefix         => 'wiki.pl?',
48                 node_suffix         => '',
49                 edit_prefix         => 'wiki.pl?action=edit;id=',
50                 edit_suffix         => '',
51                 munge_urls          => 0,
52  );
53
54Parameters will default to the values shown above (apart from
55C<allowed_tags>, which defaults to allowing no tags).
56
57=over 4
58
59=item B<Internal links>
60
61C<node_prefix>, C<node_suffix>, C<edit_prefix> and C<edit_suffix>
62allow you to control the URLs generated for links to other wiki pages.
63So for example with the defaults given above, a link to the Home node
64will have the URL C<wiki.pl?Home> and a link to the edit form for the
65Home node will have the URL C<wiki.pl?action=edit;id=Home>
66
67(Note that of course the URLs that you wish to have generated will
68depend on how your wiki application processes its CGI parameters - you
69can't just put random stuff in there and hope it works!)
70
71=item B<Internal links - advanced options>
72
73If you wish to have greater control over the links, you may use the
74C<munge_node_name> parameter.  The value of this should be a
75subroutine reference.  This sub will be called on each internal link
76after all other formatting and munging I<except> URL escaping has been
77applied.  It will be passed the node name as its first parameter and
78should return a node name.  Note that this will affect the URLs of
79internal links, but not the link text.
80
81Example:
82
83  # The formatter munges links so node names are ucfirst.
84  # Ensure 'state51' always appears in lower case in node names.
85  munge_node_name => sub {
86                         my $node_name = shift;
87                         $node_name =~ s/State51/state51/g;
88                         return $node_name;
89                     }
90
91B<Note:> This is I<advanced> usage and you should only do it if you
92I<really> know what you're doing.  Consider in particular whether and
93how your munged nodes are going to be treated by C<retrieve_node>.
94
95=item B<URL munging>
96
97If you set C<munge_urls> to true, then your URLs will be more
98user-friendly, for example
99
100  http://example.com/wiki.cgi?Mailing_List_Managers
101
102rather than
103
104  http://example.com/wiki.cgi?Mailing%20List%20Managers
105
106The former behaviour is the actual UseMod behaviour, but requires a
107little fiddling about in your code (see C<node_name_to_node_param>),
108so the default is to B<not> munge URLs.
109
110=item B<Macros>
111
112Be aware that macros are processed I<after> filtering out disallowed
113HTML tags and I<before> transforming from wiki markup into HTML.  They
114are also not called in any particular order.
115
116The keys of macros should be either regexes or strings. The values can
117be strings, or, if the corresponding key is a regex, can be coderefs.
118The coderef will be called with the first nine substrings captured by
119the regex as arguments. I would like to call it with all captured
120substrings but apparently this is complicated.
121
122=back
123
124Macro examples:
125
126  macros => {
127
128      '@SEARCHBOX' =>
129                qq(<form action="wiki.pl" method="get">
130                   <input type="hidden" name="action" value="search">
131                   <input type="text" size="20" name="terms">
132                   <input type="submit"></form>),
133
134      qr/\@INDEX\s+\[Category\s+([^\]]+)]/ =>
135            sub { return "{an index of things in category $_[0]}" }
136
137  }
138
139=cut
140
141sub new {
142    my ($class, @args) = @_;
143    my $self = {};
144    bless $self, $class;
145    $self->_init(@args) or return undef;
146    return $self;
147}
148
149sub _init {
150    my ($self, %args) = @_;
151
152    # Store the parameters or their defaults.
153    my %defs = ( extended_links      => 0,
154                 implicit_links      => 1,
155                 force_ucfirst_nodes => 1,
156                 use_headings        => 1,
157                 allowed_tags        => [],
158                 macros              => {},
159                 node_prefix         => 'wiki.pl?',
160                 node_suffix         => '',
161                 edit_prefix         => 'wiki.pl?action=edit;id=',
162                 edit_suffix         => '',
163                 munge_urls          => 0,
164                 munge_node_name     => undef,
165               );
166
167    my %collated = (%defs, %args);
168    foreach my $k (keys %defs) {
169        $self->{"_".$k} = $collated{$k};
170    }
171
172    return $self;
173}
174
175=item B<format>
176
177  my $html = $formatter->format($submitted_content, $wiki);
178
179Escapes any tags which weren't specified as allowed on creation, then
180interpolates any macros, then translates the raw Wiki language
181supplied into HTML.
182
183A L<CGI::Wiki> object can be supplied as an optional second parameter.
184This object will be used to determine whether a linked-to node exists
185or not, and alter the presentation of the link accordingly. This is
186only really in here for use when this method is being called from
187within L<CGI::Wiki>.
188
189=cut
190
191sub format {
192    my ($self, $raw, $wiki) = @_;
193    $raw =~ s/\r\n/\n/sg; # CGI newline is \r\n not \n
194    my $safe = "";
195
196    my %allowed = map {lc($_) => 1, "/".lc($_) => 1} @{$self->{_allowed_tags}};
197
198    # Parse the HTML - even if we're not allowing any tags, because we're
199    # using a custom escaping routine rather than CGI.pm
200    my $parser = HTML::PullParser->new(doc   => $raw,
201                                       start => '"TAG", tag, text',
202                                       end   => '"TAG", tag, text',
203                                       text  => '"TEXT", tag, text');
204    while (my $token = $parser->get_token) {
205        my ($flag, $tag, $text) = @$token;
206        if ($flag eq "TAG" and !defined $allowed{lc($tag)}) {
207            $safe .= $self->_escape_HTML($text);
208        } else {
209            $safe .= $text;
210        }
211    }
212
213    # Now do any inline links.
214    my $finder = URI::Find::Delimited->new( ignore_quoted => 1 );
215    $finder->find(\$safe);
216
217    # Now process any macros.
218    my %macros = %{$self->{_macros}};
219    foreach my $key (keys %macros) {
220        my $value = $macros{$key};
221        if ( ref $value && ref $value eq 'CODE' ) {
222            $safe =~ s/$key/$value->($1, $2, $3, $4, $5, $6, $7, $8, $9)/eg;
223        } else {
224          $safe =~ s/$key/$value/g;
225        }
226    }
227
228    # Finally set up config and call Text::WikiFormat.
229    my %format_opts = ( extended       => $self->{_extended_links},
230                        prefix         => $self->{_node_prefix},
231                        implicit_links => $self->{_implicit_links} );
232
233    my %format_tags = (
234        # chromatic made most of the regex below.  I will document it when
235        # I understand it properly.
236        indent   => qr/^(?:\t+|\s{4,}|\s*\*?(?=\**\*+))/,
237        newline => "", # avoid bogus <br />
238        paragraph       => [ "<p>", "</p>\n", "", "\n", 1 ], # no bogus <br />
239        extended_link_delimiters => [ '[[', ']]' ],
240        blocks                   => {
241                         ordered         => qr/^\s*([\d]+)\.\s*/,
242                         unordered       => qr/^\s*\*\s*/,
243                         definition      => qr/^:\s*/
244                                    },
245        definition               => [ "<dl>\n", "</dl>\n", "<dt><dd>", "\n" ],
246        # we don't label unordered lists as "not indented" so we can nest them.
247        indented   => {
248                        definition => 0,
249                        ordered    => 0
250                       }, 
251        blockorder => [ qw( header line ordered unordered code definition paragraph )],
252        nests      => { map { $_ => 1} qw( ordered unordered ) },
253        link                     => sub {
254            my ($link, $opts) = @_;
255            $opts ||= {};
256
257            my $title;
258            ($link, $title) = split(/\|/, $link, 2) if $opts->{extended};
259            $title =~ s/^\s*// if $title; # strip leading whitespace
260            $title ||= $link;
261
262            if ( $self->{_force_ucfirst_nodes} ) {
263                $link = $self->_do_freeupper($link);
264            }
265            $link = $self->_munge_spaces($link);
266
267            $link = $self->{_munge_node_name}($link)
268              if $self->{_munge_node_name};
269
270            my $editlink_not_link = 0;
271            # See whether the linked-to node exists, if we can.
272            if ( $wiki && !$wiki->node_exists( $link ) ) {
273                $editlink_not_link = 1;
274            }
275
276            $link =~ s/ /_/g if $self->{_munge_urls};
277
278            $link = uri_escape( $link );
279
280            if ( $editlink_not_link ) {
281                my $prefix = $self->{_edit_prefix};
282                my $suffix = $self->{_edit_suffix};
283                return qq|[$title]<a href="$prefix$link$suffix">?</a>|;
284            } else {
285                my $prefix = $self->{_node_prefix};
286                my $suffix = $self->{_node_suffix};
287                return qq|<a href="$prefix$link$suffix">$title</a>|;
288            }
289        },
290    );
291
292    return wikiformat($safe, \%format_tags, \%format_opts );
293}
294
295# CGI.pm is sometimes awkward about actually performing CGI::escapeHTML
296# if there's a previous instantiation - in the calling script, for example.
297# So just do it here.
298sub _escape_HTML {
299    my ($self, $text) = @_;
300    $text =~ s{&}{&amp;}gso;
301    $text =~ s{<}{&lt;}gso;
302    $text =~ s{>}{&gt;}gso;
303    $text =~ s{"}{&quot;}gso;
304    return $text;
305}
306
307=item B<find_internal_links>
308 
309  my @links_to = $formatter->find_internal_links( $content );
310 
311Returns a list of all nodes that the supplied content links to.
312 
313=cut 
314 
315sub find_internal_links { 
316    my ($self, $raw) = @_;
317 
318    @_links_found = (); 
319 
320    my %format_opts = ( extended       => $self->{_extended_links},
321                        implicit_links => $self->{_implicit_links} );
322
323    my %format_tags = ( extended_link_delimiters => [ '[[', ']]' ],
324                        link => sub {
325                            my ($link, $opts) = @_;
326                            $opts ||= {};
327                            my $title;
328                            ($link, $title) = split(/\|/, $link, 2)
329                              if $opts->{extended};
330                            if ( $self->{_force_ucfirst_nodes} ) {
331                                $link = $self->_do_freeupper($link);
332                            }
333                            $link = $self->{_munge_node_name}($link)
334                              if $self->{_munge_node_name};
335                            $link = $self->_munge_spaces($link);
336                            push @CGI::Wiki::Formatter::UseMod::_links_found,
337                                                                         $link;
338                            return ""; # don't care about output
339                                     }
340    );
341
342    my $foo = wikiformat($raw, \%format_tags, \%format_opts);
343
344    my @links = @_links_found;
345    @_links_found = ();
346    return @links;
347}
348
349
350=item B<node_name_to_node_param>
351
352  use URI::Escape;
353  $param = $formatter->node_name_to_node_param( "Recent Changes" );
354  my $url = "wiki.pl?" . uri_escape($param);
355
356In usemod, the node name is encoded prior to being used as part of the
357URL. This method does this encoding (essentially, whitespace is munged
358into underscores). In addition, if C<force_ucfirst_nodes> is in action
359then the node names will be forced ucfirst if they weren't already.
360
361Note that unless C<munge_urls> was set to true when C<new> was called,
362this method will do nothing.
363
364=cut
365
366sub node_name_to_node_param {
367    my ($self, $node_name) = @_;
368    return $node_name unless $self->{_munge_urls};
369    my $param = $node_name;
370    $param = $self->_munge_spaces($param);
371    $param = $self->_do_freeupper($param) if $self->{_force_ucfirst_nodes};
372    $param =~ s/ /_/g;
373
374    return $param;
375}
376
377=item B<node_param_to_node_name>
378
379  my $node = $q->param('node') || "";
380  $node = $formatter->node_param_to_node_name( $node );
381
382In usemod, the node name is encoded prior to being used as part of the
383URL, so we must decode it before we can get back the original node name.
384
385Note that unless C<munge_urls> was set to true when C<new> was called,
386this method will do nothing.
387
388=cut
389
390sub node_param_to_node_name {
391    my ($self, $param) = @_;
392    return $param unless $self->{_munge_urls};
393
394    # Note that this might not give us back exactly what we started with,
395    # since in the encoding we collapse and trim whitespace; but this is
396    # how usemod does it (as of 0.92) and usemod is what we're emulating.
397    $param =~ s/_/ /g;
398
399    return $param;
400}
401
402sub _do_freeupper {
403    my ($self, $node) = @_;
404
405    # This is the FreeUpper usemod behaviour, slightly modified from
406    # their regexp, as we need to do it before we check whether the
407    # node exists ie before we substitute the spaces with underscores.
408    $node = ucfirst($node);
409    $node =~ s|([- _.,\(\)/])([a-z])|$1.uc($2)|ge;
410
411    return $node;
412}
413
414sub _munge_spaces {
415    my ($self, $node) = @_;
416
417    # Yes, we really do only munge spaces, not all whitespace. This is
418    # how usemod does it (as of 0.92).
419    $node =~ s/ +/ /g;
420    $node =~ s/^ //;
421    $node =~ s/ $//;
422
423    return $node
424}
425
426=head1 AUTHOR
427
428Kake Pugh (kake@earth.li).
429
430=head1 COPYRIGHT
431
432     Copyright (C) 2003 Kake Pugh.  All Rights Reserved.
433
434This module is free software; you can redistribute it and/or modify it
435under the same terms as Perl itself.
436
437=head1 CREDITS
438
439The OpenGuides London team (L<http://openguides.org/london/>) sent
440some very helpful bug reports. A lot of the work of this module is
441done within chromatic's module, L<Text::WikiFormat>.
442
443=head1 CAVEATS
444
445This doesn't yet support all of UseMod's formatting features and
446options, by any means.  This really truly I<is> a 0.0* release. Please
447send bug reports, omissions, patches, and stuff, to me at
448C<kake@earth.li>.
449
450=head1 NOTE ON USEMOD COMPATIBILITY
451
452UseModWiki "encodes" node names before making them part of a URL, so
453for example a node about Wombat Defenestration will have a URL like
454
455  http://example.com/wiki.cgi?Wombat_Defenestration
456
457So if we want to emulate a UseModWiki exactly, we need to munge back
458and forth between node names as titles, and node names as CGI params.
459
460  my $formatter = CGI::Wiki::Formatter::UseMod->new( munge_urls => 1 );
461  my $node_param = $q->param('id') || $q->param('keywords') || "";
462  my $node_name = $formatter->node_param_to_node_name( $node_param );
463
464  use URI::Escape;
465  my $url = "http://example.com/wiki.cgi?"
466    . uri_escape(
467       $formatter->node_name_to_node_param( "Wombat Defenestration" )
468                 );
469
470=head1 SEE ALSO
471
472=over 4
473
474=item * L<CGI::Wiki>
475
476=item * L<Text::WikiFormat>
477
478=item * UseModWiki (L<http://www.usemod.com/cgi-bin/wiki.pl>)
479
480=back
481
482=cut
483
4841;
Note: See TracBrowser for help on using the repository browser.