source: cgi-wiki-formatter-usemod/trunk/lib/CGI/Wiki/Formatter/UseMod.pm @ 131

Last change on this file since 131 was 131, checked in by kake, 17 years ago

0.12 - Indentation causes <pre>.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 15.5 KB
Line 
1package CGI::Wiki::Formatter::UseMod;
2
3use strict;
4
5use vars qw( $VERSION @_links_found );
6$VERSION = '0.12';
7
8use URI::Escape;
9use Text::WikiFormat as => 'wikiformat';
10use HTML::PullParser;
11use URI::Find::Delimited;
12
13=head1 NAME
14
15CGI::Wiki::Formatter::UseMod - UseModWiki-style formatting for CGI::Wiki
16
17=head1 DESCRIPTION
18
19A formatter backend for L<CGI::Wiki> that supports UseMod-style formatting.
20
21=head1 SYNOPSIS
22
23  use CGI::Wiki::Formatter::UseMod;
24
25  # Instantiate - see below for parameter details.
26  my $formatter = CGI::Wiki::Formatter::UseMod->new( %config );
27
28  # Format some text.
29  my $cooked = $formatter->format($raw);
30
31  # Find out which other nodes that text would link to.
32  my @links_to = $formatter->find_internal_links($raw);
33
34=head1 METHODS
35
36=over 4
37
38=item B<new>
39
40  my $formatter = CGI::Wiki::Formatter::UseMod->new(
41                 extended_links      => 0, # $FreeLinks
42                 implicit_links      => 1, # $WikiLinks
43                 force_ucfirst_nodes => 1, # $FreeUpper
44                 use_headings        => 1, # $UseHeadings
45                 allowed_tags        => [qw(b i)], # defaults to none
46                 macros              => {},
47                 node_prefix         => 'wiki.pl?',
48                 node_suffix         => '',
49                 edit_prefix         => 'wiki.pl?action=edit;id=',
50                 edit_suffix         => '',
51                 munge_urls          => 0,
52  );
53
54Parameters will default to the values shown above (apart from
55C<allowed_tags>, which defaults to allowing no tags).
56
57=over 4
58
59=item B<Internal links>
60
61C<node_prefix>, C<node_suffix>, C<edit_prefix> and C<edit_suffix>
62allow you to control the URLs generated for links to other wiki pages.
63So for example with the defaults given above, a link to the Home node
64will have the URL C<wiki.pl?Home> and a link to the edit form for the
65Home node will have the URL C<wiki.pl?action=edit;id=Home>
66
67(Note that of course the URLs that you wish to have generated will
68depend on how your wiki application processes its CGI parameters - you
69can't just put random stuff in there and hope it works!)
70
71=item B<Internal links - advanced options>
72
73If you wish to have greater control over the links, you may use the
74C<munge_node_name> parameter.  The value of this should be a
75subroutine reference.  This sub will be called on each internal link
76after all other formatting and munging I<except> URL escaping has been
77applied.  It will be passed the node name as its first parameter and
78should return a node name.  Note that this will affect the URLs of
79internal links, but not the link text.
80
81Example:
82
83  # The formatter munges links so node names are ucfirst.
84  # Ensure 'state51' always appears in lower case in node names.
85  munge_node_name => sub {
86                         my $node_name = shift;
87                         $node_name =~ s/State51/state51/g;
88                         return $node_name;
89                     }
90
91B<Note:> This is I<advanced> usage and you should only do it if you
92I<really> know what you're doing.  Consider in particular whether and
93how your munged nodes are going to be treated by C<retrieve_node>.
94
95=item B<URL munging>
96
97If you set C<munge_urls> to true, then your URLs will be more
98user-friendly, for example
99
100  http://example.com/wiki.cgi?Mailing_List_Managers
101
102rather than
103
104  http://example.com/wiki.cgi?Mailing%20List%20Managers
105
106The former behaviour is the actual UseMod behaviour, but requires a
107little fiddling about in your code (see C<node_name_to_node_param>),
108so the default is to B<not> munge URLs.
109
110=item B<Macros>
111
112Be aware that macros are processed I<after> filtering out disallowed
113HTML tags and I<before> transforming from wiki markup into HTML.  They
114are also not called in any particular order.
115
116The keys of macros should be either regexes or strings. The values can
117be strings, or, if the corresponding key is a regex, can be coderefs.
118The coderef will be called with the first nine substrings captured by
119the regex as arguments. I would like to call it with all captured
120substrings but apparently this is complicated.
121
122=back
123
124Macro examples:
125
126  macros => {
127
128      '@SEARCHBOX' =>
129                qq(<form action="wiki.pl" method="get">
130                   <input type="hidden" name="action" value="search">
131                   <input type="text" size="20" name="terms">
132                   <input type="submit"></form>),
133
134      qr/\@INDEX\s+\[Category\s+([^\]]+)]/ =>
135            sub { return "{an index of things in category $_[0]}" }
136
137  }
138
139=cut
140
141sub new {
142    my ($class, @args) = @_;
143    my $self = {};
144    bless $self, $class;
145    $self->_init(@args) or return undef;
146    return $self;
147}
148
149sub _init {
150    my ($self, %args) = @_;
151
152    # Store the parameters or their defaults.
153    my %defs = ( extended_links      => 0,
154                 implicit_links      => 1,
155                 force_ucfirst_nodes => 1,
156                 use_headings        => 1,
157                 allowed_tags        => [],
158                 macros              => {},
159                 node_prefix         => 'wiki.pl?',
160                 node_suffix         => '',
161                 edit_prefix         => 'wiki.pl?action=edit;id=',
162                 edit_suffix         => '',
163                 munge_urls          => 0,
164                 munge_node_name     => undef,
165               );
166
167    my %collated = (%defs, %args);
168    foreach my $k (keys %defs) {
169        $self->{"_".$k} = $collated{$k};
170    }
171
172    return $self;
173}
174
175=item B<format>
176
177  my $html = $formatter->format($submitted_content, $wiki);
178
179Escapes any tags which weren't specified as allowed on creation, then
180interpolates any macros, then translates the raw Wiki language
181supplied into HTML.
182
183A L<CGI::Wiki> object can be supplied as an optional second parameter.
184This object will be used to determine whether a linked-to node exists
185or not, and alter the presentation of the link accordingly. This is
186only really in here for use when this method is being called from
187within L<CGI::Wiki>.
188
189=cut
190
191sub format {
192    my ($self, $raw, $wiki) = @_;
193    $raw =~ s/\r\n/\n/sg; # CGI newline is \r\n not \n
194    my $safe = "";
195
196    my %allowed = map {lc($_) => 1, "/".lc($_) => 1} @{$self->{_allowed_tags}};
197
198    # Parse the HTML - even if we're not allowing any tags, because we're
199    # using a custom escaping routine rather than CGI.pm
200    my $parser = HTML::PullParser->new(doc   => $raw,
201                                       start => '"TAG", tag, text',
202                                       end   => '"TAG", tag, text',
203                                       text  => '"TEXT", tag, text');
204    while (my $token = $parser->get_token) {
205        my ($flag, $tag, $text) = @$token;
206        if ($flag eq "TAG" and !defined $allowed{lc($tag)}) {
207            $safe .= $self->_escape_HTML($text);
208        } else {
209            $safe .= $text;
210        }
211    }
212
213    # Now do any inline links.
214    my $finder = URI::Find::Delimited->new( ignore_quoted => 1 );
215    $finder->find(\$safe);
216
217    # Now process any macros.
218    my %macros = %{$self->{_macros}};
219    foreach my $key (keys %macros) {
220        my $value = $macros{$key};
221        if ( ref $value && ref $value eq 'CODE' ) {
222            $safe =~ s/$key/$value->($1, $2, $3, $4, $5, $6, $7, $8, $9)/eg;
223        } else {
224          $safe =~ s/$key/$value/g;
225        }
226    }
227
228    # Finally set up config and call Text::WikiFormat.
229    my %format_opts = ( extended       => $self->{_extended_links},
230                        prefix         => $self->{_node_prefix},
231                        implicit_links => $self->{_implicit_links} );
232
233    my %format_tags = (
234        # chromatic made most of the regex below.  I will document it when
235        # I understand it properly.
236        indent   => qr/^(?:\t+|\s{4,}|\s*\*?(?=\**\*+))/,
237        newline => "", # avoid bogus <br />
238        paragraph       => [ "<p>", "</p>\n", "", "\n", 1 ], # no bogus <br />
239        extended_link_delimiters => [ '[[', ']]' ],
240        blocks                   => {
241                         ordered         => qr/^\s*([\d]+)\.\s*/,
242                         unordered       => qr/^\s*\*\s*/,
243                         definition      => qr/^:\s*/,
244                         pre             => qr/^\s+/
245                                    },
246        definition               => [ "<dl>\n", "</dl>\n", "<dt><dd>", "\n" ],
247        pre                      => [ "<pre>\n", "</pre>\n", "", "\n" ],
248        # we don't label unordered lists as "not indented" so we can nest them.
249        indented   => {
250                        definition => 0,
251                        ordered    => 0,
252                        pre        => 0,
253                       }, 
254        blockorder => [ qw( header line ordered unordered code definition pre paragraph )],
255        nests      => { map { $_ => 1} qw( ordered unordered ) },
256        link                     => sub {
257            my ($link, $opts) = @_;
258            $opts ||= {};
259
260            my $title;
261            ($link, $title) = split(/\|/, $link, 2) if $opts->{extended};
262            $title =~ s/^\s*// if $title; # strip leading whitespace
263            $title ||= $link;
264
265            if ( $self->{_force_ucfirst_nodes} ) {
266                $link = $self->_do_freeupper($link);
267            }
268            $link = $self->_munge_spaces($link);
269
270            $link = $self->{_munge_node_name}($link)
271              if $self->{_munge_node_name};
272
273            my $editlink_not_link = 0;
274            # See whether the linked-to node exists, if we can.
275            if ( $wiki && !$wiki->node_exists( $link ) ) {
276                $editlink_not_link = 1;
277            }
278
279            $link =~ s/ /_/g if $self->{_munge_urls};
280
281            $link = uri_escape( $link );
282
283            if ( $editlink_not_link ) {
284                my $prefix = $self->{_edit_prefix};
285                my $suffix = $self->{_edit_suffix};
286                return qq|[$title]<a href="$prefix$link$suffix">?</a>|;
287            } else {
288                my $prefix = $self->{_node_prefix};
289                my $suffix = $self->{_node_suffix};
290                return qq|<a href="$prefix$link$suffix">$title</a>|;
291            }
292        },
293    );
294
295    return wikiformat($safe, \%format_tags, \%format_opts );
296}
297
298# CGI.pm is sometimes awkward about actually performing CGI::escapeHTML
299# if there's a previous instantiation - in the calling script, for example.
300# So just do it here.
301sub _escape_HTML {
302    my ($self, $text) = @_;
303    $text =~ s{&}{&amp;}gso;
304    $text =~ s{<}{&lt;}gso;
305    $text =~ s{>}{&gt;}gso;
306    $text =~ s{"}{&quot;}gso;
307    return $text;
308}
309
310=item B<find_internal_links>
311 
312  my @links_to = $formatter->find_internal_links( $content );
313 
314Returns a list of all nodes that the supplied content links to.
315 
316=cut 
317 
318sub find_internal_links { 
319    my ($self, $raw) = @_;
320 
321    @_links_found = (); 
322 
323    my %format_opts = ( extended       => $self->{_extended_links},
324                        implicit_links => $self->{_implicit_links} );
325
326    my %format_tags = ( extended_link_delimiters => [ '[[', ']]' ],
327                        link => sub {
328                            my ($link, $opts) = @_;
329                            $opts ||= {};
330                            my $title;
331                            ($link, $title) = split(/\|/, $link, 2)
332                              if $opts->{extended};
333                            if ( $self->{_force_ucfirst_nodes} ) {
334                                $link = $self->_do_freeupper($link);
335                            }
336                            $link = $self->{_munge_node_name}($link)
337                              if $self->{_munge_node_name};
338                            $link = $self->_munge_spaces($link);
339                            push @CGI::Wiki::Formatter::UseMod::_links_found,
340                                                                         $link;
341                            return ""; # don't care about output
342                                     }
343    );
344
345    my $foo = wikiformat($raw, \%format_tags, \%format_opts);
346
347    my @links = @_links_found;
348    @_links_found = ();
349    return @links;
350}
351
352
353=item B<node_name_to_node_param>
354
355  use URI::Escape;
356  $param = $formatter->node_name_to_node_param( "Recent Changes" );
357  my $url = "wiki.pl?" . uri_escape($param);
358
359In usemod, the node name is encoded prior to being used as part of the
360URL. This method does this encoding (essentially, whitespace is munged
361into underscores). In addition, if C<force_ucfirst_nodes> is in action
362then the node names will be forced ucfirst if they weren't already.
363
364Note that unless C<munge_urls> was set to true when C<new> was called,
365this method will do nothing.
366
367=cut
368
369sub node_name_to_node_param {
370    my ($self, $node_name) = @_;
371    return $node_name unless $self->{_munge_urls};
372    my $param = $node_name;
373    $param = $self->_munge_spaces($param);
374    $param = $self->_do_freeupper($param) if $self->{_force_ucfirst_nodes};
375    $param =~ s/ /_/g;
376
377    return $param;
378}
379
380=item B<node_param_to_node_name>
381
382  my $node = $q->param('node') || "";
383  $node = $formatter->node_param_to_node_name( $node );
384
385In usemod, the node name is encoded prior to being used as part of the
386URL, so we must decode it before we can get back the original node name.
387
388Note that unless C<munge_urls> was set to true when C<new> was called,
389this method will do nothing.
390
391=cut
392
393sub node_param_to_node_name {
394    my ($self, $param) = @_;
395    return $param unless $self->{_munge_urls};
396
397    # Note that this might not give us back exactly what we started with,
398    # since in the encoding we collapse and trim whitespace; but this is
399    # how usemod does it (as of 0.92) and usemod is what we're emulating.
400    $param =~ s/_/ /g;
401
402    return $param;
403}
404
405sub _do_freeupper {
406    my ($self, $node) = @_;
407
408    # This is the FreeUpper usemod behaviour, slightly modified from
409    # their regexp, as we need to do it before we check whether the
410    # node exists ie before we substitute the spaces with underscores.
411    $node = ucfirst($node);
412    $node =~ s|([- _.,\(\)/])([a-z])|$1.uc($2)|ge;
413
414    return $node;
415}
416
417sub _munge_spaces {
418    my ($self, $node) = @_;
419
420    # Yes, we really do only munge spaces, not all whitespace. This is
421    # how usemod does it (as of 0.92).
422    $node =~ s/ +/ /g;
423    $node =~ s/^ //;
424    $node =~ s/ $//;
425
426    return $node
427}
428
429=head1 AUTHOR
430
431Kake Pugh (kake@earth.li).
432
433=head1 COPYRIGHT
434
435     Copyright (C) 2003-2004 Kake Pugh.  All Rights Reserved.
436
437This module is free software; you can redistribute it and/or modify it
438under the same terms as Perl itself.
439
440=head1 CREDITS
441
442The OpenGuides London team (L<http://openguides.org/london/>) sent
443some very helpful bug reports. A lot of the work of this module is
444done within chromatic's module, L<Text::WikiFormat>.
445
446=head1 CAVEATS
447
448This doesn't yet support all of UseMod's formatting features and
449options, by any means.  This really truly I<is> a 0.* release. Please
450send bug reports, omissions, patches, and stuff, to me at
451C<kake@earth.li>.
452
453=head1 NOTE ON USEMOD COMPATIBILITY
454
455UseModWiki "encodes" node names before making them part of a URL, so
456for example a node about Wombat Defenestration will have a URL like
457
458  http://example.com/wiki.cgi?Wombat_Defenestration
459
460So if we want to emulate a UseModWiki exactly, we need to munge back
461and forth between node names as titles, and node names as CGI params.
462
463  my $formatter = CGI::Wiki::Formatter::UseMod->new( munge_urls => 1 );
464  my $node_param = $q->param('id') || $q->param('keywords') || "";
465  my $node_name = $formatter->node_param_to_node_name( $node_param );
466
467  use URI::Escape;
468  my $url = "http://example.com/wiki.cgi?"
469    . uri_escape(
470       $formatter->node_name_to_node_param( "Wombat Defenestration" )
471                 );
472
473=head1 SEE ALSO
474
475=over 4
476
477=item * L<CGI::Wiki>
478
479=item * L<Text::WikiFormat>
480
481=item * UseModWiki (L<http://www.usemod.com/cgi-bin/wiki.pl>)
482
483=back
484
485=cut
486
4871;
Note: See TracBrowser for help on using the repository browser.