Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

202 lines
6.4 KiB

  1. # ======================================================================
  2. #
  3. # Copyright (C) 2000-2001 Paul Kulchenko ([email protected])
  4. # SOAP::Lite is free software; you can redistribute it
  5. # and/or modify it under the same terms as Perl itself.
  6. #
  7. # $Id: XML::Parser::Lite.pm,v 0.20 2001/07/18 15:15:14 $
  8. #
  9. # ======================================================================
  10. package XML::Parser::Lite;
  11. use strict;
  12. use vars qw($VERSION);
  13. $VERSION = '0.20';
  14. sub new {
  15. my $self = shift;
  16. my $class = ref($self) || $self;
  17. return $self if ref $self;
  18. $self = bless {} => $class;
  19. my %parameters = @_;
  20. $self->setHandlers(); # clear first
  21. $self->setHandlers(%{$parameters{Handlers} || {}});
  22. return $self;
  23. }
  24. sub setHandlers {
  25. my $self = shift;
  26. no strict 'refs'; local $^W;
  27. # clear all handlers if called without parameters
  28. unless (@_) { foreach (qw(Start End Char Final Init)) { *$_ = sub {} } }
  29. while (@_) { my($name => $func) = splice(@_, 0, 2); *$name = $func }
  30. return $self;
  31. }
  32. sub regexp {
  33. my $patch = shift || '';
  34. my $package = __PACKAGE__;
  35. # This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html
  36. # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
  37. # Technical Report TR 1998-17, School of Computing Science, Simon Fraser University, November, 1998.
  38. # Copyright (c) 1998, Robert D. Cameron.
  39. # The following code may be freely used and distributed provided that
  40. # this copyright and citation notice remains intact and that modifications
  41. # or additions are clearly identified.
  42. my $TextSE = "[^<]+";
  43. my $UntilHyphen = "[^-]*-";
  44. my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
  45. my $CommentCE = "$Until2Hyphens>?";
  46. my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
  47. my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
  48. my $S = "[ \\n\\t\\r]+";
  49. my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
  50. my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
  51. my $Name = "(?:$NameStrt)(?:$NameChar)*";
  52. my $QuoteSE = "\"[^\"]*\"|'[^']*'";
  53. my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
  54. my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
  55. my $S1 = "[\\n\\r\\t ]";
  56. my $UntilQMs = "[^?]*\\?+";
  57. my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
  58. my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
  59. my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
  60. my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
  61. my $PI_CE = "$Name(?:$PI_Tail)?";
  62. # these expressions were modified for backtracking and events
  63. my $EndTagCE = "($Name)(?{${package}::end(\$2)})(?:$S)?>";
  64. my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'";
  65. my $ElemTagCE = "($Name)(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)(?{[\@{\$^R||[]},\$4=>defined\$5?\$5:\$6]}))*(?:$S)?(/)?>(?{${package}::start(\$3,\@{\$^R||[]})})(?{\${7} and ${package}::end(\$3)})";
  66. my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
  67. # Next expression is under "black magic".
  68. # Ideally it should be '($TextSE)(?{${package}::char(\$1)})|$MarkupSPE',
  69. # but it doesn't work under Perl 5.005 and only magic with
  70. # (?:....)?? solved the problem.
  71. # I would appreciate if someone let me know what is the right thing to do
  72. # and what's the reason for all this magic.
  73. # Seems like a problem related to (?:....)? rather than to ?{} feature.
  74. # Tests are in t/31-xmlparserlite.t if you decide to play with it.
  75. "(?:($TextSE)(?{${package}::char(\$1)}))$patch|$MarkupSPE";
  76. }
  77. sub compile { local $^W;
  78. # try regexp as it should be, apply patch if doesn't work
  79. foreach (regexp(), regexp('??')) {
  80. eval qq{sub parse_re { use re "eval"; 1 while \$_[0] =~ m{$_}go }; 1} or die;
  81. last if eval { parse_re('<foo>bar</foo>'); 1 }
  82. };
  83. *compile = sub {};
  84. }
  85. setHandlers();
  86. compile();
  87. sub parse {
  88. init();
  89. parse_re($_[1]);
  90. final();
  91. }
  92. my(@stack, $level);
  93. sub init {
  94. @stack = (); $level = 0;
  95. Init(__PACKAGE__, @_);
  96. }
  97. sub final {
  98. die "not properly closed tag '$stack[-1]'\n" if @stack;
  99. die "no element found\n" unless $level;
  100. Final(__PACKAGE__, @_)
  101. }
  102. sub start {
  103. die "multiple roots, wrong element '$_[0]'\n" if $level++ && !@stack;
  104. push(@stack, $_[0]);
  105. Start(__PACKAGE__, @_);
  106. }
  107. sub char {
  108. Char(__PACKAGE__, $_[0]), return if @stack;
  109. # check for junk before or after element
  110. # can't use split or regexp due to limitations in ?{} implementation,
  111. # will iterate with loop, but we'll do it no more than two times, so
  112. # it shouldn't affect performance
  113. for (my $i=0; $i < length $_[0]; $i++) {
  114. die "junk '$_[0]' @{[$level ? 'after' : 'before']} XML element\n"
  115. if index("\n\r\t ", substr($_[0],$i,1)) < 0; # or should '< $[' be there
  116. }
  117. }
  118. sub end {
  119. pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n";
  120. End(__PACKAGE__, $_[0]);
  121. }
  122. # ======================================================================
  123. 1;
  124. __END__
  125. =head1 NAME
  126. XML::Parser::Lite - Lightweight regexp-based XML parser
  127. =head1 SYNOPSIS
  128. use XML::Parser::Lite;
  129. $p1 = new XML::Parser::Lite;
  130. $p1->setHandlers(
  131. Start => sub { shift; print "start: @_\n" },
  132. Char => sub { shift; print "char: @_\n" },
  133. End => sub { shift; print "end: @_\n" },
  134. );
  135. $p1->parse('<foo id="me">Hello World!</foo>');
  136. $p2 = new XML::Parser::Lite
  137. Handlers => {
  138. Start => sub { shift; print "start: @_\n" },
  139. Char => sub { shift; print "char: @_\n" },
  140. End => sub { shift; print "end: @_\n" },
  141. }
  142. ;
  143. $p2->parse('<foo id="me">Hello <bar>cruel</bar> World!</foo>');
  144. =head1 DESCRIPTION
  145. This Perl module gives you access to XML parser with interface similar to
  146. XML::Parser interface. Though only basic calls are supported (init, final,
  147. start, char, and end) you should be able to use it in the same way you use
  148. XML::Parser. Due to using experimantal regexp features it'll work only on
  149. Perl 5.6 and may behave differently on different platforms.
  150. =head1 SEE ALSO
  151. XML::Parser
  152. =head1 COPYRIGHT
  153. Copyright (C) 2000-2001 Paul Kulchenko. All rights reserved.
  154. This library is free software; you can redistribute it and/or modify
  155. it under the same terms as Perl itself.
  156. This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html
  157. Copyright (c) 1998, Robert D. Cameron.
  158. =head1 AUTHOR
  159. Paul Kulchenko (paulclinger@yahoo.com)
  160. =cut