Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

224 lines
7.0 KiB

  1. #---------------------------------------------------------------------
  2. package ParseTable;
  3. #
  4. # Copyright (c) Microsoft Corporation. All rights reserved.
  5. #
  6. # Version: 1.00 (07/12/2000) : (JeremyD) inital version
  7. # 1.01 (08/25/2000) : (JeremyD) allow single heading tables
  8. #---------------------------------------------------------------------
  9. use strict;
  10. use vars qw(@ISA @EXPORT $VERSION);
  11. use IO::File;
  12. use Carp;
  13. use Exporter;
  14. @ISA = qw(Exporter);
  15. @EXPORT = qw(parse_table_lines parse_table_file);
  16. $VERSION = '1.01';
  17. sub parse_table_lines (\@;$) {
  18. my $lines_ref = shift; # the array of lines is modified in place
  19. my $storage = shift; # an array or hash ref to stuff the data in, if
  20. # this is not a ref we quietly discard the data
  21. # this could be useful to skip one table
  22. my @heading; # the current set of headings
  23. LINE:
  24. while (my $line = shift @$lines_ref) {
  25. chomp $line;
  26. next LINE if $line =~ /^\s*$/; # skip empty lines
  27. if ($line =~ /^\s*[#;](.*)/) { # comments may contain headings
  28. my $comment = $1;
  29. if ($comment =~ /^\s*(?:\[\w+\]\s*)+$/) { # bracketed names seperated
  30. # by whitespace
  31. if (@heading) { # already have headings, must be a new table
  32. unshift @$lines_ref, $line; # this line is part of the next
  33. # table, we need to put it back
  34. last LINE; # a new table implies the end of the current one
  35. } else { # found our first set of headings
  36. while ($comment =~ /\[(\w+)\]/g) { # look for headings
  37. push @heading, $1;
  38. }
  39. }
  40. }
  41. next LINE; # done parsing this comment
  42. }
  43. next unless @heading; # no data processing until we have our headings
  44. # fields are seperated by 2 or more white space characters, however
  45. # a single tab will also suffice
  46. my @data = split /(?=\t)\s+|\s{2,}/, $line;
  47. next unless $#heading == $#data; # require 1 data field per heading
  48. # use our current headings as keys and make a hash of the data
  49. my %hash;
  50. for (my $i=0; $i<@heading; $i++) {
  51. $hash{$heading[$i]} = $data[$i];
  52. }
  53. # store our current line's data in the reference passed to us
  54. if (ref $storage eq 'ARRAY') {
  55. push @$storage, \%hash;
  56. } elsif (ref $storage eq 'HASH') {
  57. $storage->{$data[0]} = \%hash;
  58. } else {
  59. # do nothing
  60. # this allows skipping a table by passing in a non-ref storage
  61. }
  62. }
  63. # the data array was modified in place, parsed lines have been removed
  64. # successive calls will parse any remaining tables found in the data array
  65. # return the number of unparsed lines, 0 indicates no remaining tables
  66. return scalar @$lines_ref;
  67. }
  68. sub parse_table_file ($;@) {
  69. my $filename = shift;
  70. my @store_refs = @_;
  71. my $fh = new IO::File $filename, "r";
  72. if (defined $fh) {
  73. my @lines = $fh->getlines;
  74. my $i = 0;
  75. while (@lines) {
  76. parse_table_lines(@lines, $store_refs[$i++]);
  77. }
  78. undef $fh;
  79. } else {
  80. croak "Unable to open file $filename: $!";
  81. }
  82. }
  83. 1;
  84. __END__
  85. =head1 NAME
  86. ParseTable - Extract data from a formatted text table
  87. =head1 SYNOPSIS
  88. use ParseTable;
  89. parse_table_file("foobar.txt", \%table_one, \@table_two, ...);
  90. $lines_remaining = parse_table_lines(@data_lines,\%table);
  91. =head1 DESCRIPTION
  92. This module provides an easy way to extract formatted data from text files.
  93. =over 4
  94. =item parse_table_file( $filename, @storage_refs )
  95. parse_table_file takes a filename to parse and a list of storage locations
  96. for the tables found within that file.
  97. =item parse_table_lines( @data_lines, $storage_ref )
  98. parse_table_lines takes an array of data lines and a storage location for
  99. the first table found in the lines. It modifies the array in place and returns
  100. the number of unparsed lines.
  101. =back
  102. The format for a table is:
  103. ;comments
  104. ; [heading1] [heading2]
  105. item1 item2
  106. item3 with internal space item4
  107. item5 item6
  108. Each line of data in a table is stored as a hash with the heading names as
  109. keys and the data items as values.
  110. If an array reference is specified as the storage location the data hash for
  111. each line will be pushed on to the array.
  112. If the storage location is a hash reference then the data hash for each line
  113. will be stored using the value of the first column as the key. In the case of
  114. duplicate data items the last one appearing in the table takes precedence.
  115. =head1 EXAMPLES
  116. parse_table_file("codetable.txt",\@data)
  117. for $data (@data) {
  118. print "$data->{Lang} is the lang code for $data->{Comments}\n";
  119. }
  120. parse_table_file("codetable.txt",\%data,\%flavors)
  121. print "your site is $data->{$user_lang}{Site}\n";
  122. print "your flavor is $flavors->{$user_lang}{$user_arch}\n";
  123. codetable.txt:
  124. ;
  125. ; This is just an example of a file with two tables
  126. ;
  127. ;[Lang] [LCID] [Class] [Site] [Comments]
  128. ;-------------------------------------------------------------
  129. ;
  130. ARA 0x0401 @CS REDMOND Arabic
  131. CHS 0x0804 @FE REDMOND Chinese Simplified (PR China)
  132. CHT 0x0404 @FE REDMOND Chinese Traditional (Taiwan Region)
  133. CHH 0x0404 @FE REDMOND Chinese Traditional (Hong Kong Region)
  134. FR 0x040C @EU DUBLIN French
  135. GER 0x0407 @EU REDMOND German
  136. ;[Lang] [x86] [ia64]
  137. ;=============================================
  138. USA per;pro;srv;ads;dtc pro;ads;dtc
  139. GER per;pro;srv;ads pro;ads
  140. CHT per;pro;srv;ads pro;ads
  141. CHH per;pro;srv;ads pro;ads
  142. CHS per;pro;srv;ads pro;ads
  143. ARA per;pro pro
  144. =head1 NOTES
  145. The parser can handle blank lines and comments beginning with either ';' or
  146. '#'.
  147. A heading line must appear before any data lines. A heading line is a special
  148. form of comment consisting of field names enclosed in brackets [].
  149. Data lines must have exactly as many fields as heading lines.
  150. Data fields must be seperated by 2 or more spaces. Single spaces within data
  151. items do not require quoting or escaping.
  152. Quoting and escaping are not supported in any way. This means you may not
  153. have a data field with the value "" (empty string) or more than 1 space in a row.
  154. Storage locations are not before parsing begins.
  155. Heading names must match the regex /\w+/.
  156. Should probably be expanded to handle returning a plain array for single column
  157. tables (lists of filenames, etc).
  158. =head1 SEE ALSO
  159. hashtext.pm
  160. =head1 AUTHOR
  161. Jeremy Devenport <JeremyD>
  162. =head1 COPYRIGHT
  163. Copyright (c) Microsoft Corporation. All rights reserved.
  164. =cut