Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

189 lines
5.1 KiB

  1. package I18N::Collate;
  2. =head1 NAME
  3. I18N::Collate - compare 8-bit scalar data according to the current locale
  4. ***
  5. WARNING: starting from the Perl version 5.003_06
  6. the I18N::Collate interface for comparing 8-bit scalar data
  7. according to the current locale
  8. HAS BEEN DEPRECATED
  9. That is, please do not use it anymore for any new applications
  10. and please migrate the old applications away from it because its
  11. functionality was integrated into the Perl core language in the
  12. release 5.003_06.
  13. See the perllocale manual page for further information.
  14. ***
  15. =head1 SYNOPSIS
  16. use I18N::Collate;
  17. setlocale(LC_COLLATE, 'locale-of-your-choice');
  18. $s1 = new I18N::Collate "scalar_data_1";
  19. $s2 = new I18N::Collate "scalar_data_2";
  20. =head1 DESCRIPTION
  21. This module provides you with objects that will collate
  22. according to your national character set, provided that the
  23. POSIX setlocale() function is supported on your system.
  24. You can compare $s1 and $s2 above with
  25. $s1 le $s2
  26. to extract the data itself, you'll need a dereference: $$s1
  27. This module uses POSIX::setlocale(). The basic collation conversion is
  28. done by strxfrm() which terminates at NUL characters being a decent C
  29. routine. collate_xfrm() handles embedded NUL characters gracefully.
  30. The available locales depend on your operating system; try whether
  31. C<locale -a> shows them or man pages for "locale" or "nlsinfo" or the
  32. direct approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or
  33. C<ls /usr/lib/locale>. Not all the locales that your vendor supports
  34. are necessarily installed: please consult your operating system's
  35. documentation and possibly your local system administration. The
  36. locale names are probably something like C<xx_XX.(ISO)?8859-N> or
  37. C<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH)
  38. variant of French (fr), ISO Latin (8859) 1 (-1) which is the Western
  39. European character set.
  40. =cut
  41. # I18N::Collate.pm
  42. #
  43. # Author: Jarkko Hietaniemi <F<[email protected]>>
  44. # Helsinki University of Technology, Finland
  45. #
  46. # Acks: Guy Decoux <F<[email protected]>> understood
  47. # overloading magic much deeper than I and told
  48. # how to cut the size of this code by more than half.
  49. # (my first version did overload all of lt gt eq le ge cmp)
  50. #
  51. # Purpose: compare 8-bit scalar data according to the current locale
  52. #
  53. # Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
  54. #
  55. # Exports: setlocale 1)
  56. # collate_xfrm 2)
  57. #
  58. # Overloads: cmp # 3)
  59. #
  60. # Usage: use I18N::Collate;
  61. # setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
  62. # $s1 = new I18N::Collate "scalar_data_1";
  63. # $s2 = new I18N::Collate "scalar_data_2";
  64. #
  65. # now you can compare $s1 and $s2: $s1 le $s2
  66. # to extract the data itself, you need to deref: $$s1
  67. #
  68. # Notes:
  69. # 1) this uses POSIX::setlocale
  70. # 2) the basic collation conversion is done by strxfrm() which
  71. # terminates at NUL characters being a decent C routine.
  72. # collate_xfrm handles embedded NUL characters gracefully.
  73. # 3) due to cmp and overload magic, lt le eq ge gt work also
  74. # 4) the available locales depend on your operating system;
  75. # try whether "locale -a" shows them or man pages for
  76. # "locale" or "nlsinfo" work or the more direct
  77. # approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
  78. # Not all the locales that your vendor supports
  79. # are necessarily installed: please consult your
  80. # operating system's documentation.
  81. # The locale names are probably something like
  82. # 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
  83. # for example 'fr_CH.ISO8859-1' is the Swiss (CH)
  84. # variant of French (fr), ISO Latin (8859) 1 (-1)
  85. # which is the Western European character set.
  86. #
  87. # Updated: 19961005
  88. #
  89. # ---
  90. use POSIX qw(strxfrm LC_COLLATE);
  91. require Exporter;
  92. @ISA = qw(Exporter);
  93. @EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
  94. @EXPORT_OK = qw();
  95. use overload qw(
  96. fallback 1
  97. cmp collate_cmp
  98. );
  99. sub new {
  100. my $new = $_[1];
  101. if ($^W && $] >= 5.003_06) {
  102. unless ($please_use_I18N_Collate_even_if_deprecated) {
  103. warn <<___EOD___;
  104. ***
  105. WARNING: starting from the Perl version 5.003_06
  106. the I18N::Collate interface for comparing 8-bit scalar data
  107. according to the current locale
  108. HAS BEEN DEPRECATED
  109. That is, please do not use it anymore for any new applications
  110. and please migrate the old applications away from it because its
  111. functionality was integrated into the Perl core language in the
  112. release 5.003_06.
  113. See the perllocale manual page for further information.
  114. ***
  115. ___EOD___
  116. $please_use_I18N_Collate_even_if_deprecated++;
  117. }
  118. }
  119. bless \$new;
  120. }
  121. sub setlocale {
  122. my ($category, $locale) = @_[0,1];
  123. POSIX::setlocale($category, $locale) if (defined $category);
  124. # the current $LOCALE
  125. $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
  126. }
  127. sub C {
  128. my $s = ${$_[0]};
  129. $C->{$LOCALE}->{$s} = collate_xfrm($s)
  130. unless (defined $C->{$LOCALE}->{$s}); # cache when met
  131. $C->{$LOCALE}->{$s};
  132. }
  133. sub collate_xfrm {
  134. my $s = $_[0];
  135. my $x = '';
  136. for (split(/(\000+)/, $s)) {
  137. $x .= (/^\000/) ? $_ : strxfrm("$_\000");
  138. }
  139. $x;
  140. }
  141. sub collate_cmp {
  142. &C($_[0]) cmp &C($_[1]);
  143. }
  144. # init $LOCALE
  145. &I18N::Collate::setlocale();
  146. 1; # keep require happy