Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

234 lines
8.7 KiB

  1. // File Name: detjpncs.c
  2. // Owner: Tetsuhide Akaishi
  3. // Revision: 1.00 02/21/'93 Tetsuhide Akaishi
  4. //
  5. // Modified by v-chikos
  6. #include "win32.h"
  7. #include "fechrcnv.h"
  8. // The DetectJPNCode function finds out which code set is used in
  9. // a character string.
  10. //
  11. //
  12. // UCHAR *string Points to the character string to be checked.
  13. //
  14. // int count Specifies the size in bytes of the string pointed
  15. // to by the string parameter.
  16. //
  17. // Return Value
  18. // The function returns one of the following values.
  19. //
  20. // Value Meaning
  21. // CODE_ONLY_SBCS There are no Japanese characters in the
  22. // string.
  23. // CODE_JPN_JIS JIS Code Set. There are JIS Code Set
  24. // characters in the string.
  25. // CODE_JPN_EUC EUC Code Set. There are EUC Code Set
  26. // characters in the string.
  27. // CODE_JPN_SJIS Shift JIS Code Set. There are Shift JIS
  28. // Code Set characters in the string.
  29. //
  30. //
  31. // Note: CODE_UNKNOWN == CODE_ONLY_SBCS
  32. // added by v-chikos for IIS 2.0J
  33. #define GetNextChar(r) \
  34. { \
  35. if ( --count ) \
  36. c = *++string; \
  37. else \
  38. return (r); \
  39. }
  40. int DetectJPNCode ( UCHAR *string, int count )
  41. {
  42. int i;
  43. int c;
  44. for ( ; count > 0; count--, string++ ) {
  45. c = *string;
  46. if ( c == ESC ) { // check for jis (iso-2022-jp)
  47. if ( count < 3 )
  48. return CODE_UNKNOWN;
  49. c = *++string; count--;
  50. if ( c == KANJI_IN_1ST_CHAR &&
  51. ( *(string+1) == KANJI_IN_2ND_CHAR1 || // ESC $ B
  52. *(string+1) == KANJI_IN_2ND_CHAR2 )) // ESC $ @
  53. return CODE_JPN_JIS;
  54. else if ( c == KANJI_OUT_1ST_CHAR &&
  55. ( *(string+1) == KANJI_OUT_2ND_CHAR1 || // ESC ( B
  56. *(string+1) == KANJI_OUT_2ND_CHAR2 )) // ESC ( J
  57. return CODE_JPN_JIS;
  58. else
  59. return CODE_UNKNOWN;
  60. } else if ( (0x81 <= c && c <= 0x8d) || (0x8f <= c && c <= 0x9f) ) { // 1
  61. // found sjis 1st
  62. return CODE_JPN_SJIS;
  63. } else if ( 0x8e == c ) { // 2
  64. // found sjis 1st || euc Kana 1st (SS2)
  65. GetNextChar( CODE_UNKNOWN )
  66. if ( (0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xa0) || (0xe0 <= c && c <= 0xfc) ) // 2-1
  67. // found sjis 2nd
  68. return CODE_JPN_SJIS;
  69. else if ( 0xa1 <= c && c <= 0xdf ) // 2-2
  70. // found sjis 2nd || euc Kana 2nd (sjis || euc)
  71. continue;
  72. else
  73. // illegal character code sequence
  74. return CODE_UNKNOWN;
  75. } else if ( 0xf0 <= c && c <= 0xfe ) { // 4
  76. // found euc 1st
  77. return CODE_JPN_EUC;
  78. } else if ( 0xe0 <= c && c <= 0xef ) { // 5
  79. // found sjis 1st || euc 1st
  80. GetNextChar( CODE_UNKNOWN )
  81. if ( (0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xa0) ) // 5-1
  82. // found sjis 2nd
  83. return CODE_JPN_SJIS;
  84. else if ( 0xfd <= c && c <= 0xfe ) // 5-2
  85. // found euc 2nd
  86. return CODE_JPN_EUC;
  87. else if ( 0xa1 <= c && c <= 0xfc ) // 5-3
  88. // found sjis 2nd || euc 2nd (sjis || euc)
  89. continue;
  90. else
  91. // illegal character code sequence
  92. return CODE_UNKNOWN;
  93. } else if ( 0xa1 <= c && c <= 0xdf ) { // 3
  94. // found sjis Kana || euc 1st
  95. GetNextChar( CODE_JPN_SJIS )
  96. if ( c <= 0x9f ) // 3-4
  97. // not euc 2nd byte
  98. return CODE_JPN_SJIS;
  99. else if ( 0xa1 <= c && c <= 0xdf ) // 3-2
  100. // found sjis kana || euc 2nd (sjis || euc)
  101. continue;
  102. else if ( 0xe0 <= c && c <= 0xef ) { // 3-3
  103. // found sjis 1st || euc 2nd
  104. sjis1stOReuc2nd:
  105. GetNextChar( CODE_JPN_EUC )
  106. if ( 0xfd <= c && c <= 0xfe ) // 3-3-5
  107. // found euc 1st
  108. return CODE_JPN_EUC;
  109. else if ( (0x80 <= c && c <= 0x8d) || (0x8f <= c && c <= 0xa0) ) // 3-3-2
  110. // found sjis 2nd
  111. return CODE_JPN_SJIS;
  112. else if ( 0x40 <= c && c <= 0x7e ) // 3-3-1
  113. // found sjis 2nd || sbcs (sjis || euc)
  114. continue;
  115. else if ( 0x8e == c ) { // 3-3-3
  116. // found sjis 2nd || euc kana 1st
  117. GetNextChar( CODE_JPN_SJIS )
  118. if ( 0xa1 <= c && c <= 0xdf )
  119. // found sjis Kana || euc Kana 2nd (sjis || euc)
  120. continue;
  121. else
  122. // not found euc kana 2nd
  123. return CODE_JPN_SJIS;
  124. } else if ( 0xa1 <= c && c <= 0xfc ) { // 3-3-4
  125. // found sjis 2nd || euc 1st
  126. GetNextChar( CODE_JPN_SJIS )
  127. if ( 0xa1 <= c && c <= 0xdf ) // 3-3-4-1
  128. // found sjis kana || euc 2nd (sjis || euc)
  129. continue;
  130. if ( 0xe0 <= c && c <= 0xef ) // 3-3-4-2
  131. // found sjis 1st || euc 2nd
  132. goto sjis1stOReuc2nd;
  133. if ( 0xf0 <= c && c <= 0xfe ) // 3-3-4-3
  134. // found euc 2nd
  135. return CODE_JPN_EUC;
  136. else
  137. // not found euc 2nd
  138. return CODE_JPN_SJIS;
  139. } else
  140. // not found sjis 2nd
  141. return CODE_JPN_EUC;
  142. } else if ( 0xf0 <= c && c <= 0xfe ) // 3-1
  143. return CODE_JPN_EUC;
  144. else
  145. return CODE_UNKNOWN;
  146. }
  147. }
  148. return CODE_ONLY_SBCS;
  149. // |<-----sjis1st---->| |<-sjisKana->|<-sjis1st->|
  150. // ss2 |<------euc1st-------------------->|
  151. // |81 8d|8e|8f 9f|a0|a1 df|e0 ef|f0 fe|
  152. // |<--1-->|2 |<--1-->| |<-----3---->|<----5---->|<---4--->|
  153. // case 1 sjis
  154. // case 4 euc
  155. // case 2
  156. // |<---sjis2nd--->| |<------sjis2nd---------------------->|
  157. // |<-eucKana2nd->|
  158. // |40 7e|7f|80 a0|a1 df|e0 fc|
  159. // |<-------1----->| |<----1--->|<------2----->|<----1---->|
  160. // case 5
  161. // |<----sjis2nd----->| |<---------sjis2nd---------------->|
  162. // |<--------euc2nd------>|
  163. // |40 7e|7f|80 a0|a1 fc|fd fe|
  164. // |<--------1------->| |<----1---->|<-------3------>|<-2->|
  165. // case 3
  166. // |<-----sjis1st---->| |<-sjisKana->|<-sjis1st->|
  167. // |<------euc2nd-------------------->|
  168. // |81 9f|a0|a1 df|e0 ef|f0 fe|
  169. // <--------4--------->| |<-----2---->|<----3---->|<--1---->|
  170. // case 3-3
  171. // |<--sjis2nd-->| |<------------sjis2nd------------------>|
  172. // ss2 |<------euc1st------>|
  173. // |40 7e|7f|80 8d|8e|8f a0|a1 fc|fd fe|
  174. // |<-----1----->| |<--2-->|3 |<--2-->|<------4----->|<-5->|
  175. // case 3-3-4
  176. // |<-sjisKana->|<-sjis1st->|
  177. // |<------euc2nd-------------------->|
  178. // |a1 df|e0 ef|f0 fe|
  179. // |<-----1---->|<----2---->|<--3---->|
  180. #if 0 // old code
  181. for ( i = 0 ; i < count ; i++, string++ ) {
  182. if ( *string == ESC ) {
  183. if ( *(string+1) == KANJI_IN_1ST_CHAR &&
  184. ( *(string+2) == KANJI_IN_2ND_CHAR1 || // ESC $ B
  185. *(string+2) == KANJI_IN_2ND_CHAR2 )) { // ESC $ @
  186. return CODE_JPN_JIS;
  187. }
  188. if ( *(string+1) == KANJI_OUT_1ST_CHAR &&
  189. ( *(string+2) == KANJI_OUT_2ND_CHAR1 || // ESC ( B
  190. *(string+2) == KANJI_OUT_2ND_CHAR2 )) { // ESC ( J
  191. return CODE_JPN_JIS;
  192. }
  193. } else if ( *(string) >= 0x0081) {
  194. if ( *(string) < 0x00a0 ) {
  195. return CODE_JPN_SJIS;
  196. }
  197. else if ( *(string) < 0x00e0 || *(string) > 0x00ef) {
  198. return CODE_JPN_EUC;
  199. }
  200. if ( *(string+1) < 0x00a1) {
  201. return CODE_JPN_SJIS;
  202. }
  203. else if ( *(string+1) > 0x00fc) {
  204. return CODE_JPN_EUC;
  205. }
  206. }
  207. }
  208. return CODE_ONLY_SBCS;
  209. #endif // 0
  210. }