Source code of Windows XP (NT5)
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
// File Name: detjpncs.c
// Owner: Tetsuhide Akaishi
// Revision: 1.00 02/21/'93 Tetsuhide Akaishi
//
// Modified by v-chikos
#include "win32.h"
#include "fechrcnv.h"
// The DetectJPNCode function determines which code set is used in
// a character string.
//
//
// UCHAR *string Points to the character string to be checked.
//
// int count Specifies the size in bytes of the string pointed
// to by the string parameter.
//
// Return Value
// The function returns one of the following values.
//
// Value Meaning
// CODE_ONLY_SBCS There are no Japanese characters in the
// string.
// CODE_JPN_JIS JIS Code Set. There are JIS Code Set
// characters in the string.
// CODE_JPN_EUC EUC Code Set. There are EUC Code Set
// characters in the string.
// CODE_JPN_SJIS Shift JIS Code Set. There are Shift JIS
// Code Set characters in the string.
//
//
// Note: CODE_UNKNOWN == CODE_ONLY_SBCS
// added by v-chikos for IIS 2.0J
// GetNextChar(r) - step to the next input byte, or leave the function.
//
// This macro is not self-contained: it reads and writes the locals
// `count`, `string` and `c` of the function it is expanded in.
//
//   * `count` is decremented first.
//   * If the decremented count is non-zero, `string` is advanced by one
//     and the byte it now points to is stored in `c`.
//   * Otherwise the ENCLOSING FUNCTION returns `r` (hidden control flow).
//
// The expansion is a bare { } block, not do { } while (0), so call sites
// may be written with or without a trailing semicolon.  Do not use it as
// the unbraced body of an `if` that is followed by `else`.
#define GetNextChar(r) \
{ \
    if ( --count ) \
        c = *++string; \
    else \
        return (r); \
}
// DetectJPNCode
//
// Walks the buffer one byte at a time and returns as soon as a byte
// sequence settles which code set is in use.  Bytes that match none of
// the branches below are skipped.  When a sequence is acceptable as both
// Shift JIS and EUC the scan simply continues with the byte after the
// last one examined.
//
// Parameters
//   string  Points to the bytes to examine.  Nothing is written through it.
//   count   Number of bytes available at string.  When count <= 0 the
//           loop body never runs and CODE_ONLY_SBCS is returned.
//
// Returns
//   CODE_JPN_JIS    ESC starts one of ESC $ B, ESC $ @, ESC ( B, ESC ( J.
//   CODE_JPN_SJIS   A sequence was found that only fits Shift JIS.
//   CODE_JPN_EUC    A sequence was found that only fits EUC.
//   CODE_UNKNOWN    ESC with fewer than 3 bytes left, ESC not followed by
//                   one of the sequences above, an illegal byte sequence,
//                   or the buffer ended right after a case 2 / case 5
//                   first byte.
//   CODE_ONLY_SBCS  The end of the buffer was reached without a decision.
//   (The file header notes that CODE_UNKNOWN == CODE_ONLY_SBCS.)
//
// The numbers in the trailing comments (// 1, // 3-3-4, ...) refer to the
// rows of the tables below.
//
//   First byte
//     81-8d, 8f-9f   1      sjis 1st                      -> SJIS
//     8e             2      sjis 1st || euc kana 1st (SS2)
//     a1-df          3      sjis kana || euc 1st
//     f0-fe          4      euc 1st                       -> EUC
//     e0-ef          5      sjis 1st || euc 1st
//
//   Case 2, next byte             (buffer ends: UNKNOWN)
//     40-7e, 80-a0, e0-fc  2-1    sjis 2nd                -> SJIS
//     a1-df                2-2    sjis 2nd || euc kana 2nd   (continue)
//     anything else               illegal                 -> UNKNOWN
//
//   Case 5, next byte             (buffer ends: UNKNOWN)
//     40-7e, 80-a0         5-1    sjis 2nd                -> SJIS
//     fd-fe                5-2    euc 2nd                 -> EUC
//     a1-fc                5-3    sjis 2nd || euc 2nd        (continue)
//     anything else               illegal                 -> UNKNOWN
//
//   Case 3, next byte             (buffer ends: SJIS)
//     f0-fe                3-1    euc 2nd                 -> EUC
//     a1-df                3-2    sjis kana || euc 2nd       (continue)
//     e0-ef                3-3    sjis 1st || euc 2nd
//     00-9f                3-4    not euc 2nd             -> SJIS
//     a0                                                  -> UNKNOWN
//
//   Case 3-3, next byte           (buffer ends: EUC)
//     40-7e                3-3-1  sjis 2nd || sbcs           (continue)
//     80-8d, 8f-a0         3-3-2  sjis 2nd                -> SJIS
//     8e                   3-3-3  sjis 2nd || euc kana 1st
//                                 next a1-df: continue, otherwise or
//                                 buffer ends: SJIS
//     a1-fc                3-3-4  sjis 2nd || euc 1st
//     fd-fe                3-3-5  euc 1st                 -> EUC
//     anything else               not sjis 2nd            -> EUC
//
//   Case 3-3-4, next byte         (buffer ends: SJIS)
//     a1-df                3-3-4-1  sjis kana || euc 2nd     (continue)
//     e0-ef                3-3-4-2  sjis 1st || euc 2nd      (back to 3-3)
//     f0-fe                3-3-4-3  euc 2nd               -> EUC
//     anything else                 not euc 2nd           -> SJIS
//
// GetNextChar(r) advances string/count/c and returns r from this function
// when no byte is left.
int DetectJPNCode ( UCHAR *string, int count )
{
    int c;

    for ( ; count > 0; count--, string++ ) {

        c = *string;

        if ( c == ESC ) {
            // check for jis (iso-2022-jp)
            // ESC plus two more bytes must be available, otherwise the
            // look-ahead at *(string+1) below would run past the buffer.
            if ( count < 3 ) {
                return CODE_UNKNOWN;
            }

            c = *++string;
            count--;

            if ( c == KANJI_IN_1ST_CHAR &&
                 ( *(string+1) == KANJI_IN_2ND_CHAR1 ||         // ESC $ B
                   *(string+1) == KANJI_IN_2ND_CHAR2 ) ) {      // ESC $ @
                return CODE_JPN_JIS;
            } else if ( c == KANJI_OUT_1ST_CHAR &&
                        ( *(string+1) == KANJI_OUT_2ND_CHAR1 ||     // ESC ( B
                          *(string+1) == KANJI_OUT_2ND_CHAR2 ) ) {  // ESC ( J
                return CODE_JPN_JIS;
            } else {
                return CODE_UNKNOWN;
            }

        } else if ( (0x81 <= c && c <= 0x8d) || (0x8f <= c && c <= 0x9f) ) {   // 1
            // found sjis 1st
            return CODE_JPN_SJIS;

        } else if ( 0x8e == c ) {   // 2
            // found sjis 1st || euc Kana 1st (SS2)
            GetNextChar( CODE_UNKNOWN );

            if ( (0x40 <= c && c <= 0x7e) ||
                 (0x80 <= c && c <= 0xa0) ||
                 (0xe0 <= c && c <= 0xfc) ) {           // 2-1
                // found sjis 2nd
                return CODE_JPN_SJIS;
            } else if ( 0xa1 <= c && c <= 0xdf ) {      // 2-2
                // found sjis 2nd || euc Kana 2nd (sjis || euc)
                continue;
            } else {
                // illegal character code sequence
                return CODE_UNKNOWN;
            }

        } else if ( 0xf0 <= c && c <= 0xfe ) {   // 4
            // found euc 1st
            return CODE_JPN_EUC;

        } else if ( 0xe0 <= c && c <= 0xef ) {   // 5
            // found sjis 1st || euc 1st
            GetNextChar( CODE_UNKNOWN );

            if ( (0x40 <= c && c <= 0x7e) ||
                 (0x80 <= c && c <= 0xa0) ) {           // 5-1
                // found sjis 2nd
                return CODE_JPN_SJIS;
            } else if ( 0xfd <= c && c <= 0xfe ) {      // 5-2
                // found euc 2nd
                return CODE_JPN_EUC;
            } else if ( 0xa1 <= c && c <= 0xfc ) {      // 5-3
                // found sjis 2nd || euc 2nd (sjis || euc)
                continue;
            } else {
                // illegal character code sequence
                return CODE_UNKNOWN;
            }

        } else if ( 0xa1 <= c && c <= 0xdf ) {   // 3
            // found sjis Kana || euc 1st
            GetNextChar( CODE_JPN_SJIS );

            if ( c <= 0x9f ) {                          // 3-4
                // not euc 2nd byte
                return CODE_JPN_SJIS;
            } else if ( 0xa1 <= c && c <= 0xdf ) {      // 3-2
                // found sjis kana || euc 2nd (sjis || euc)
                continue;
            } else if ( 0xe0 <= c && c <= 0xef ) {      // 3-3
                // found sjis 1st || euc 2nd
                // Case 3-3-4-2 jumps back here to classify the byte that
                // follows another e0-ef byte.
sjis1stOReuc2nd:
                GetNextChar( CODE_JPN_EUC );

                if ( 0xfd <= c && c <= 0xfe ) {                 // 3-3-5
                    // found euc 1st
                    return CODE_JPN_EUC;
                } else if ( (0x80 <= c && c <= 0x8d) ||
                            (0x8f <= c && c <= 0xa0) ) {        // 3-3-2
                    // found sjis 2nd
                    return CODE_JPN_SJIS;
                } else if ( 0x40 <= c && c <= 0x7e ) {          // 3-3-1
                    // found sjis 2nd || sbcs (sjis || euc)
                    continue;
                } else if ( 0x8e == c ) {                       // 3-3-3
                    // found sjis 2nd || euc kana 1st
                    GetNextChar( CODE_JPN_SJIS );

                    if ( 0xa1 <= c && c <= 0xdf ) {
                        // found sjis Kana || euc Kana 2nd (sjis || euc)
                        continue;
                    } else {
                        // not found euc kana 2nd
                        return CODE_JPN_SJIS;
                    }
                } else if ( 0xa1 <= c && c <= 0xfc ) {          // 3-3-4
                    // found sjis 2nd || euc 1st
                    GetNextChar( CODE_JPN_SJIS );

                    if ( 0xa1 <= c && c <= 0xdf ) {             // 3-3-4-1
                        // found sjis kana || euc 2nd (sjis || euc)
                        continue;
                    }
                    if ( 0xe0 <= c && c <= 0xef ) {             // 3-3-4-2
                        // found sjis 1st || euc 2nd
                        goto sjis1stOReuc2nd;
                    }
                    if ( 0xf0 <= c && c <= 0xfe ) {             // 3-3-4-3
                        // found euc 2nd
                        return CODE_JPN_EUC;
                    } else {
                        // not found euc 2nd
                        return CODE_JPN_SJIS;
                    }
                } else {
                    // not found sjis 2nd
                    return CODE_JPN_EUC;
                }
            } else if ( 0xf0 <= c && c <= 0xfe ) {      // 3-1
                return CODE_JPN_EUC;
            } else {
                return CODE_UNKNOWN;
            }
        }
    }

    return CODE_ONLY_SBCS;
}
|