/**************************************************************************** PROGRAM: MkUni.c PURPOSE: Creates a text file with unicode characters FUNCTIONS: ****************************************************************************/ #include #include #include #include #include "mkuni.h" #define REVERSE 0 #define LINE_SIZE 1000 #define ASCIIEOL TEXT("\r\n") #define UNILINESEP 0x2028 #define UNIPARASEP 0x2029 struct __range { int low; int high; LPTSTR pDes; } range[] = { {0x20, 0x7f, TEXT("ANSI") }, {0xa0, 0xff, TEXT("Latin") }, {0x100, 0x17f, TEXT("European Latin") }, {0x180, 0x1f0, TEXT("Extended Latin") }, {0x250, 0x2a8, TEXT("Standard Phonetic") }, {0x2b0, 0x2e9, TEXT("Modifier Letters") }, {0x300, 0x341, TEXT("Generic Diacritical") }, {0x370, 0x3f5, TEXT("Greek") }, {0x400, 0x486, TEXT("Cyrillic") }, {0x490, 0x4cc, TEXT("Extended Cyrillic") }, {0x5b0, 0x5f5, TEXT("Hebrew") }, {0x0600,0x06F9, TEXT("Arabic") }, {0x0900,0x0970, TEXT("Devanagari") }, {0x0E00,0x0E5B, TEXT("Thai") }, {0x1000,0x104C, TEXT("Tibetan") }, {0x10A0,0x10FB, TEXT("Georgian") }, {0x20a0,0x20aa, TEXT("Currency Symbols") }, {0x2100,0x2138, TEXT("Letterlike Symbols") }, {0x2153,0x2182, TEXT("Number Forms") }, {0x2190,0x21ea, TEXT("Arrows") }, {0x2200,0x22f1, TEXT("Math Operators") }, {0x2500,0x257F, TEXT("Form and Chart Components") }, {0x25A0,0x25EE, TEXT("Geometric Shapes") }, {0x2600,0x266F, TEXT("Miscellaneous Dingbats") }, {0x3000,0x303F, TEXT("CJK Symbols and Punctuations") }, {0x3040,0x309E, TEXT("Hiragana") }, {0x3100,0x312C, TEXT("Bopomofo") }, {0x3131,0x318E, TEXT("Hangul Elements") }, {0, 0, TEXT("terminating entry") }, }; /**************************************************************************** FUNCTION: putu(FILE*pf, TCHAR c) PURPOSE: writes a character to the file. (Reverses the order of leadbytes if the flag is set) ****************************************************************************/ void putu(FILE*pf, TCHAR c) { TCHAR chr=c; if( REVERSE ) chr= ( c<<8 ) + ( ( c>>8 ) &0xFF); fwrite((void*)&chr, 1, sizeof(TCHAR), pf); } /**************************************************************************** FUNCTION: putust(FILE*pf, LPTSTR pc) PURPOSE: writes a string to the file. ****************************************************************************/ void putust(FILE*pf, LPTSTR pc) { while (*pc) putu(pf, *pc++); } /**************************************************************************** FUNCTION: main(int, char**) PURPOSE: write sample unicode file ****************************************************************************/ int _cdecl main(int argc, char**argv) { struct __range*pr = range; int i; FILE *pf; FILE *pfo; char lpstrLine[LINE_SIZE]; if(!(pf = fopen("unicode.txt", "wb"))) return FALSE; // Task1: Write all the unicode ranges and all the characters // in those ranges to the output file. putu(pf, (TCHAR)0xfeff); while (pr->low != 0) { putust(pf, TEXT("<<< ")); putust(pf, pr->pDes); putust(pf, TEXT(" >>>")); putust(pf, ASCIIEOL ); for (i=pr->low ; i<=pr->high ; i++) putu(pf, (TCHAR)i); putust(pf, ASCIIEOL); pr++; } putust(pf, TEXT("Unicode Line separator here ->")); putu(pf, UNILINESEP ); putust(pf, TEXT("<- Unicode line separator")); putust( pf, ASCIIEOL ); putust(pf, TEXT("Unicode Paragraph separator here ->")); putu(pf, UNIPARASEP ); putust(pf, TEXT("<- Unicode paragraph separator")); putust( pf, ASCIIEOL ); fclose( pf ); // Task2: Write all the characters codes and information // on each character code to an output file. if (!(pf = fopen( "names2.txt", "r" ))) return FALSE; if (!(pfo = fopen("unicodes.txt", "wb"))) return FALSE; // The first character should be 0xFEFF in the file, // indicating that it's an unicode file. putu( pfo, (TCHAR)0xfeff); // Read the input file (names2.txt) which has information // on every unicode character. do { WCHAR wLineBuffer[LINE_SIZE]; int i, num; if (!memset(lpstrLine, 0, LINE_SIZE)) { _tprintf(TEXT("Something wrong - failed in Memset!!\n") ); break; } // fgets returns NULL on eof or on an error condition if( fgets( lpstrLine, LINE_SIZE, pf) == NULL ) { if (!feof(pf)) _tprintf(TEXT("Error occured while reading names2.txt.\n") ); break; } i = 0; // Find the first newline (if there is any) and replace it by \0. while((lpstrLine[i]!= '\n') && (lpstrLine[i]!='\r') && (lpstrLine[i]!='\0')) { i++; } lpstrLine[i]= '\0'; // If the line has the character code (for which info is given) // grab and "display" that. num= -1; sscanf( lpstrLine, "%x", &num); if( num != -1 ) { putu( pfo, (TCHAR) num ); putust( pfo, TEXT(": ") ); } else { putust( pfo,TEXT(" ") ); } // Convert it to the world of unicodes. if (MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, lpstrLine, -1, wLineBuffer, LINE_SIZE )) putust(pfo, wLineBuffer); putust( pfo, ASCIIEOL ); } while( TRUE ); fclose( pfo ); fclose( pf ); return 1; }