You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
216 lines
6.1 KiB
216 lines
6.1 KiB
/****************************************************************************
|
|
|
|
PROGRAM: MkUni.c
|
|
|
|
PURPOSE: Creates a text file with unicode characters
|
|
|
|
FUNCTIONS:
|
|
|
|
****************************************************************************/
|
|
|
|
#include <windows.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <tchar.h>
|
|
#include "mkuni.h"
|
|
|
|
|
|
#define REVERSE 0
|
|
#define LINE_SIZE 1000
|
|
|
|
#define ASCIIEOL TEXT("\r\n")
|
|
#define UNILINESEP 0x2028
|
|
#define UNIPARASEP 0x2029
|
|
|
|
struct __range {
|
|
int low;
|
|
int high;
|
|
LPTSTR pDes;
|
|
} range[] = {
|
|
{0x20, 0x7f, TEXT("ANSI") },
|
|
{0xa0, 0xff, TEXT("Latin") },
|
|
{0x100, 0x17f, TEXT("European Latin") },
|
|
{0x180, 0x1f0, TEXT("Extended Latin") },
|
|
{0x250, 0x2a8, TEXT("Standard Phonetic") },
|
|
{0x2b0, 0x2e9, TEXT("Modifier Letters") },
|
|
{0x300, 0x341, TEXT("Generic Diacritical") },
|
|
{0x370, 0x3f5, TEXT("Greek") },
|
|
{0x400, 0x486, TEXT("Cyrillic") },
|
|
{0x490, 0x4cc, TEXT("Extended Cyrillic") },
|
|
{0x5b0, 0x5f5, TEXT("Hebrew") },
|
|
{0x0600,0x06F9, TEXT("Arabic") },
|
|
{0x0900,0x0970, TEXT("Devanagari") },
|
|
{0x0E00,0x0E5B, TEXT("Thai") },
|
|
{0x1000,0x104C, TEXT("Tibetan") },
|
|
{0x10A0,0x10FB, TEXT("Georgian") },
|
|
{0x20a0,0x20aa, TEXT("Currency Symbols") },
|
|
{0x2100,0x2138, TEXT("Letterlike Symbols") },
|
|
{0x2153,0x2182, TEXT("Number Forms") },
|
|
{0x2190,0x21ea, TEXT("Arrows") },
|
|
{0x2200,0x22f1, TEXT("Math Operators") },
|
|
{0x2500,0x257F, TEXT("Form and Chart Components") },
|
|
{0x25A0,0x25EE, TEXT("Geometric Shapes") },
|
|
{0x2600,0x266F, TEXT("Miscellaneous Dingbats") },
|
|
{0x3000,0x303F, TEXT("CJK Symbols and Punctuations") },
|
|
{0x3040,0x309E, TEXT("Hiragana") },
|
|
{0x3100,0x312C, TEXT("Bopomofo") },
|
|
{0x3131,0x318E, TEXT("Hangul Elements") },
|
|
{0, 0, TEXT("terminating entry") },
|
|
};
|
|
|
|
/****************************************************************************
|
|
|
|
FUNCTION: putu(FILE*pf, TCHAR c)
|
|
|
|
PURPOSE: writes a character to the file.
|
|
(Reverses the order of leadbytes if the flag is set)
|
|
|
|
****************************************************************************/
|
|
|
|
void
|
|
putu(FILE*pf, TCHAR c)
|
|
{
|
|
TCHAR chr=c;
|
|
|
|
if( REVERSE )
|
|
chr= ( c<<8 ) + ( ( c>>8 ) &0xFF);
|
|
|
|
|
|
fwrite((void*)&chr, 1, sizeof(TCHAR), pf);
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
FUNCTION: putust(FILE*pf, LPTSTR pc)
|
|
|
|
PURPOSE: writes a string to the file.
|
|
|
|
****************************************************************************/
|
|
|
|
void
|
|
putust(FILE*pf, LPTSTR pc)
|
|
{
|
|
while (*pc)
|
|
putu(pf, *pc++);
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
FUNCTION: main(int, char**)
|
|
|
|
PURPOSE: write sample unicode file
|
|
|
|
****************************************************************************/
|
|
|
|
int _cdecl main(int argc, char**argv)
|
|
{
|
|
struct __range*pr = range;
|
|
int i;
|
|
FILE *pf;
|
|
FILE *pfo;
|
|
char lpstrLine[LINE_SIZE];
|
|
|
|
if(!(pf = fopen("unicode.txt", "wb")))
|
|
return FALSE;
|
|
|
|
// Task1: Write all the unicode ranges and all the characters
|
|
// in those ranges to the output file.
|
|
putu(pf, (TCHAR)0xfeff);
|
|
while (pr->low != 0) {
|
|
putust(pf, TEXT("<<< "));
|
|
putust(pf, pr->pDes);
|
|
putust(pf, TEXT(" >>>"));
|
|
putust(pf, ASCIIEOL );
|
|
for (i=pr->low ; i<=pr->high ; i++)
|
|
putu(pf, (TCHAR)i);
|
|
putust(pf, ASCIIEOL);
|
|
pr++;
|
|
}
|
|
|
|
putust(pf, TEXT("Unicode Line separator here ->"));
|
|
putu(pf, UNILINESEP );
|
|
putust(pf, TEXT("<- Unicode line separator"));
|
|
putust( pf, ASCIIEOL );
|
|
|
|
putust(pf, TEXT("Unicode Paragraph separator here ->"));
|
|
putu(pf, UNIPARASEP );
|
|
putust(pf, TEXT("<- Unicode paragraph separator"));
|
|
putust( pf, ASCIIEOL );
|
|
|
|
fclose( pf );
|
|
|
|
// Task2: Write all the characters codes and information
|
|
// on each character code to an output file.
|
|
if (!(pf = fopen( "names2.txt", "r" )))
|
|
return FALSE;
|
|
|
|
if (!(pfo = fopen("unicodes.txt", "wb")))
|
|
return FALSE;
|
|
|
|
// The first character should be 0xFEFF in the file,
|
|
// indicating that it's an unicode file.
|
|
putu( pfo, (TCHAR)0xfeff);
|
|
|
|
// Read the input file (names2.txt) which has information
|
|
// on every unicode character.
|
|
do
|
|
{
|
|
WCHAR wLineBuffer[LINE_SIZE];
|
|
int i, num;
|
|
|
|
if (!memset(lpstrLine, 0, LINE_SIZE))
|
|
{
|
|
_tprintf(TEXT("Something wrong - failed in Memset!!\n") );
|
|
break;
|
|
}
|
|
|
|
// fgets returns NULL on eof or on an error condition
|
|
if( fgets( lpstrLine, LINE_SIZE, pf) == NULL )
|
|
{
|
|
if (!feof(pf))
|
|
_tprintf(TEXT("Error occured while reading names2.txt.\n") );
|
|
|
|
break;
|
|
}
|
|
|
|
i = 0;
|
|
|
|
// Find the first newline (if there is any) and replace it by \0.
|
|
while((lpstrLine[i]!= '\n') && (lpstrLine[i]!='\r') && (lpstrLine[i]!='\0'))
|
|
{
|
|
i++;
|
|
}
|
|
|
|
lpstrLine[i]= '\0';
|
|
|
|
// If the line has the character code (for which info is given)
|
|
// grab and "display" that.
|
|
num= -1;
|
|
sscanf( lpstrLine, "%x", &num);
|
|
|
|
if( num != -1 )
|
|
{
|
|
putu( pfo, (TCHAR) num );
|
|
putust( pfo, TEXT(": ") );
|
|
}
|
|
else
|
|
{
|
|
putust( pfo,TEXT(" ") );
|
|
}
|
|
// Convert it to the world of unicodes.
|
|
if (MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, lpstrLine, -1,
|
|
wLineBuffer, LINE_SIZE ))
|
|
putust(pfo, wLineBuffer);
|
|
|
|
putust( pfo, ASCIIEOL );
|
|
|
|
}
|
|
while( TRUE );
|
|
|
|
fclose( pfo );
|
|
fclose( pf );
|
|
|
|
return 1;
|
|
}
|