windows-server-2003/shell/osshell/accesory/notepad/mkuni/mkuni.c

/****************************************************************************

    PROGRAM: MkUni.c

    PURPOSE: Creates sample text files containing Unicode characters
             (unicode.txt and unicodes.txt)

    FUNCTIONS: putu, putust, main

****************************************************************************/

#include <windows.h>
#include <string.h>
#include <stdio.h>
#include <tchar.h>
#include "mkuni.h"


#define REVERSE    0
#define LINE_SIZE  1000

#define ASCIIEOL TEXT("\r\n")
#define UNILINESEP 0x2028
#define UNIPARASEP 0x2029

/****************************************************************************

    TABLE: range[]

    PURPOSE: Code point ranges that main() dumps into unicode.txt.
             Each entry holds the first and last code point of a range
             (both are written) and a description used as its heading.
             The entry whose low value is 0 terminates the table.

****************************************************************************/

struct __range {
    int     low;    // first code point of the range
    int     high;   // last code point of the range
    LPTSTR  pDes;   // description of the range
};

struct __range range[] = {
    { 0x0020, 0x007F, TEXT("ANSI") },
    { 0x00A0, 0x00FF, TEXT("Latin") },
    { 0x0100, 0x017F, TEXT("European Latin") },
    { 0x0180, 0x01F0, TEXT("Extended Latin") },
    { 0x0250, 0x02A8, TEXT("Standard Phonetic") },
    { 0x02B0, 0x02E9, TEXT("Modifier Letters") },
    { 0x0300, 0x0341, TEXT("Generic Diacritical") },
    { 0x0370, 0x03F5, TEXT("Greek") },
    { 0x0400, 0x0486, TEXT("Cyrillic") },
    { 0x0490, 0x04CC, TEXT("Extended Cyrillic") },
    { 0x05B0, 0x05F5, TEXT("Hebrew") },
    { 0x0600, 0x06F9, TEXT("Arabic") },
    { 0x0900, 0x0970, TEXT("Devanagari") },
    { 0x0E00, 0x0E5B, TEXT("Thai") },
    { 0x1000, 0x104C, TEXT("Tibetan") },
    { 0x10A0, 0x10FB, TEXT("Georgian") },
    { 0x20A0, 0x20AA, TEXT("Currency Symbols") },
    { 0x2100, 0x2138, TEXT("Letterlike Symbols") },
    { 0x2153, 0x2182, TEXT("Number Forms") },
    { 0x2190, 0x21EA, TEXT("Arrows") },
    { 0x2200, 0x22F1, TEXT("Math Operators") },
    { 0x2500, 0x257F, TEXT("Form and Chart Components") },
    { 0x25A0, 0x25EE, TEXT("Geometric Shapes") },
    { 0x2600, 0x266F, TEXT("Miscellaneous Dingbats") },
    { 0x3000, 0x303F, TEXT("CJK Symbols and Punctuations") },
    { 0x3040, 0x309E, TEXT("Hiragana") },
    { 0x3100, 0x312C, TEXT("Bopomofo") },
    { 0x3131, 0x318E, TEXT("Hangul Elements") },
    { 0,      0,      TEXT("terminating entry") },
};

/****************************************************************************

    FUNCTION: putu(FILE*pf, TCHAR c)

    PURPOSE: Writes a single character to the file as raw bytes.
             When REVERSE is nonzero the low and high bytes of the
             character are exchanged first (assumes a 16-bit TCHAR).
             The result of fwrite is not checked.

****************************************************************************/

void
putu(FILE *pf, TCHAR c)
{
    TCHAR out;

    if (REVERSE) {
        // Move the low byte up and the high byte down.
        out = (c << 8) + ((c >> 8) & 0xFF);
    } else {
        out = c;
    }

    // Emitted byte-by-byte: sizeof(out) items of one byte each.
    fwrite(&out, 1, sizeof(out), pf);
}


/****************************************************************************

    FUNCTION: putust(FILE*pf, LPTSTR pc)

    PURPOSE: Writes a null-terminated string to the file, one character
             at a time through putu(). The terminator is not written.

****************************************************************************/

void
putust(FILE *pf, LPTSTR pc)
{
    LPTSTR cur;

    for (cur = pc; *cur != 0; cur++) {
        putu(pf, *cur);
    }
}


/****************************************************************************

    FUNCTION: main(int, char**)

    PURPOSE: write sample unicode files

    Task 1 creates unicode.txt: a 0xFEFF marker, then for every entry
    of range[] a "<<< description >>>" heading followed by all the
    characters from low to high, then two lines demonstrating the
    Unicode line and paragraph separators.

    Task 2 reads names2.txt line by line and creates unicodes.txt:
    a 0xFEFF marker, then each input line converted to wide characters.
    When a line begins with a hexadecimal number, that character and
    ": " are written in front of it; otherwise three spaces are.

    RETURNS: FALSE if any of the files cannot be opened, 1 otherwise.
    NOTE(review): this is the reverse of the usual exit code convention
    (0 == success). It is left unchanged in case a build script depends
    on it - confirm before changing.

****************************************************************************/

int _cdecl main(int argc, char**argv)
{
    struct __range *pr;
    int     ch;
    FILE    *pf;
    FILE    *pfo;
    char    lpstrLine[LINE_SIZE];

    pf = fopen("unicode.txt", "wb");
    if (pf == NULL)
        return FALSE;

    // Task1: Write all the unicode ranges and all the characters
    // in those ranges to the output file.
    putu(pf, (TCHAR)0xfeff);
    for (pr = range; pr->low != 0; pr++) {
        putust(pf, TEXT("<<< "));
        putust(pf, pr->pDes);
        putust(pf, TEXT(" >>>"));
        putust(pf, ASCIIEOL);
        for (ch = pr->low; ch <= pr->high; ch++)
            putu(pf, (TCHAR)ch);
        putust(pf, ASCIIEOL);
    }

    putust(pf, TEXT("Unicode Line separator here ->"));
    putu(pf, UNILINESEP);
    putust(pf, TEXT("<- Unicode line separator"));
    putust(pf, ASCIIEOL);

    putust(pf, TEXT("Unicode Paragraph separator here ->"));
    putu(pf, UNIPARASEP);
    putust(pf, TEXT("<- Unicode paragraph separator"));
    putust(pf, ASCIIEOL);

    fclose(pf);

    // Task2: Write all the character codes and information
    // on each character code to an output file.
    pf = fopen("names2.txt", "r");
    if (pf == NULL)
        return FALSE;

    pfo = fopen("unicodes.txt", "wb");
    if (pfo == NULL) {
        // Do not leak the input file when the output cannot be created.
        fclose(pf);
        return FALSE;
    }

    // The first character should be 0xFEFF in the file,
    // indicating that it is a unicode file.
    putu(pfo, (TCHAR)0xfeff);

    // Read the input file (names2.txt) which has information
    // on every unicode character. fgets always null-terminates the
    // buffer on success, so it does not need clearing between lines.
    while (fgets(lpstrLine, LINE_SIZE, pf) != NULL) {
        WCHAR        wLineBuffer[LINE_SIZE];
        unsigned int code;

        // Cut the line at the first '\r' or '\n' (if there is any).
        lpstrLine[strcspn(lpstrLine, "\r\n")] = '\0';

        // If the line starts with the character code (for which info
        // is given) grab and "display" that. The conversion count says
        // whether a number was actually read.
        if (sscanf(lpstrLine, "%x", &code) == 1) {
            putu(pfo, (TCHAR)code);
            putust(pfo, TEXT(": "));
        } else {
            putust(pfo, TEXT("   "));
        }

        // Convert it to the world of unicodes. The text is skipped
        // when the conversion fails, but the line ending is still written.
        if (MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, lpstrLine, -1,
                                wLineBuffer, LINE_SIZE))
            putust(pfo, wLineBuffer);

        putust(pfo, ASCIIEOL);
    }

    // fgets returns NULL on eof or on an error condition;
    // only the latter is worth reporting.
    if (!feof(pf))
        _tprintf(TEXT("Error occured while reading names2.txt.\n"));

    fclose(pfo);
    fclose(pf);

    return 1;
}