windows-xp/Source/XPSP1/NT/windows/richedit/re30/rtflex.cpp


								/*

								 *	@doc INTERNAL

								 *

								 *	@module RTFLEX.CPP - RichEdit RTF reader lexical analyzer |

								 *

								 *		This file contains the implementation of the lexical analyzer part of

								 *		the RTF reader.

								 *

								 *	Authors: <nl>

								 *		Original RichEdit 1.0 RTF converter: Anthony Francisco <nl>

								 *		Conversion to C++ and RichEdit 2.0:  Murray Sargent <nl>

								 *

								 *	@devnote

								 *		All sz's in the RTF*.? files refer to a LPSTRs, not LPTSTRs, unless

								 *		noted as a szUnicode.

								 *

								 *	Copyright (c) 1995-1997, Microsoft Corporation. All rights reserved.

								 */


								#include "_common.h"

								#include "_rtfread.h"

								#include "hash.h"


								ASSERTDATA


								#include "tokens.cpp"


								// Array used by character classification macros to speed classification

								// of chars residing in two or more discontiguous ranges, e.g., alphanumeric

								// or hex.  The alphabetics used in RTF control words are lower-case ASCII.

								// *** DO NOT DBCS rgbCharClass[] ***


								#define	fCS		fCT + fSP

								#define fSB		fBL + fSP

								#define fHD		fHX + fDG

								#define	fHU		fHX + fUC

								#define	fHL		fHX + fLC


								const BYTE rgbCharClass[256] =

								{

									fCT,fCT,fCT,fCT,fCT,fCT,fCT,fCT, fCT,fCS,fCS,fCS,fCS,fCS,fCT,fCT,

									fCT,fCT,fCT,fCT,fCT,fCT,fCT,fCT, fCT,fCT,fCT,fCT,fCT,fCT,fCT,fCT,

									fSB,fPN,fPN,fPN,fPN,fPN,fPN,fPN, fPN,fPN,fPN,fPN,fPN,fPN,fPN,fPN,

									fHD,fHD,fHD,fHD,fHD,fHD,fHD,fHD, fHD,fHD,fPN,fPN,fPN,fPN,fPN,fPN,


									fPN,fHU,fHU,fHU,fHU,fHU,fHU,fUC, fUC,fUC,fUC,fUC,fUC,fUC,fUC,fUC,

									fUC,fUC,fUC,fUC,fUC,fUC,fUC,fUC, fUC,fUC,fUC,fPN,fPN,fPN,fPN,fPN,

									fPN,fHL,fHL,fHL,fHL,fHL,fHL,fLC, fLC,fLC,fLC,fLC,fLC,fLC,fLC,fLC,

									fLC,fLC,fLC,fLC,fLC,fLC,fLC,fLC, fLC,fLC,fLC,fPN,fPN,fPN,fPN,fPN,


									0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,

									0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,

									0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,

									0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,


									0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,

									0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,

									0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,

									0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,

								};


								const char szRTFSig[] = "rtf";

								#define cchRTFSig   3

								#define cbRTFSig    (cchRTFSig * sizeof(char))


								// Specifies the number of bytes we can safely "UngetChar"

								// before possibly underflowing the buffer.

								const int cbBackupMax = 4;


								// Bug2298 - I found an RTF writer which emits uppercase RTF keywords,

								// 			so I had to change IsLCAscii to IsAlphaChar for use in scanning

								//			for RTF keywords.

								inline BOOL IsAlphaChar(BYTE b)

								{

									return IN_RANGE('a', b, 'z') || IN_RANGE('A', b, 'Z');

								}


								// Quick and dirty tolower(b)

								inline BYTE REToLower(BYTE b)

								{

									Assert(!b || IsAlphaChar(b));

									return b ? (BYTE)(b | 0x20) : 0;

								}


								extern BOOL IsRTF(char *pstr);


								BOOL IsRTF(

									char *pstr)

								{

									if(!pstr || *pstr++ != '{' || *pstr++ != '\\')

										return FALSE;					// Quick out for most common cases


									if(*pstr == 'u')					// Bypass u of possible urtf

										pstr++;


									return !CompareMemory(szRTFSig, pstr, cbRTFSig);

								}


								/*

								 *	CRTFRead::InitLex()

								 *

								 *	@mfunc

								 *		Initialize the lexical analyzer. Reset the variables. if reading in

								 *		from resource file, sort the keyword list (). Uses global hinstRE

								 *		from the RichEdit to find out where its resources are.  Note: in

								 *		RichEdit 2.0, currently the resource option is not supported.

								 *

								 *	@rdesc

								 *		TRUE				If lexical analyzer was initialized

								 */

								BOOL CRTFRead::InitLex()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::InitLex");


									AssertSz(cKeywords == i_TokenIndexMax,

										"Keyword index enumeration is incompatible with rgKeyword[]");

									Assert(!_szText && !_pchRTFBuffer);


									// Allocate our buffers with an extra byte for szText so that hex

									// conversion doesn't have to worry about running off the end if the

									// first char is NULL

									if ((_szText	   = (BYTE *)PvAlloc(cachTextMax + 1, GMEM_ZEROINIT)) &&

										(_pchRTFBuffer = (BYTE *)PvAlloc(cachBufferMost, GMEM_ZEROINIT)))

									{

										return TRUE;					// Signal that lexer is initialized

									}


									_ped->GetCallMgr()->SetOutOfMemory();

									_ecParseError = ecLexInitFailed;

									return FALSE;

								}


								/*

								 *	CRTFRead::DeinitLex()

								 *

								 *	@mfunc

								 *		Shut down lexical analyzer

								 */

								void CRTFRead::DeinitLex()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::DeinitLex");


								#ifdef KEYWORD_RESOURCE

									if (hglbKeywords)

									{

										FreeResource(hglbKeywords);

										hglbKeywords = NULL;

										rgKeyword = NULL;

									}

								#endif


									FreePv(_szText);

									FreePv(_pchRTFBuffer);

								}


								/*

								 *	CRTFRead::GetChar()

								 *

								 *	@mfunc

								 *		Get next char, filling buffer as needed

								 *

								 *	@rdesc

								 *		BYTE			nonzero char value if success; else 0

								 */

								BYTE CRTFRead::GetChar()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::GetChar");


									if (_pchRTFCurrent == _pchRTFEnd && !FillBuffer())

									{

										_ecParseError = ecUnexpectedEOF;

										return 0;

									}

									return *_pchRTFCurrent++;

								}


								/*

								 *	CRTFRead::FillBuffer()

								 *

								 *	@mfunc

								 *		Fill RTF buffer & return != 0 if successful

								 *

								 *	@rdesc

								 *		LONG			# chars read

								 *

								 *	@comm

								 *		This routine doesn't bother copying anything down if

								 *		pchRTFCurrent <lt> pchRTFEnd so anything not read yet is lost.

								 *		The only exception to this is that it always copies down the

								 *		last two bytes read so that UngetChar() will work. ReadData()

								 *		actually counts on this behavior, so if you change it, change

								 *		ReadData() accordingly.

								 */

								LONG CRTFRead::FillBuffer()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::FillBuffer");


									LONG cchRead;


									if (!_pchRTFCurrent)

									{

										// No data yet, nothing for backup

										// Leave cbBackupMax NULL chars so backup

										// area of buffer doesn't contain garbage.


										for(int i = 0; i < cbBackupMax; i++)

										{

											_pchRTFBuffer[i] = 0;

										}

									}

									else

									{

										Assert(_pchRTFCurrent == _pchRTFEnd);


										// Copy most recently read chars in case

										//  we need to back up


										int cbBackup = min((UINT) cbBackupMax, DiffPtrs(_pchRTFCurrent, &_pchRTFBuffer[cbBackupMax]));

										int i;


										for(i = -1; i >= -cbBackup; i--)

											_pchRTFBuffer[cbBackupMax + i] = _pchRTFCurrent[i];


										if(cbBackup < cbBackupMax)

										{

											// NULL before the first valid character in the backup buffer

											_pchRTFBuffer[cbBackupMax + i] = 0;

										}

									}

									_pchRTFCurrent = &_pchRTFBuffer[cbBackupMax];


									// Fill buffer with as much as we can take given our starting offset

									_pes->dwError = _pes->pfnCallback(_pes->dwCookie,

																	  _pchRTFCurrent,

																	  cachBufferMost - cbBackupMax,

																	  &cchRead);

									if (_pes->dwError)

									{

										TRACEERRSZSC("RTFLEX: GetChar()", _pes->dwError);

										_ecParseError = ecGeneralFailure;

										return 0;

									}


									_pchRTFEnd = &_pchRTFBuffer[cbBackupMax + cchRead];		// Point the end


								#if defined(DEBUG) && !defined(MACPORT)

									if(_hfileCapture)

									{

										DWORD cbLeftToWrite = cchRead;

										DWORD cbWritten = 0;

										BYTE *pbToWrite = (BYTE *)_pchRTFCurrent;


										while(WriteFile(_hfileCapture,

														pbToWrite,

														cbLeftToWrite,

														&cbWritten,

														NULL) &&

														(pbToWrite += cbWritten,

														(cbLeftToWrite -= cbWritten)));

									}

								#endif


									return cchRead;

								}


								/*

								 *	CRTFRead::UngetChar()

								 *

								 *	@mfunc

								 *		Bump our file pointer back one char

								 *

								 *	@rdesc

								 *		BOOL				TRUE on success

								 *

								 *	@comm

								 *		You can safely UngetChar _at most_ cbBackupMax times without

								 *		error.

								 */

								BOOL CRTFRead::UngetChar()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::UngetChar");


									if (_pchRTFCurrent == _pchRTFBuffer || !_pchRTFCurrent)

									{

										Assert(0);

										_ecParseError = ecUnGetCharFailed;

										return FALSE;

									}


									--_pchRTFCurrent;

									return TRUE;

								}


								/*

								 *	CRTFRead::UngetChar(cch)

								 *

								 *	@mfunc

								 *		Bump our file pointer back 'cch' chars

								 *

								 *	@rdesc

								 *		BOOL				TRUE on success

								 *

								 *	@comm

								 *		You can safely UngetChar _at most_ cbBackupMax times without

								 *		error.

								 */

								BOOL CRTFRead::UngetChar(UINT cch)

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::UngetChar");


									AssertSz(cch <= cbBackupMax, "CRTFRead::UngetChar():  Number of UngetChar's "

																"exceeds size of backup buffer.");


									while(cch-- > 0)

									{

										if(!UngetChar())

											return FALSE;

									}


									return TRUE;

								}


								/*

								 *	CRTFRead::GetHex()

								 *

								 *	@mfunc

								 *		Get next char if hex and return hex value

								 *		If not hex, leave char in buffer and return 255

								 *

								 *	@rdesc

								 *		BYTE			hex value of GetChar() if hex; else 255

								 */

								BYTE CRTFRead::GetHex()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::GetHex");


									BYTE ch = GetChar();


									if(IsXDigit(ch))

										return (BYTE)(ch <= '9' ? ch - '0' : (ch & 0x4f) - 'A' + 10);

									if(ch)

										UngetChar();

									return 255;

								}


								/*

								 *	CRTFRead::GetHexSkipCRLF()

								 *

								 *	@mfunc

								 *		Get next char if hex and return hex value

								 *		If not hex, leave char in buffer and return 255

								 *

								 *	@rdesc

								 *		BYTE			hex value of GetChar() if hex; else 255

								 *

								 *	@devnote

								 *		Keep this in sync with GetHex above.

								 */

								BYTE CRTFRead::GetHexSkipCRLF()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::GetHexSkipCRLF");


									BYTE ch = GetChar();


									// Skip \r \n

									while(ch == CR || ch == LF)

										ch = GetChar();


									// Rest is same as CRTFRead::GetHex()

									if(IsXDigit(ch))

										return (BYTE)(ch <= '9' ? ch - '0' : (ch & 0x4f) - 'A' + 10);

									if(ch)

										UngetChar();

									return 255;

								}


								/*

								 *	CRTFRead::TokenGetHex()

								 *

								 *	@mfunc

								 *		Get an 8 bit character saved as a 2 hex digit value

								 *

								 *	@rdesc

								 *		TOKEN			value of hex number read in

								 */

								TOKEN CRTFRead::TokenGetHex()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenGetHex");


									BYTE bChar0 = GetHex();

									BYTE bChar1;


									if(bChar0 < 16 && (bChar1 = GetHex()) < 16)

										_token = (WORD)(bChar0 << 4 | bChar1);

									else

										_token = tokenError;


									return _token;

								}


								/*

								 *	CRTFRead::SkipToEndOfGroup()

								 *

								 *	@mfunc

								 *		Skip to end of current group

								 *

								 *	@rdesc

								 *		EC				An error code

								 */

								EC CRTFRead::SkipToEndOfGroup()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::SkipToEndOfGroup");


									INT		nDepth = 1;

									BYTE	ach;


									while(TRUE)

									{

										ach = GetChar();

										switch(ach)

										{

											case BSLASH:

											{

												BYTE achNext = GetChar();


												// EOF: goto done; else ignore NULLs

												if(!achNext && _ecParseError == ecUnexpectedEOF)

													goto done;


												if(achNext == 'b' && UngetChar() &&

													TokenGetKeyword() == tokenBinaryData)

												{

													// We've encountered the \binN tag in the RTF we want

													//	to skip.  _iParam contains N from \binN once the

													// 	tag is parsed by TokenGetKeyword()

													SkipBinaryData(_iParam);

												}

												break;

											}


											case LBRACE:

												nDepth++;

												break;


											case RBRACE:

												if (--nDepth <= 0)

													goto done;

												break;


											case 0:

												if(_ecParseError == ecUnexpectedEOF)

													goto done;


											default:

												// Detect Lead bytes here.

												int cTrailBytes = GetTrailBytesCount(ach, _nCodePage);

												if (cTrailBytes)

												{

													for (int i = 0; i < cTrailBytes; i++)

													{

														ach = GetChar();

														if(ach == 0 && _ecParseError == ecUnexpectedEOF)

															goto done;

													}

												}

												break;

										}

									}


									Assert(!_ecParseError);

									_ecParseError = ecUnexpectedEOF;


								done:

									return _ecParseError;

								}


								/*

								 *	CRTFRead::TokenFindKeyword(szKeyword)

								 *

								 *	@mfunc

								 *		Find keyword <p szKeyword> and return its token value

								 *

								 *	@rdesc

								 *		TOKEN			token number of keyword

								 */

								TOKEN CRTFRead::TokenFindKeyword(

									BYTE *	szKeyword)			// @parm Keyword to find

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenFindKeyword");


									INT				iMin;

									INT				iMax;

									INT				iMid;

									INT				nComp;

									BYTE *			pchCandidate;

									BYTE *			pchKeyword;

									const KEYWORD *	pk;


									AssertSz(szKeyword[0],

										"CRTFRead::TokenFindKeyword: null keyword");


								#ifdef RTF_HASHCACHE

									if ( _rtfHashInited )

									{

										// Hash is 23% faster than the following binary search on finds

										//  and 55% faster on misses: For 97 words stored in a 257 cache.

										//  Performance numbers will change when the total stored goes up.

										pk = HashKeyword_Fetch ( (CHAR *) szKeyword );

									}

									else

								#endif

									{

										iMin = 0;

										iMax = cKeywords - 1;

										pk = NULL;

										do				// Note (MS3): Hash would be quicker than binary search

										{

											iMid		 = (iMin + iMax) / 2;

											pchCandidate = (BYTE *)rgKeyword[iMid].szKeyword;

											pchKeyword	 = szKeyword;

											while (!(nComp = REToLower(*pchKeyword) - *pchCandidate)	// Be sure to match

												&& *pchKeyword)											//  terminating 0's

											{

												pchKeyword++;

												pchCandidate++;

											}

											if (nComp < 0)

												iMax = iMid - 1;

											else if (nComp)

												iMin = iMid + 1;

											else

											{

												pk = &rgKeyword[iMid];

												break;

											}

										} while (iMin <= iMax);

									}


									if(pk)

									{

										_token = pk->token;


										// here, we log the RTF keyword scan to aid in tracking RTF tag ocverage

								// TODO: Implement RTF tag logging for the Mac and WinCE

								#if defined(DEBUG) && !defined(MACPORT) && !defined(PEGASUS)

										if(_prtflg)

										{

								#ifdef RTF_HASCACHE

											_prtflg->AddAt(szKeyword);

								#else

											_prtflg->AddAt((size_t)iMid);

								#endif

										}

								#endif

									}

									else

										_token = tokenUnknownKeyword;		// No match: TODO: place to take


									return _token;				 			//  care of unrecognized RTF

								}


								/*

								 *	CRTFRead::TokenGetKeyword()

								 *

								 *	@mfunc

								 *		Collect a keyword and its parameter. Return token's keyword

								 *

								 *	@rdesc

								 *		TOKEN				token number of keyword

								 *

								 *	@comm

								 *		Most RTF control words (keywords) consist of a span of lower-case

								 *		ASCII letters possibly followed by a span of decimal digits. Other

								 *		control words consist of a single character that isn't LC ASCII. No

								 *		control words contain upper-case characters.

								 */

								TOKEN CRTFRead::TokenGetKeyword()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenGetKeyword");


									BYTE		ach = GetChar();

									BYTE		*pach;

									SHORT		cachKeyword = 1;

									BYTE		szKeyword[cachKeywordMax];


									_szParam[0] = '\0';							// Clear parameter

									_iParam = 0;


									if(!IsAlphaChar(ach))						// Not alpha, i.e.,

									{											//  single char

										if (ach == '\'')						// Most common case needs

										{										//  special treatment

											// Convert hex to char and store result in _token

											if(TokenGetHex() == tokenError)

											{

												_ecParseError = ecUnexpectedChar;

												goto TokenError;

											}

											if((_token == CR || _token == LF) && FInDocTextDest())

											{

												// Add raw CR or LF in the byte stream as a \par

												return tokenEndParagraph;

											}

										}

										else

										{

											// Check for other known symbols

											const BYTE *pachSym = szSymbolKeywords;


											while(ach != *pachSym && *pachSym)

												pachSym++;

											if(*pachSym)						// Found one

											{

												_token = tokenSymbol[pachSym - szSymbolKeywords];

												if(_token > 0x7F)				// Token or larger Unicode

													return _token;				//  value

											}

											else if (!ach)						// No more input chars

												goto TokenError;

											else								// Code for unrecognized RTF

												_token = ach;					// We'll just insert it for now

										}

										_token = TokenGetText((BYTE)_token);

										return _token;

									}


									szKeyword[0] = ach;							// Collect keyword that starts

									pach = szKeyword + 1;						// 	with ASCII

									while (cachKeyword < cachKeywordMax &&

										   IsAlphaChar(ach = GetChar()))

									{

										cachKeyword++;

										*pach++ = ach;

									}


									if (cachKeyword == cachKeywordMax)

									{

										_ecParseError = ecKeywordTooLong;

										goto TokenError;

									}

									*pach = '\0';								// Terminate keyword


									if (IsDigit(ach) || ach == '-')				// Collect parameter

									{

										pach = _szParam;

										*pach++ = ach;

										if(ach != '-')

											_iParam = ach - '0';				// Get parameter value


										while (IsDigit(ach = GetChar()))

										{

											_iParam = _iParam*10 + ach - '0';

											*pach++ = ach;

										}

										*pach = '\0';							// Terminate parameter string

										if (_szParam[0] == '-')

											_iParam = -_iParam;

									}


									if (!_ecParseError &&						// We overshot:

										(ach == ' ' || UngetChar()))			//  if not ' ', unget char

											return TokenFindKeyword(szKeyword);	// Find and return keyword


								TokenError:

									TRACEERRSZSC("TokenGetKeyword()", _ecParseError);

									return _token = tokenError;

								}


								/*

								 *	CRTFRead::TokenGetText(ach)

								 *

								 *	@mfunc

								 *		Collect a string of text starting with the char <p ach> and treat as a

								 *		single token. The string ends when a LBRACE, RBRACE, or single '\\' is found.

								 *

								 *	@devnote

								 *		We peek past the '\\' for \\'xx, which we decode and keep on going;

								 *		else we return in a state where the next character is the '\\'.

								 *

								 *	@rdesc

								 *		TOKEN			Token number of next token (tokenText or tokenError)

								 */

								TOKEN CRTFRead::TokenGetText(

									BYTE ach)				// @parm First char of 8-bit text string

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenGetText");


									BYTE *	pach = _szText;

									SHORT	cachText = 0;

									LONG	CodePage = _pstateStackTop->nCodePage;

									BOOL	fAllASCII = TRUE;

									int		cTrailBytesNeeded = 0;


									_token = tokenError;						// Default error


									// FUTURE(BradO):  This 'goto' into a while loop is pretty weak.

									//	Restructure this 'while' loop such that the 'goto' is removed.


									// Add character passed into routine

									goto add;


									// If cTrailBytesNeeded is non-zero, we need to get all the trail bytes.  Otherwise,

									// a string end in the middle of a DBC or UTF-8 will cause bad display/print problem

									// - 5 to allow extra space for up to 4 bytes for UTF-8 and Null char

									while (cachText < cachTextMax - 5 || cTrailBytesNeeded)

									{

										ach = GetChar();

										switch (ach)

										{

											case BSLASH:

											{

												// FUTURE(BradO):  This code looks ALOT like TokenGetKeyword.

												//	We should combine the two into a common routine.


												BYTE achNext;


												// Get char after BSLASH

												achNext = GetChar();

												if(!achNext)

													goto error;


												if(achNext == '\'')					// Handle most frequent

												{									//  case here

													if(TokenGetHex() == tokenError)

													{

														if(cTrailBytesNeeded)

														{

															// The trail-byte must be a raw BSLASH.

															// Unget the single-quote.


															if(!UngetChar())

																goto error;

															// fall through to add BSLASH

														}

														else

														{

															_ecParseError = ecUnexpectedChar;

															goto error;

														}

													}

													else

													{

														ach = (BYTE)_token;

														if (cTrailBytesNeeded == 0 && (ach == CR || ach == LF) &&

															FInDocTextDest())

														{

															// Here, we have a raw CR or LF in document text.

															// Unget the whole lot of characters and bail out.

															// TokenGetKeyword will convert this CR or LF into

															// a \par.


															if(!UngetChar(4))

																goto error;

															goto done;

														}

													}

													goto add;

												}


												// Check next byte against list of RTF symbol

												// NOTE:- we need to check for RTF symbol even if we

												// are expecting a trail byte.  According to the rtf spec,

												// we cannot just take this backslash as trail byte.

												// HWC 9/97


												const BYTE *pachSymbol = szSymbolKeywords;

												while(achNext != *pachSymbol && *pachSymbol)

													pachSymbol++;


												TOKEN tokenTmp;


												if (*pachSymbol &&

													(tokenTmp = tokenSymbol[pachSymbol - szSymbolKeywords])

														 <= 0x7F)

												{

													ach = (BYTE)tokenTmp;

													goto add;

												}


												// In either of the last two cases below, we will want

												// to unget the byte following the BSLASH

												if(!UngetChar())

													goto error;


												if(cTrailBytesNeeded && !IsAlphaChar(achNext))

												{

													// In this situation, either this BSLASH begins the next

													// RTF keyword or it is a raw BSLASH which is the trail

													// byte for a DBCS character.


													// I think a fair assumption here is that if an alphanum

													// follows the BSLASH, that the BSLASH begins the next

													// RTF keyword.


													// add the raw BSLASH

													goto add;

												}


												// Here, my guess is that the BSLASH begins the next RTF

												// keyword, so unget the BSLASH

											    if(!UngetChar())

													goto error;


												goto done;

											}


											case LBRACE:						// End of text string

											case RBRACE:

												if(cTrailBytesNeeded)

												{

													// Previous char was a lead-byte of a DBCS pair or UTF-8, which

													// makes this char a raw trail-byte.

													goto add;

												}


												if(!UngetChar())				// Unget delimeter

													goto error;

												goto done;


											case LF:							// Throw away noise chars

											case CR:

												break;


											case 0:

												if(_ecParseError == ecUnexpectedEOF)

													goto done;

												ach = ' ';						// Replace NULL by blank


											default:							// Collect chars

								add:

												// Outstanding chars to be skipped after \uN tag

												if(_cbSkipForUnicode)

												{

													_cbSkipForUnicode--;

													continue;

												}


												*pach++ = ach;

												++cachText;

												if(ach > 0x7F)

													fAllASCII = FALSE;


												// Check if we are expecting more trail bytes

												if (cTrailBytesNeeded)

													cTrailBytesNeeded--;

												else

													cTrailBytesNeeded = GetTrailBytesCount(ach, CodePage);

												Assert(cTrailBytesNeeded >= 0);

										}

									}


								done:

									_token = (WORD)(fAllASCII ? tokenASCIIText : tokenText);

									*pach = '\0';								// Terminate token string


								error:

									return _token;

								}


								/*

								 *	CRTFRead::TokenGetToken()

								 *

								 *	@mfunc

								 *		This function reads in next token from input stream

								 *

								 *	@rdesc

								 *		TOKEN				token number of next token

								 */

								TOKEN CRTFRead::TokenGetToken()

								{

									TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenGetToken");


									BYTE		ach;


									_tokenLast	= _token;					// Used by \* destinations and FE

									_token = tokenEOF;						// Default end-of-file


								SkipNoise:

									ach = GetChar();

									switch (ach)

									{

									case CR:

									case LF:

										goto SkipNoise;


									case LBRACE:

										_token = tokenStartGroup;

										break;


									case RBRACE:

										_token = tokenEndGroup;

										break;


									case BSLASH:

										_token = TokenGetKeyword();

										break;


									case 0:

										if(_ecParseError == ecUnexpectedEOF)

											break;

										ach = ' ';							// Replace NULL by blank

																			// Fall thru to default

									default:

										if( !_pstateStackTop )

										{

											TRACEWARNSZ("Unexpected token in rtf file");

											Assert(_token == tokenEOF);

											if (_ped->Get10Mode())

												_ecParseError = ecUnexpectedToken;	// Signal bad file

										}

										else if (_pstateStackTop->sDest == destObjectData ||

												 _pstateStackTop->sDest == destPicture )

										// not text but data

										{

											_token = (WORD)(tokenObjectDataValue + _pstateStackTop->sDest

															- destObjectData);

											UngetChar();

										}

										else

											_token = TokenGetText(ach);

									}

									return _token;

								}


								/*

								 *	CRTFRead::FInDocTextDest()

								 *

								 *	@mfunc

								 *		Returns a BOOL indicating if the current destination is one in which

								 *		we would encounter document text.

								 *

								 *	@rdesc

								 *		BOOL	indicates the current destination may contain document text.

								 */

								BOOL CRTFRead::FInDocTextDest() const

								{

									switch(_pstateStackTop->sDest)

									{

										case destRTF:

										case destField:

										case destFieldResult:

										case destFieldInstruction:

										case destParaNumbering:

										case destParaNumText:

										case destNULL:

											return TRUE;


										case destFontTable:

										case destRealFontName:

										case destObjectClass:

										case destObjectName:

										case destFollowingPunct:

										case destLeadingPunct:

										case destColorTable:

										case destBinary:

										case destObject:

										case destObjectData:

										case destPicture:

										case destDocumentArea:

											return FALSE;


										default:

											AssertSz(0, "CRTFRead::FInDocTextDest():  New destination "

															"encountered - update enum in _rtfread.h");

											return TRUE;

									}

								}