|
|
// MLLBCons.cpp : Implementation of CMLLBCons
#include "private.h"
#include "mllbcons.h"
#ifdef ASTRIMPL
#include "mlswalk.h"
#endif
#include "mlstrbuf.h"
/////////////////////////////////////////////////////////////////////////////
// Line Break Character Table
// Characters that must not be left at the end of a line (opening brackets,
// opening quotation marks and similar). BreakLineML looks every non-space
// character up in this table and records a hit in Flags.fNonBreakNext, which
// it consults while handling the characters that follow.
// The entries MUST stay in strictly ascending order: ScanWChar binary-searches
// this table (TestTable checks the ordering when MLLBCONS_DEBUG is defined).
const WCHAR awchNonBreakingAtLineEnd[] = { 0x0028, // LEFT PARENTHESIS
0x005B, // LEFT SQUARE BRACKET
0x007B, // LEFT CURLY BRACKET
0x00AB, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x2018, // LEFT SINGLE QUOTATION MARK
0x201C, // LEFT DOUBLE QUOTATION MARK
0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x2045, // LEFT SQUARE BRACKET WITH QUILL
0x207D, // SUPERSCRIPT LEFT PARENTHESIS
0x208D, // SUBSCRIPT LEFT PARENTHESIS
0x226A, // MUCH LESS THAN
0x3008, // LEFT ANGLE BRACKET
0x300A, // LEFT DOUBLE ANGLE BRACKET
0x300C, // LEFT CORNER BRACKET
0x300E, // LEFT WHITE CORNER BRACKET
0x3010, // LEFT BLACK LENTICULAR BRACKET
0x3014, // LEFT TORTOISE SHELL BRACKET
0x3016, // LEFT WHITE LENTICULAR BRACKET
0x3018, // LEFT WHITE TORTOISE SHELL BRACKET
0x301A, // LEFT WHITE SQUARE BRACKET
0x301D, // REVERSED DOUBLE PRIME QUOTATION MARK
0xFD3E, // ORNATE LEFT PARENTHESIS
0xFE35, // PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
0xFE37, // PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
0xFE39, // PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
0xFE3B, // PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
0xFE3D, // PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
0xFE3F, // PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
0xFE41, // PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
0xFE43, // PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
0xFE59, // SMALL LEFT PARENTHESIS
0xFE5B, // SMALL LEFT CURLY BRACKET
0xFE5D, // SMALL LEFT TORTOISE SHELL BRACKET
0xFF08, // FULLWIDTH LEFT PARENTHESIS
0xFF1C, // FULLWIDTH LESS-THAN SIGN
0xFF3B, // FULLWIDTH LEFT SQUARE BRACKET
0xFF5B, // FULLWIDTH LEFT CURLY BRACKET
0xFF62, // HALFWIDTH LEFT CORNER BRACKET
0xFFE9 // HALFWIDTH LEFTWARDS ARROW
};
// Characters that must not begin a line (closing brackets and quotation marks,
// dashes, commas and full stops, small kana, sound and iteration marks, ...).
// In BreakLineML a pending candidate break position is not committed when the
// character that would start the next line is found in this table.
// The entries MUST stay in strictly ascending order: ScanWChar binary-searches
// this table (TestTable checks the ordering when MLLBCONS_DEBUG is defined).
const WCHAR awchNonBreakingAtLineStart[] = { 0x0029, // RIGHT PARENTHESIS
0x002D, // HYPHEN
0x005D, // RIGHT SQUARE BRACKET
0x007D, // RIGHT CURLY BRACKET
0x00AD, // OPTIONAL HYPHEN
0x00BB, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x02C7, // CARON
0x02C9, // MODIFIER LETTER MACRON
0x055D, // ARMENIAN COMMA
0x060C, // ARABIC COMMA
0x2013, // EN DASH
0x2014, // EM DASH
0x2016, // DOUBLE VERTICAL LINE
0x201D, // RIGHT DOUBLE QUOTATION MARK
0x2022, // BULLET
0x2025, // TWO DOT LEADER
0x2026, // HORIZONTAL ELLIPSIS
0x2027, // HYPHENATION POINT
0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x2046, // RIGHT SQUARE BRACKET WITH QUILL
0x207E, // SUPERSCRIPT RIGHT PARENTHESIS
0x208E, // SUBSCRIPT RIGHT PARENTHESIS
0x226B, // MUCH GREATER THAN
0x2574, // BOX DRAWINGS LIGHT LEFT
0x3001, // IDEOGRAPHIC COMMA
0x3002, // IDEOGRAPHIC FULL STOP
0x3003, // DITTO MARK
0x3005, // IDEOGRAPHIC ITERATION MARK
0x3009, // RIGHT ANGLE BRACKET
0x300B, // RIGHT DOUBLE ANGLE BRACKET
0x300D, // RIGHT CORNER BRACKET
0x300F, // RIGHT WHITE CORNER BRACKET
0x3011, // RIGHT BLACK LENTICULAR BRACKET
0x3015, // RIGHT TORTOISE SHELL BRACKET
0x3017, // RIGHT WHITE LENTICULAR BRACKET
0x3019, // RIGHT WHITE TORTOISE SHELL BRACKET
0x301B, // RIGHT WHITE SQUARE BRACKET
0x301E, // DOUBLE PRIME QUOTATION MARK
0x3041, // HIRAGANA LETTER SMALL A
0x3043, // HIRAGANA LETTER SMALL I
0x3045, // HIRAGANA LETTER SMALL U
0x3047, // HIRAGANA LETTER SMALL E
0x3049, // HIRAGANA LETTER SMALL O
0x3063, // HIRAGANA LETTER SMALL TU
0x3083, // HIRAGANA LETTER SMALL YA
0x3085, // HIRAGANA LETTER SMALL YU
0x3087, // HIRAGANA LETTER SMALL YO
0x308E, // HIRAGANA LETTER SMALL WA
0x309B, // KATAKANA-HIRAGANA VOICED SOUND MARK
0x309C, // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
0x309D, // HIRAGANA ITERATION MARK
0x309E, // HIRAGANA VOICED ITERATION MARK
0x30A1, // KATAKANA LETTER SMALL A
0x30A3, // KATAKANA LETTER SMALL I
0x30A5, // KATAKANA LETTER SMALL U
0x30A7, // KATAKANA LETTER SMALL E
0x30A9, // KATAKANA LETTER SMALL O
0x30C3, // KATAKANA LETTER SMALL TU
0x30E3, // KATAKANA LETTER SMALL YA
0x30E5, // KATAKANA LETTER SMALL YU
0x30E7, // KATAKANA LETTER SMALL YO
0x30EE, // KATAKANA LETTER SMALL WA
0x30F5, // KATAKANA LETTER SMALL KA
0x30F6, // KATAKANA LETTER SMALL KE
0x30FC, // KATAKANA-HIRAGANA PROLONGED SOUND MARK
0x30FD, // KATAKANA ITERATION MARK
0x30FE, // KATAKANA VOICED ITERATION MARK
0xFD3F, // ORNATE RIGHT PARENTHESIS
0xFE30, // VERTICAL TWO DOT LEADER
0xFE31, // VERTICAL EM DASH
0xFE33, // VERTICAL LOW LINE
0xFE34, // VERTICAL WAVY LOW LINE
0xFE36, // PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
0xFE38, // PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
0xFE3A, // PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
0xFE3C, // PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
0xFE3E, // PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
0xFE40, // PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
0xFE42, // PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
0xFE44, // PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
0xFE4F, // WAVY LOW LINE
0xFE50, // SMALL COMMA
0xFE51, // SMALL IDEOGRAPHIC COMMA
0xFE5A, // SMALL RIGHT PARENTHESIS
0xFE5C, // SMALL RIGHT CURLY BRACKET
0xFE5E, // SMALL RIGHT TORTOISE SHELL BRACKET
0xFF09, // FULLWIDTH RIGHT PARENTHESIS
0xFF0C, // FULLWIDTH COMMA
0xFF0E, // FULLWIDTH FULL STOP
0xFF1E, // FULLWIDTH GREATER-THAN SIGN
0xFF3D, // FULLWIDTH RIGHT SQUARE BRACKET
0xFF40, // FULLWIDTH GRAVE ACCENT
0xFF5C, // FULLWIDTH VERTICAL LINE
0xFF5D, // FULLWIDTH RIGHT CURLY BRACKET
0xFF5E, // FULLWIDTH TILDE
0xFF61, // HALFWIDTH IDEOGRAPHIC FULL STOP
0xFF63, // HALFWIDTH RIGHT CORNER BRACKET
0xFF64, // HALFWIDTH IDEOGRAPHIC COMMA
0xFF67, // HALFWIDTH KATAKANA LETTER SMALL A
0xFF68, // HALFWIDTH KATAKANA LETTER SMALL I
0xFF69, // HALFWIDTH KATAKANA LETTER SMALL U
0xFF6A, // HALFWIDTH KATAKANA LETTER SMALL E
0xFF6B, // HALFWIDTH KATAKANA LETTER SMALL O
0xFF6C, // HALFWIDTH KATAKANA LETTER SMALL YA
0xFF6D, // HALFWIDTH KATAKANA LETTER SMALL YU
0xFF6E, // HALFWIDTH KATAKANA LETTER SMALL YO
0xFF6F, // HALFWIDTH KATAKANA LETTER SMALL TU
0xFF70, // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
0xFF9E, // HALFWIDTH KATAKANA VOICED SOUND MARK
0xFF9F, // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
0xFFEB // HALFWIDTH RIGHTWARDS ARROW
};
// Characters that BreakLineML treats as inter-word spaces: the start of a run
// of them becomes a candidate break position, and every character of the run
// is counted in the skip length.
// The entries MUST stay in strictly ascending order: ScanWChar binary-searches
// this table (TestTable checks the ordering when MLLBCONS_DEBUG is defined).
const WCHAR awchRomanInterWordSpace[] = { 0x0009, // TAB
0x0020, // SPACE
0x2002, // EN SPACE
0x2003, // EM SPACE
0x2004, // THREE-PER-EM SPACE
0x2005, // FOUR-PER-EM SPACE
0x2006, // SIX-PER-EM SPACE
0x2007, // FIGURE SPACE
0x2008, // PUNCTUATION SPACE
0x2009, // THIN SPACE
0x200A, // HAIR SPACE
0x200B // ZERO WIDTH SPACE
};
// Reports whether wch occurs in the table awch.
//
//   awch       - table of characters sorted in strictly ascending order
//   nArraySize - number of entries in awch; zero or a negative value means
//                "empty table"
//   wch        - character to look for
//
// Returns TRUE when wch is present, FALSE otherwise.
//
// The lookup is a binary search, so the result is only meaningful for a
// sorted table. An empty table is rejected up front so that no element is
// ever read outside the array.
BOOL ScanWChar(const WCHAR awch[], int nArraySize, WCHAR wch)
{
    if (nArraySize <= 0)
        return FALSE;

    int iLow = 0;
    int iHigh = nArraySize - 1;

    // Invariant: if wch is in the table, its index lies in [iLow, iHigh].
    while (iLow <= iHigh)
    {
        const int iMid = iLow + (iHigh - iLow) / 2;

        if (wch < awch[iMid])
            iHigh = iMid - 1;
        else if (wch > awch[iMid])
            iLow = iMid + 1;
        else
            return TRUE;
    }

    return FALSE;
}
#ifdef MLLBCONS_DEBUG
// Debug-only self test for one lookup table.
//
//   awch       - table to verify
//   nArraySize - number of entries in awch
//
// Asserts that
//   1. the table is sorted in strictly ascending order (required by the
//      binary search in ScanWChar), and
//   2. ScanWChar agrees with a plain linear search for every 16-bit code
//      unit, and finds exactly nArraySize distinct characters.
void TestTable(const WCHAR awch[], int nArraySize)
{
    // Declared at function scope: the index is inspected after the inner
    // loop below, which is only valid on a conforming compiler if it
    // outlives the for statement.
    int i;

    for (i = 0; i < nArraySize - 1; i++)
    {
        ASSERT(awch[i] < awch[i + 1]);
    }

    int cFound = 0;
    for (int n = 0; n < 0x10000; n++)
    {
        if (ScanWChar(awch, nArraySize, (WCHAR)n))
        {
            cFound++;

            // Every hit reported by the binary search must really be in the table.
            for (i = 0; i < nArraySize; i++)
            {
                if (awch[i] == n)
                    break;
            }
            ASSERT(i < nArraySize);
        }
    }

    // ...and every table entry must have been found exactly once.
    ASSERT(cFound == nArraySize);
}
#endif
/////////////////////////////////////////////////////////////////////////////
// CMLLBCons
// Finds where to break one line of a multilanguage string.
//
//   pSrcMLStr   - source string; it is queried for IMLangStringWStr
//   lSrcPos     - start of the range to examine
//   lSrcLen     - length of the range to examine (lSrcPos/lSrcLen are
//                 normalized against the string length by RegularizePosLen)
//   cMinColumns - if the break found leaves fewer than this many characters on
//                 the line, and the line was not ended by a line-break
//                 character, the default break at
//                 min(lSrcLen, cMaxColumns) is used instead
//   cMaxColumns - maximum width of the line in columns; a character whose
//                 CT_CTYPE3 type has C3_HALFWIDTH counts as 1 column, any
//                 other character as 2
//   plLineLen   - optional; receives the length of the line, measured from
//                 lSrcPos
//   plSkipLen   - optional; receives the number of characters (spaces and/or
//                 line-break characters) to skip after the line
//
// Returns the HRESULT of the first failing call, otherwise a success code.
// On failure both outputs are set to 0.
//
// All break positions (lBreakPos, lCandPos, lPrevBreakPos) are absolute
// positions in the source string; only the value stored in *plLineLen is
// relative to lSrcPos.
STDMETHODIMP CMLLBCons::BreakLineML(IMLangString* pSrcMLStr, long lSrcPos, long lSrcLen, long cMinColumns, long cMaxColumns, long* plLineLen, long* plSkipLen)
{
#ifdef MLLBCONS_DEBUG
    TestTable(awchNonBreakingAtLineEnd, ARRAYSIZE(awchNonBreakingAtLineEnd));
    TestTable(awchNonBreakingAtLineStart, ARRAYSIZE(awchNonBreakingAtLineStart));
    TestTable(awchRomanInterWordSpace, ARRAYSIZE(awchRomanInterWordSpace));
#endif
    ASSERT_THIS;
    ASSERT_READ_PTR(pSrcMLStr);
    ASSERT_WRITE_PTR_OR_NULL(plLineLen);
    ASSERT_WRITE_PTR_OR_NULL(plSkipLen);

    HRESULT hr;
    IMLangStringWStr* pMLStrWStr = NULL;
    long lStrLen;
    long lBreakPos = -1; // Break at default position(cMaxColumns)
    long lSkipLen = 0;
    long lPrevBreakPos = 0;
    long lPrevSkipLen = 0;

    hr = pSrcMLStr->QueryInterface(IID_IMLangStringWStr, (void**)&pMLStrWStr);
    if (SUCCEEDED(hr))
    {
        if (SUCCEEDED(hr = pSrcMLStr->GetLength(&lStrLen)) &&
            SUCCEEDED(hr = ::RegularizePosLen(lStrLen, &lSrcPos, &lSrcLen)))
        {
            long cColumns = 0;
#ifndef ASTRIMPL
            long lSrcPosTemp = lSrcPos;
            long lSrcLenTemp = lSrcLen;
#endif
            long lCandPos = 0;
            struct
            {
                unsigned fDone : 1;             // Stop scanning, the break has been decided
                unsigned fInSpaces : 1;         // The previous character was an inter-word space
                unsigned fFEChar : 1;           // The current character is katakana/hiragana/fullwidth/ideograph
                unsigned fInFEChar : 1;         // The previous non-space character was such a character
                unsigned fBreakByEndOfLine : 1; // The line was ended by a line-break character
                unsigned fNonBreakNext : 1;     // The previous character is in awchNonBreakingAtLineEnd
                unsigned fHaveCandPos : 1;      // lCandPos holds a candidate that is not committed yet
                unsigned fSlashR : 1;           // A '\r' has been seen
            } Flags = {0, 0, 0, 0, 0, 0, 0, 0};
#ifdef ASTRIMPL
            CCharType<CT_CTYPE3, 128> ct3;
            CMLStrWalkW StrWalk(pMLStrWStr, lSrcPos, lSrcLen);
#else
            LCID locale;
            hr = pMLStrWStr->GetLocale(0, -1, &locale, NULL, NULL);
            CCharType<CT_CTYPE3, 128> ct3(locale);
#endif

#ifdef ASTRIMPL
            while (StrWalk.Lock(hr))
            {
                ct3.Flush();

                for (int iCh = 0; iCh < StrWalk.GetCCh(); iCh++)
                {
                    const WCHAR wch = StrWalk.GetStr()[iCh];
                    const WORD wCharType3 = ct3.GetCharType(pSrcMLStr, StrWalk.GetPos() + iCh, StrWalk.GetLen() - iCh, &hr);
                    if (FAILED(hr))
                        break;
#else
            while (lSrcLenTemp > 0 && SUCCEEDED(hr))
            {
                WCHAR* pszBuf;
                long cchBuf;
                long lLockedLen;

                ct3.Flush();

                if (SUCCEEDED(hr = pMLStrWStr->LockWStr(lSrcPosTemp, lSrcLenTemp, MLSTR_READ, 0, &pszBuf, &cchBuf, &lLockedLen)))
                {
                    for (int iCh = 0; iCh < cchBuf; iCh++)
                    {
                        const WCHAR wch = pszBuf[iCh];
                        const WORD wCharType3 = ct3.GetCharType(pszBuf + iCh, cchBuf - iCh);
#endif
                        const int nWidth = (wCharType3 & C3_HALFWIDTH) ? 1 : 2;

                        if (wch == L'\r' && !Flags.fSlashR)
                        {
                            // Remember the '\r'; the line is ended when the next character is seen
                            Flags.fSlashR = TRUE;
                        }
                        else if (wch == L'\n' || Flags.fSlashR) // End of line
                        {
                            Flags.fDone = TRUE;
                            Flags.fBreakByEndOfLine = TRUE;
                            if (Flags.fInSpaces)
                            {
                                Flags.fHaveCandPos = FALSE;
                                lBreakPos = lCandPos;
                                lSkipLen++; // Skip spaces and line break character
                            }
                            else
                            {
#ifdef ASTRIMPL
                                lBreakPos = StrWalk.GetPos() + iCh; // Break at right before the end of line
#else
                                lBreakPos = lSrcPosTemp + iCh; // Break at right before the end of line
#endif
                                if (Flags.fSlashR)
                                    lBreakPos--;

                                lSkipLen = 1; // Skip line break character
                            }
                            if (wch == L'\n' && Flags.fSlashR)
                                lSkipLen++; // "\r\n" : skip both characters
                            break;
                        }
                        else if (ScanWChar(awchRomanInterWordSpace, ARRAYSIZE(awchRomanInterWordSpace), wch)) // Spaces
                        {
                            if (!Flags.fInSpaces)
                            {
                                Flags.fHaveCandPos = TRUE;
#ifdef ASTRIMPL
                                lCandPos = StrWalk.GetPos() + iCh; // Break at right before the spaces
#else
                                lCandPos = lSrcPosTemp + iCh; // Break at right before the spaces
#endif
                                lSkipLen = 0;
                            }
                            Flags.fInSpaces = TRUE;
                            lSkipLen++; // Skip continuous spaces after breaking
                        }
                        else // Other characters
                        {
                            Flags.fFEChar = ((wCharType3 & (C3_KATAKANA | C3_HIRAGANA | C3_FULLWIDTH | C3_IDEOGRAPH)) != 0);

                            if ((Flags.fFEChar || Flags.fInFEChar) && !Flags.fNonBreakNext && !Flags.fInSpaces)
                            {
                                Flags.fHaveCandPos = TRUE;
#ifdef ASTRIMPL
                                lCandPos = StrWalk.GetPos() + iCh; // Break at right before or after the FE char
#else
                                lCandPos = lSrcPosTemp + iCh; // Break at right before or after the FE char
#endif
                                lSkipLen = 0;
                            }
                            Flags.fInFEChar = Flags.fFEChar;
                            Flags.fInSpaces = FALSE;

                            if (Flags.fHaveCandPos)
                            {
                                // Commit the candidate unless this character must not start a line
                                Flags.fHaveCandPos = FALSE;
                                if (!ScanWChar(awchNonBreakingAtLineStart, ARRAYSIZE(awchNonBreakingAtLineStart), wch))
                                    lBreakPos = lCandPos;
                            }

                            if (cColumns + nWidth > cMaxColumns)
                            {
                                Flags.fDone = TRUE;
                                if (Flags.fNonBreakNext && lPrevSkipLen)
                                {
                                    lBreakPos = lPrevBreakPos;
                                    lSkipLen = lPrevSkipLen;
                                }
                                break;
                            }

                            Flags.fNonBreakNext = ScanWChar(awchNonBreakingAtLineEnd, ARRAYSIZE(awchNonBreakingAtLineEnd), wch);
                            if (Flags.fNonBreakNext)
                            {
                                // Need to remember the previous break position in case the line is terminated by the max columns
                                lPrevBreakPos = lBreakPos;
                                lPrevSkipLen = lSkipLen;
                            }
                        }

                        cColumns += nWidth;
                    }
#ifdef ASTRIMPL
                StrWalk.Unlock(hr);

                if (Flags.fDone && SUCCEEDED(hr))
                    break;
#else
                    HRESULT hrTemp = pMLStrWStr->UnlockWStr(pszBuf, 0, NULL, NULL);
                    if (FAILED(hrTemp) && SUCCEEDED(hr))
                        hr = hrTemp;

                    if (Flags.fDone && SUCCEEDED(hr))
                        break;

                    lSrcPosTemp += lLockedLen;
                    lSrcLenTemp -= lLockedLen;
                }
#endif
            }

            if (Flags.fHaveCandPos)
                lBreakPos = lCandPos;

            if (SUCCEEDED(hr) && !Flags.fBreakByEndOfLine && lBreakPos - lSrcPos < cMinColumns)
            {
                lBreakPos = min(lSrcLen, cMaxColumns) + lSrcPos; // Default breaking
                lSkipLen = 0;
            }

            if (SUCCEEDED(hr) && !Flags.fDone)
            {
                // The whole range fits on the line. lBreakPos is an absolute
                // position, so the end of the range is lSrcPos + lSrcLen.
                if (Flags.fInSpaces)
                {
                    lBreakPos = lSrcPos + lSrcLen - lSkipLen; // Trailing spaces are skipped
                }
                else
                {
                    lBreakPos = lSrcPos + lSrcLen;
                    lSkipLen = 0;
                }
                if (Flags.fSlashR)
                {
                    // The range ended with a '\r' that was never followed by another character
                    lBreakPos--;
                    lSkipLen++;
                }
            }
        }

        // Release the interface on every path once QueryInterface has
        // succeeded, including a failed GetLength or RegularizePosLen.
        pMLStrWStr->Release();
    }

    if (SUCCEEDED(hr))
    {
        if (plLineLen)
            *plLineLen = lBreakPos - lSrcPos;
        if (plSkipLen)
            *plSkipLen = lSkipLen;
    }
    else
    {
        if (plLineLen)
            *plLineLen = 0;
        if (plSkipLen)
            *plSkipLen = 0;
    }

    return hr;
}
// Finds where to break one line of a wide-character buffer.
//
//   locale      - locale assigned to the whole temporary string
//   pszSrc      - source characters
//   cchSrc      - number of characters in pszSrc
//   lMaxColumns - maximum line width, passed to BreakLineML as cMaxColumns
//   pcchLine    - optional; receives the character count GetWStr reports for
//                 the line
//   pcchSkip    - optional; receives the character count GetWStr reports for
//                 the part to skip after the line
//
// The buffer is wrapped in a temporary IMLangStringWStr and handed to
// BreakLineML with a minimum of 0 columns. Returns the first failing HRESULT;
// on failure both outputs are set to 0.
STDMETHODIMP CMLLBCons::BreakLineW(LCID locale, const WCHAR* pszSrc, long cchSrc, long lMaxColumns, long* pcchLine, long* pcchSkip)
{
    ASSERT_THIS;
    ASSERT_READ_BLOCK(pszSrc, cchSrc);
    ASSERT_WRITE_PTR_OR_NULL(pcchLine);
    ASSERT_WRITE_PTR_OR_NULL(pcchSkip);

    IMLangStringWStr* pWideStr;
    HRESULT hrResult = PrepareMLStrClass();

    if (SUCCEEDED(hrResult))
        hrResult = m_pMLStrClass->CreateInstance(NULL, IID_IMLangStringWStr, (void**)&pWideStr);

    if (SUCCEEDED(hrResult))
    {
        CMLStrBufConstStackW SrcBuf((LPWSTR)pszSrc, cchSrc);
        long lLine;
        long lSkip;

        hrResult = pWideStr->SetStrBufW(0, -1, &SrcBuf, NULL, NULL);
        if (SUCCEEDED(hrResult))
        {
            hrResult = pWideStr->SetLocale(0, -1, locale);
            if (SUCCEEDED(hrResult))
            {
                // The line length is needed for either output: the skipped
                // part starts where the line ends.
                hrResult = BreakLineML(pWideStr, 0, -1, 0, lMaxColumns,
                                       (pcchLine || pcchSkip) ? &lLine : NULL,
                                       (pcchSkip) ? &lSkip : NULL);

                // No destination buffer is supplied; only the counts are requested.
                if (SUCCEEDED(hrResult) && pcchLine)
                    hrResult = pWideStr->GetWStr(0, lLine, NULL, 0, pcchLine, NULL);

                if (SUCCEEDED(hrResult) && pcchSkip)
                    hrResult = pWideStr->GetWStr(lLine, lSkip, NULL, 0, pcchSkip, NULL);
            }
        }

        pWideStr->Release();
    }

    if (FAILED(hrResult))
    {
        if (pcchLine)
            *pcchLine = 0;
        if (pcchSkip)
            *pcchSkip = 0;
    }

    return hrResult;
}
// Finds where to break one line of a multibyte (code page) buffer.
//
//   locale      - locale assigned to the whole temporary string
//   uCodePage   - code page of pszSrc; 50000 is handled as 1252
//   pszSrc      - source characters
//   cchSrc      - number of characters in pszSrc
//   lMaxColumns - maximum line width, passed to BreakLineML as cMaxColumns
//   pcchLine    - optional; receives the count GetAStr reports for the line
//   pcchSkip    - optional; receives the count GetAStr reports for the part
//                 to skip after the line
//
// The buffer is wrapped in a temporary IMLangStringAStr and handed to
// BreakLineML with a minimum of 0 columns. Returns the first failing HRESULT;
// on failure both outputs are set to 0.
STDMETHODIMP CMLLBCons::BreakLineA(LCID locale, UINT uCodePage, const CHAR* pszSrc, long cchSrc, long lMaxColumns, long* pcchLine, long* pcchSkip)
{
    ASSERT_THIS;
    ASSERT_READ_BLOCK(pszSrc, cchSrc);
    ASSERT_WRITE_PTR_OR_NULL(pcchLine);
    ASSERT_WRITE_PTR_OR_NULL(pcchSkip);

    // NOTE(review): 50000 is presumably the "user defined" code page; confirm.
    if (uCodePage == 50000)
        uCodePage = 1252;

    IMLangStringAStr* pAnsiStr;
    HRESULT hrResult = PrepareMLStrClass();

    if (SUCCEEDED(hrResult))
        hrResult = m_pMLStrClass->CreateInstance(NULL, IID_IMLangStringAStr, (void**)&pAnsiStr);

    if (SUCCEEDED(hrResult))
    {
        CMLStrBufConstStackA SrcBuf((LPSTR)pszSrc, cchSrc);
        long lLine;
        long lSkip;

        hrResult = pAnsiStr->SetStrBufA(0, -1, uCodePage, &SrcBuf, NULL, NULL);
        if (SUCCEEDED(hrResult))
        {
            hrResult = pAnsiStr->SetLocale(0, -1, locale);
            if (SUCCEEDED(hrResult))
            {
                // The line length is needed for either output: the skipped
                // part starts where the line ends.
                hrResult = BreakLineML(pAnsiStr, 0, -1, 0, lMaxColumns,
                                       (pcchLine || pcchSkip) ? &lLine : NULL,
                                       (pcchSkip) ? &lSkip : NULL);

                // No destination buffer is supplied; only the counts are requested.
                if (SUCCEEDED(hrResult) && pcchLine)
                    hrResult = pAnsiStr->GetAStr(0, lLine, uCodePage, NULL, NULL, 0, pcchLine, NULL);

                if (SUCCEEDED(hrResult) && pcchSkip)
                    hrResult = pAnsiStr->GetAStr(lLine, lSkip, uCodePage, NULL, NULL, 0, pcchSkip, NULL);
            }
        }

        pAnsiStr->Release();
    }

    if (FAILED(hrResult))
    {
        if (pcchLine)
            *pcchLine = 0;
        if (pcchSkip)
            *pcchSkip = 0;
    }

    return hrResult;
}
|