Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2913 lines
74 KiB

  1. /*
  2. * @doc INTERNAL
  3. *
  4. * @module TEXT.C -- CTxtPtr implementation |
  5. *
  6. * Authors: <nl>
  7. * Original RichEdit code: David R. Fulmer <nl>
  8. * Christian Fortini <nl>
  9. * Murray Sargent <nl>
  10. *
  11. * History: <nl>
  12. * 6/25/95 alexgo cleanup and reorganization (use run pointers now)
  13. *
  14. * Copyright (c) 1995-2000, Microsoft Corporation. All rights reserved.
  15. */
  16. #include "_common.h"
  17. #include "_text.h"
  18. #include "_edit.h"
  19. #include "_antievt.h"
  20. #include "_clasfyc.h"
  21. #include "_txtbrk.h"
  22. ASSERTDATA
  23. //-----------------------------Internal functions--------------------------------
  24. // Text Block management
  25. static void TxDivideInsertion(LONG cch, LONG ichBlock, LONG cchAfter,
  26. LONG *pcchFirst, LONG *pcchLast);
  27. /*
  28. * IsWhiteSpace(ch)
  29. *
  30. * @func
  31. * Used to determine if ch is an EOP char (see IsEOP() for definition),
  32. * TAB or blank. This function is used in identifying sentence start
  33. * and end.
  34. *
  35. * @rdesc
  36. * TRUE if ch is whitespace
  37. */
  38. BOOL IsWhiteSpace(unsigned ch)
  39. {
  40. return ch == ' ' || IN_RANGE(CELL, ch, CR) || (ch | 1) == PS;
  41. }
  42. /*
  43. * IsSentenceTerminator(ch)
  44. *
  45. * @func
  46. * Used to determine if ch is a standard sentence terminator character,
  47. * namely, '?', '.', or '!'
  48. *
  49. * @rdesc
  50. * TRUE if ch is a question mark, period, or exclamation point.
  51. */
  52. BOOL IsSentenceTerminator(unsigned ch)
  53. {
  54. return ch == '?' || ch == '.' || ch == '!'; // Std sentence delimiters
  55. }
  56. // =========================== Invariant stuff ==================================================
  57. #define DEBUG_CLASSNAME CTxtPtr
  58. #include "_invar.h"
  59. // =============================== CTxtPtr ======================================================
  60. #ifdef DEBUG
  61. /*
  62. * CTxtPtr::Invariant
  63. *
  64. * @mfunc invariant check
  65. */
  66. BOOL CTxtPtr::Invariant() const
  67. {
  68. static LONG numTests = 0;
  69. numTests++; // Counts how many times we've been called
  70. // Make sure _cp is within range
  71. Assert(_cp >= 0);
  72. Update_pchCp();
  73. CRunPtrBase::Invariant();
  74. if(IsValid())
  75. {
  76. // We use less than or equals here so that we can be an insertion
  77. // point at the *end* of the currently existing text.
  78. Assert(_cp <= GetTextLength());
  79. // Make sure all the blocks are consistent...
  80. Assert(GetTextLength() == ((CTxtArray *)_pRuns)->Invariant());
  81. Assert(_cp == CRunPtrBase::CalculateCp());
  82. }
  83. else
  84. {
  85. Assert(_ich == 0);
  86. }
  87. return TRUE;
  88. }
  89. /*
  90. * CTxtPtr::Update_pchCp ()
  91. *
  92. * @mfunc
  93. * Define _pchCp to be ptr to text at _cp
  94. */
  95. void CTxtPtr::Update_pchCp() const
  96. {
  97. LONG cchValid;
  98. *(LONG_PTR *)&_pchCp = (LONG_PTR)GetPch(cchValid);
  99. if(!cchValid)
  100. *(LONG_PTR *)&_pchCp = (LONG_PTR)GetPchReverse(cchValid);
  101. }
  102. /*
  103. * CTxtPtr::MoveGapToEndOfBlock ()
  104. *
  105. * @mfunc
  106. * Function to move buffer gap to current block end to aid in debugging
  107. */
  108. void CTxtPtr::MoveGapToEndOfBlock () const
  109. {
  110. CTxtBlk *ptb = GetRun(0);
  111. ptb->MoveGap(ptb->_cch); // Move gaps to end of cur block
  112. Update_pchCp();
  113. }
  114. #endif // DEBUG
  115. /*
  116. * CTxtPtr::CTxtPtr(ped, cp)
  117. *
  118. * @mfunc constructor
  119. */
  120. CTxtPtr::CTxtPtr (
  121. CTxtEdit *ped, //@parm Ptr to CTxtEdit instance
  122. LONG cp) //@parm cp to set the pointer to
  123. {
  124. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::CTxtPtr");
  125. _ped = ped;
  126. _cp = 0;
  127. SetRunArray((CRunArray *) &ped->GetTxtStory()->_TxtArray);
  128. if(IsValid())
  129. _cp = BindToCp(cp);
  130. }
  131. /*
  132. * CTxtPtr::CTxtPtr(&tp)
  133. *
  134. * @mfunc Copy Constructor
  135. */
  136. CTxtPtr::CTxtPtr (
  137. const CTxtPtr &tp)
  138. {
  139. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::CTxtPtr");
  140. // copy all the values over
  141. *this = tp;
  142. }
  143. /*
  144. * CTxtPtr::GetTextLength()
  145. *
  146. * @mfunc
  147. * Return count of characters in the story pointed to by this
  148. * text ptr. Includes the story's final CR in the count
  149. *
  150. * @rdesc
  151. * cch for the story pointed to by this text ptr
  152. *
  153. * @devnote
  154. * This method returns 0 if the text ptr is a zombie, a state
  155. * identified by _ped = NULL.
  156. */
  157. LONG CTxtPtr::GetTextLength() const
  158. {
  159. return _ped ? ((CTxtArray *)_pRuns)->_cchText : 0;
  160. }
  161. /*
  162. * CTxtPtr::GetChar()
  163. *
  164. * @mfunc
  165. * Return character at this text pointer, NULL if text pointer is at
  166. * end of text
  167. *
  168. * @rdesc
  169. * Character at this text ptr
  170. */
  171. WCHAR CTxtPtr::GetChar()
  172. {
  173. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetChar");
  174. LONG cchValid;
  175. const WCHAR *pch = GetPch(cchValid);
  176. return pch ? *pch : 0;
  177. }
  178. /*
  179. * CTxtPtr::GetPrevChar()
  180. *
  181. * @mfunc
  182. * Return character just before this text pointer, NULL if text pointer
  183. * beginning of text
  184. *
  185. * @rdesc
  186. * Character just before this text ptr
  187. */
  188. WCHAR CTxtPtr::GetPrevChar()
  189. {
  190. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetPrevChar");
  191. LONG cchValid;
  192. const WCHAR *pch = GetPchReverse(cchValid);
  193. return pch ? *(pch - 1) : 0;
  194. }
  195. /*
  196. * CTxtPtr::GetPch(&cchValid)
  197. *
  198. * @mfunc
  199. * return a character pointer to the text at this text pointer
  200. *
  201. * @rdesc
  202. * a pointer to an array of characters. May be NULL. If non-null,
  203. * then cchValid is guaranteed to be at least 1
  204. */
  205. const WCHAR * CTxtPtr::GetPch(
  206. LONG & cchValid) const //@parm Count of chars for which ptr is valid
  207. {
  208. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetPch");
  209. // returned pointer is valid
  210. LONG ich = _ich;
  211. WCHAR * pchBase;
  212. CTxtBlk * ptb = IsValid() ? GetRun(0) : NULL;
  213. cchValid = 0; // Default nothing valid
  214. if(!ptb)
  215. return NULL;
  216. // If we're at the edge of a run, grab the next run or
  217. // stay at the current run.
  218. if(_ich == ptb->_cch)
  219. {
  220. if(_iRun < Count() - 1)
  221. {
  222. // Set us to the next text block
  223. ptb = GetRun(1);
  224. ich = 0;
  225. }
  226. else // At very end of text:
  227. return NULL; // just return NULL
  228. }
  229. AssertSz(CbOfCch(ich) <= ptb->_cbBlock,
  230. "CTxtPtr::GetPch(): _ich bigger than block");
  231. pchBase = ptb->_pch + ich;
  232. // Check to see if we need to skip over gap. Recall that
  233. // the gap may come anywhere in the middle of a block,
  234. // so if the current ich (note, no underscore, we want
  235. // the active ich) is beyond the gap, then recompute pchBase
  236. // by adding in the size of the block.
  237. //
  238. // cchValid will then be the number of characters left in
  239. // the text block (or _cch - ich)
  240. if(CbOfCch(ich) >= ptb->_ibGap)
  241. {
  242. pchBase += CchOfCb(ptb->_cbBlock) - ptb->_cch;
  243. cchValid = ptb->_cch - ich;
  244. }
  245. else
  246. {
  247. // We're valid until the buffer gap (or see below).
  248. cchValid = CchOfCb(ptb->_ibGap) - ich;
  249. }
  250. AssertSz(cchValid > 0 && GetCp() + cchValid <= GetTextLength(),
  251. "CTxtPtr::GetPch: illegal cchValid");
  252. return pchBase;
  253. }
  254. /*
  255. * CTxtPtr::GetPchReverse(&cchValidReverse, pcchValid)
  256. *
  257. * @mfunc
  258. * return a character pointer to the text at this text pointer
  259. * adjusted so that there are some characters valid *behind* the
  260. * pointer.
  261. *
  262. * @rdesc
  263. * a pointer to an array of characters. May be NULL. If non-null,
  264. * then cchValidReverse is guaranteed to be at least 1
  265. */
  266. const WCHAR * CTxtPtr::GetPchReverse(
  267. LONG & cchValidReverse, //@parm length for reverse
  268. LONG * pcchValid) const //@parm length forward
  269. {
  270. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetPchReverse");
  271. LONG cchTemp;
  272. LONG ich = _ich;
  273. WCHAR * pchBase;
  274. CTxtBlk * ptb = IsValid() ? GetRun(0) : NULL;
  275. cchValidReverse = 0; // Default no valid chars in run
  276. if(!ptb)
  277. return NULL;
  278. // If we're at the edge of a run, grab the previous run or
  279. // stay at the current run.
  280. if(!_ich)
  281. {
  282. if(_iRun)
  283. {
  284. ptb = GetRun(-1); // Go to next text block
  285. ich = ptb->_cch;
  286. }
  287. else // At start of text:
  288. return NULL; // just return NULL
  289. }
  290. AssertSz(CbOfCch(ich) <= ptb->_cbBlock,
  291. "CTxtPtr::GetPchReverse(): _ich bigger than block");
  292. pchBase = ptb->_pch + ich;
  293. // Check to see if we need to skip over gap. Recall that
  294. // the game may come anywhere in the middle of a block,
  295. // so if the current ich (note, no underscore, we want
  296. // the active ich) is at least one char past the gap, then recompute
  297. // pchBase by adding the size of the gap (so that it's after
  298. // the gap). This differs from GetPch(), which works forward and
  299. // wants pchBase to include the gap size if ich is at the gap, let
  300. // alone one or more chars past it.
  301. //
  302. // Also figure out the count of valid characters. It's
  303. // either the count of characters from the beginning of the
  304. // text block, i.e. ich, or the count of characters from the
  305. // end of the buffer gap.
  306. cchValidReverse = ich; // Default for ich <= gap offset
  307. cchTemp = ich - CchOfCb(ptb->_ibGap); // Calculate displacement
  308. if(cchTemp > 0) // Positive: pchBase is after gap
  309. {
  310. cchValidReverse = cchTemp;
  311. pchBase += CchOfCb(ptb->_cbBlock) - ptb->_cch; // Add in gap size
  312. }
  313. if(pcchValid) // if client needs forward length
  314. {
  315. if(cchTemp > 0)
  316. cchTemp = ich - ptb->_cch;
  317. else
  318. cchTemp = -cchTemp;
  319. *pcchValid = cchTemp;
  320. }
  321. AssertSz(cchValidReverse > 0 && GetCp() - cchValidReverse >= 0,
  322. "CTxtPtr::GetPchReverse: illegal cchValidReverse");
  323. return pchBase;
  324. }
  325. /*
  326. * CTxtPtr::GetCharFlagsInRange(cch, iCharRepDefault)
  327. *
  328. * @mfunc
  329. * return CharFlags for the range of chars starting at this text pointer
  330. * for cch chars.
  331. *
  332. * @rdesc
  333. * CharFlags for the range of chars
  334. */
  335. QWORD CTxtPtr::GetCharFlagsInRange(
  336. LONG cch,
  337. BYTE iCharRepDefault)
  338. {
  339. QWORD qw = 0;
  340. QWORD qw0;
  341. WCHAR szch[10];
  342. cch = min(cch + 1, 10);
  343. cch = GetText(cch, szch);
  344. for(WCHAR *pch = szch; cch > 0; cch--, pch++)
  345. {
  346. qw0 = GetCharFlags(pch, cch, iCharRepDefault);
  347. if(qw0 & FSURROGATE)
  348. {
  349. cch--;
  350. pch++;
  351. }
  352. qw |= qw0;
  353. }
  354. return qw;
  355. }
  356. /*
  357. * CTxtPtr::BindToCp(cp)
  358. *
  359. * @mfunc
  360. * set cached _cp = cp (or nearest valid value)
  361. *
  362. * @rdesc
  363. * _cp actually set
  364. *
  365. * @comm
  366. * This method overrides CRunPtrBase::BindToCp to keep _cp up to date
  367. * correctly.
  368. *
  369. * @devnote
  370. * Do *not* call this method when high performance is needed; use
  371. * Move() instead, which moves from 0 or from the cached
  372. * _cp, depending on which is closer.
  373. */
  374. LONG CTxtPtr::BindToCp(
  375. LONG cp) //@parm char position to bind to
  376. {
  377. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::BindToCp");
  378. _cp = CRunPtrBase::BindToCp(cp, GetTextLength());
  379. // We want to be able to use this routine to fix up things so we don't
  380. // check invariants on entry.
  381. _TEST_INVARIANT_
  382. return _cp;
  383. }
  384. /*
  385. * CTxtPtr::SetCp(cp)
  386. *
  387. * @mfunc
  388. * 'efficiently' sets cp by advancing from current position or from 0,
  389. * depending on which is closer
  390. *
  391. * @rdesc
  392. * cp actually set to
  393. */
  394. LONG CTxtPtr::SetCp(
  395. LONG cp) //@parm char position to set to
  396. {
  397. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::SetCp");
  398. Move(cp - _cp);
  399. return _cp;
  400. }
  401. /*
  402. * CTxtPtr::Move(cch)
  403. *
  404. * @mfunc
  405. * Move cp by cch characters
  406. *
  407. * @rdesc
  408. * Actual number of characters Moved by
  409. *
  410. * @comm
  411. * We override CRunPtrBase::Move so that the cached _cp value
  412. * can be correctly updated and so that the move can be made
  413. * from the cached _cp or from 0, depending on which is closer.
  414. *
  415. * @devnote
  416. * It's also easy to bind at the end of the story. So an improved
  417. * optimization would bind there if 2*(_cp + cch) > _cp + text length.
  418. */
  419. LONG CTxtPtr::Move(
  420. LONG cch) // @parm count of chars to move by
  421. {
  422. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::Move");
  423. if(!IsValid()) // No runs yet, so don't go
  424. return 0; // anywhere
  425. const LONG cpSave = _cp; // Save entry _cp
  426. LONG cp = cpSave + cch; // Requested target cp (maybe < 0)
  427. if(cp < cpSave/2) // Closer to 0 than cached cp
  428. {
  429. cp = max(cp, 0); // Don't undershoot
  430. _cp = CRunPtrBase::BindToCp(cp);
  431. }
  432. else
  433. _cp += CRunPtrBase::Move(cch); // exist
  434. // NB! the invariant check needs to come at the end; we may be
  435. // moving 'this' text pointer in order to make it valid again
  436. // (for the floating range mechanism).
  437. _TEST_INVARIANT_
  438. return _cp - cpSave; // cch this CTxtPtr moved
  439. }
  440. /*
  441. * CTxtPtr::GetText(cch, pch)
  442. *
  443. * @mfunc
  444. * get a range of cch characters starting at this text ptr. A literal
  445. * copy is made, i.e., with no CR -> CRLF and WCH_EMBEDDING -> ' '
  446. * translations. For these translations, see CTxtPtr::GetPlainText()
  447. *
  448. * @rdesc
  449. * count of characters actually copied
  450. *
  451. * @comm
  452. * Doesn't change this text ptr
  453. */
  454. LONG CTxtPtr::GetText(
  455. LONG cch, //@parm Count of characters to get
  456. WCHAR * pch) //@parm Buffer to copy the text into
  457. {
  458. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetText");
  459. LONG cchSave = cch;
  460. LONG cchValid;
  461. const WCHAR *pchRead;
  462. CTxtPtr tp(*this);
  463. _TEST_INVARIANT_
  464. // Use tp to read valid blocks of text until all the requested
  465. // text is read or until the end of story is reached.
  466. while( cch )
  467. {
  468. pchRead = tp.GetPch(cchValid);
  469. if(!pchRead) // No more text
  470. break;
  471. cchValid = min(cchValid, cch);
  472. CopyMemory(pch, pchRead, cchValid*sizeof(WCHAR));
  473. pch += cchValid;
  474. cch -= cchValid;
  475. tp.Move(cchValid);
  476. }
  477. return cchSave - cch;
  478. }
  479. #ifndef NOCOMPLEXSCRIPTS
  480. /*
  481. * OverRideNeutralChar(ch)
  482. *
  483. * @mfunc
  484. * Helper for overriding BiDi neutral character classification.
  485. * Option is used in Access Expression Builder.
  486. *
  487. * @rdesc
  488. * Modified character or unmodified input character
  489. */
  490. WCHAR OverRideNeutralChar(WCHAR ch)
  491. {
  492. if(ch < '!')
  493. return ch == CELL ? CR : ch;
  494. if(ch > '}')
  495. return ch;
  496. if (IN_RANGE('!', ch, '>'))
  497. {
  498. // True for !"#&'()*+,-./:;<=>
  499. if ((0x00000001 << (ch - TEXT(' '))) & 0x7C00FFCE)
  500. ch = 'a';
  501. }
  502. if (IN_RANGE('[', ch, '^') || ch == '{' || ch == '}')
  503. {
  504. // True for [/]^{}
  505. ch = 'a';
  506. }
  507. return ch;
  508. }
  509. /*
  510. * CTxtPtr::GetTextForUsp(cch, pch, fNeutralOverride)
  511. *
  512. * @mfunc
  513. * get a range of cch characters starting at this text ptr. A literal
  514. * copy is made, with translation to fool Uniscribe classification
  515. *
  516. * @rdesc
  517. * count of characters actually copied
  518. *
  519. * @comm
  520. * Doesn't change this text ptr
  521. */
  522. LONG CTxtPtr::GetTextForUsp(
  523. LONG cch, //@parm Count of characters to get
  524. WCHAR * pch, //@parm Buffer to copy the text into
  525. BOOL fNeutralOverride) //@parm Neutral override option
  526. {
  527. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetTextForUsp");
  528. LONG cchSave = cch;
  529. LONG cchValid;
  530. const WCHAR *pchRead;
  531. CTxtPtr tp(*this);
  532. int i;
  533. WCHAR xltchar;
  534. _TEST_INVARIANT_
  535. // Use tp to read valid blocks of text until all the requested
  536. // text is read or until the end of story is reached.
  537. while( cch )
  538. {
  539. pchRead = tp.GetPch(cchValid);
  540. if(!pchRead) // No more text
  541. break;
  542. cchValid = min(cchValid, cch);
  543. if (!fNeutralOverride)
  544. {
  545. for (i = 0; i < cchValid; i++)
  546. {
  547. xltchar = pchRead[i];
  548. if(xltchar <= '$')
  549. {
  550. if(xltchar >= '#')
  551. xltchar = '@';
  552. if(xltchar == CELL)
  553. xltchar = CR;
  554. }
  555. pch[i] = xltchar;
  556. }
  557. }
  558. else
  559. {
  560. for (i = 0; i < cchValid; i++)
  561. {
  562. pch[i] = OverRideNeutralChar(pchRead[i]);
  563. }
  564. }
  565. pch += cchValid;
  566. cch -= cchValid;
  567. tp.Move(cchValid);
  568. }
  569. return cchSave - cch;
  570. }
  571. #endif
  572. /*
  573. * CTxtPtr::GetPlainText(cchBuff, pch, cpMost, fTextize)
  574. *
  575. * @mfunc
  576. * Copy up to cchBuff characters or up to cpMost, whichever comes
  577. * first, translating lone CRs into CRLFs. Move this text ptr just
  578. * past the last character processed. If fTextize, copy up to but
  579. * not including the first WCH_EMBEDDING char. If not fTextize,
  580. * replace WCH_EMBEDDING by a blank since RichEdit 1.0 does.
  581. *
  582. * @rdesc
  583. * Count of characters copied
  584. *
  585. * @comm
  586. * An important feature is that this text ptr is moved just past the
  587. * last char copied. In this way, the caller can conveniently read
  588. * out plain text in bufferfuls of up to cch chars, which is useful for
  589. * stream I/O. This routine won't copy the final CR even if cpMost
  590. * is beyond it.
  591. */
  592. LONG CTxtPtr::GetPlainText(
  593. LONG cchBuff, //@parm Buffer cch
  594. WCHAR * pch, //@parm Buffer to copy text into
  595. LONG cpMost, //@parm Largest cp to get
  596. BOOL fTextize, //@parm True if break on WCH_EMBEDDING
  597. BOOL fUseCRLF) //@parm If TRUE, CR or LF -> CRLF
  598. {
  599. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::GetPlainText");
  600. LONG cch = cchBuff; // Countdown counter
  601. LONG cchValid; // Valid ptr cch
  602. LONG cchT; // Temporary cch
  603. unsigned ch; // Current char
  604. unsigned chPrev = 0; // Previous char
  605. const WCHAR *pchRead; // Backing-store ptr
  606. _TEST_INVARIANT_
  607. AdjustCRLF(); // Be sure we start on an EOP bdy
  608. if(_ped->Get10Mode()) // RE 1.0 delivers EOP chars as
  609. fUseCRLF = FALSE; // they appear in backing store
  610. LONG cchText = _ped->GetAdjustedTextLength();
  611. cpMost = min(cpMost, cchText); // Don't write final CR
  612. if(GetCp() >= cpMost)
  613. return 0;
  614. while(cch > 0) // While room in buffer
  615. {
  616. if(!(pchRead = GetPch(cchValid))) // No more chars available
  617. break; // so we're out of here
  618. cchT = GetCp() + cchValid - cpMost;
  619. if(cchT > 0) // Don't overshoot
  620. {
  621. cchValid -= cchT;
  622. if(cchValid <= 0)
  623. break; // Nothing left before cpMost
  624. }
  625. for(cchT = 0; cch > 0 && cchT < cchValid; cchT++, cch--, chPrev = ch)
  626. {
  627. ch = *pch++ = *pchRead++; // Copy next char (but don't
  628. if(IN_RANGE(CELL, ch, CR)) // count it yet)
  629. {
  630. if(IsASCIIEOP(ch)) // LF, VT, FF, CR
  631. {
  632. if(!fUseCRLF || ch == FF)
  633. continue;
  634. if (ch == CR && chPrev == ENDFIELD &&
  635. cchValid - cchT > 1 &&
  636. *pchRead == STARTFIELD)
  637. {
  638. *(pch - 1) = ' '; // New table row follows old:
  639. continue; // use only 1 CRLF
  640. }
  641. Move(cchT); // Move up to CR
  642. if(cch < 2) // No room for LF, so don't
  643. goto done; // count CR either
  644. // Bypass EOP w/o worrying about
  645. cchT = AdvanceCRLF(FALSE);// buffer gaps and blocks
  646. if(cchT > 2) // Translate CRCRLF to ' '
  647. { // Usually copied count exceeds
  648. Assert(cchT == 3); // internal count, but CRCRLFs
  649. *(pch - 1) = ' '; // reduce the relative increase:
  650. } // NB: error for EM_GETTEXTLENGTHEX
  651. else // CRLF or lone CR
  652. { // Store LF in both cases for
  653. *(pch - 1) = CR; // Be sure it's a CR not a VT,
  654. *pch++ = LF; // Windows. No LF for Mac
  655. cch--; // One less for target buffer
  656. }
  657. cch--; // CR (or ' ') copied
  658. cchT = 0; // Don't Move() more below
  659. break; // Go get new pchRead & cchValid
  660. }
  661. else if(ch == CELL) // Use TAB for cell end markers
  662. *(pch - 1) = TAB;
  663. }
  664. else if(ch >= STARTFIELD)
  665. { // Object lives here
  666. if(fTextize && ch == WCH_EMBEDDING) // Break on WCH_EMBEDDING
  667. {
  668. Move(cchT); // Move this text ptr up to
  669. goto done; // WCH_EMBEDDING and return
  670. }
  671. *(pch - 1) = ' '; // Replace embedding char by ' '
  672. }
  673. }
  674. Move(cchT);
  675. }
  676. done:
  677. return cchBuff - cch;
  678. }
  679. /*
  680. * CTxtPtr::AdvanceCRLF(fMulticharAdvance)
  681. *
  682. * @mfunc
  683. * Move text pointer by one character, safely advancing
  684. * over CRLF, CRCRLF, and UTF-16 combinations
  685. *
  686. * @rdesc
  687. * Number of characters text pointer has been moved by
  688. */
  689. LONG CTxtPtr::AdvanceCRLF(
  690. BOOL fMulticharAdvance) //@parm If TRUE, advance over combining-mark sequences
  691. {
  692. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::AdvanceCRLF");
  693. _TEST_INVARIANT_
  694. LONG cp;
  695. LONG cpSave = _cp;
  696. WCHAR ch = GetChar(); // Char on entry
  697. WCHAR ch1 = NextChar(); // Advance to and get next char
  698. BOOL fTwoCRs = FALSE;
  699. BOOL fCombiningMark = FALSE;
  700. if(ch == CR)
  701. {
  702. if(ch1 == CR && _cp < GetTextLength())
  703. {
  704. fTwoCRs = TRUE; // Need at least 3 chars to
  705. ch1 = NextChar(); // have CRCRLF at end
  706. }
  707. if(ch1 == LF)
  708. Move(1); // Bypass CRLF
  709. else if(fTwoCRs)
  710. Move(-1); // Only bypass one CR of two
  711. AssertSz(_ped->fUseCRLF() || _cp == cpSave + 1,
  712. "CTxtPtr::AdvanceCRLF: EOP isn't a single char");
  713. }
  714. // Handle Unicode UTF-16 surrogates
  715. if(IN_RANGE(0xD800, ch, 0xDBFF)) // Started on UTF-16 lead word
  716. {
  717. if (IN_RANGE(0xDC00, ch1, 0xDFFF))
  718. Move(1); // Bypass UTF-16 trail word
  719. else
  720. AssertSz(FALSE, "CTxtPtr::AdvanceCRLF: illegal Unicode surrogate combo");
  721. }
  722. if (fMulticharAdvance)
  723. {
  724. while(IN_RANGE(0x300, ch1, 0x36F)) // Bypass combining diacritical marks
  725. {
  726. fCombiningMark = TRUE;
  727. cp = _cp;
  728. ch1 = NextChar();
  729. if (_cp == cp)
  730. break;
  731. }
  732. }
  733. if(IN_RANGE(STARTFIELD, ch, ENDFIELD))
  734. Move(1); // Bypass field type
  735. LONG cch = _cp - cpSave;
  736. AssertSz(!cch || cch == 1 || fCombiningMark ||
  737. cch == 2 && (IN_RANGE(0xD800, ch, 0xDBFF) ||
  738. IN_RANGE(STARTFIELD, ch, ENDFIELD)) ||
  739. (_ped->fUseCRLF() && GetPrevChar() == LF &&
  740. (cch == 2 || cch == 3 && fTwoCRs)),
  741. "CTxtPtr::AdvanceCRLF(): Illegal multichar");
  742. return cch; // # chars bypassed
  743. }
  744. /*
  745. * CTxtPtr::NextChar()
  746. *
  747. * @mfunc
  748. * Increment this text ptr and return char it points at
  749. *
  750. * @rdesc
  751. * Next char
  752. */
  753. WCHAR CTxtPtr::NextChar()
  754. {
  755. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::NextChar");
  756. _TEST_INVARIANT_
  757. Move(1);
  758. return GetChar();
  759. }
  760. /*
  761. * CTxtPtr::PrevChar()
  762. *
  763. * @mfunc
  764. * Decrement this text ptr and return char it points at
  765. *
  766. * @rdesc
  767. * Previous char
  768. */
  769. WCHAR CTxtPtr::PrevChar()
  770. {
  771. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::PrevChar");
  772. _TEST_INVARIANT_
  773. return Move(-1) ? GetChar() : 0;
  774. }
  775. /*
  776. * CTxtPtr::BackupCRLF(fMulticharBackup)
  777. *
  778. * @mfunc
  779. * Backup text pointer by one character, safely backing up
  780. * over CRLF, CRCRLF, and UTF-16 combinations
  781. *
  782. * @rdesc
  783. * Number of characters text pointer has been moved by
  784. *
  785. * @future
  786. * Backup over Unicode combining marks
  787. */
  788. LONG CTxtPtr::BackupCRLF(
  789. BOOL fMulticharBackup) //@parm If TRUE, backup over combining-mark sequences
  790. {
  791. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::BackupCRLF");
  792. _TEST_INVARIANT_
  793. LONG cpSave = _cp;
  794. WCHAR ch = PrevChar(); // Moves to and get previous char
  795. if(fMulticharBackup)
  796. { // Bypass combining diacritical marks
  797. while(IN_RANGE(0x300, ch, 0x36F))
  798. ch = PrevChar();
  799. }
  800. // Handle Unicode UTF-16 surrogates
  801. if(_cp && IN_RANGE(0xDC00, ch, 0xDFFF))
  802. {
  803. ch = PrevChar();
  804. if (!IN_RANGE(0xD800, ch, 0xDBFF))
  805. {
  806. AssertSz(FALSE, "CTxtPtr::BackupCRLF: illegal Unicode surrogate combo");
  807. ch = NextChar();
  808. }
  809. }
  810. if(ch == LF) // Try to back up 1 char in any case
  811. {
  812. if(_cp && PrevChar() != CR) // If LF, does prev char = CR?
  813. Move(1); // No, leave tp at LF
  814. else if(_cp && !IsAfterTRD(0) &&// At CRLF. If not after TRD
  815. PrevChar() != CR) // and prev char != CR, leave
  816. { // at CRLF
  817. Move(1);
  818. }
  819. }
  820. else if(IN_RANGE(STARTFIELD, GetPrevChar(), ENDFIELD))
  821. Move(-1); // Bypass field type
  822. AssertSz( _cp == cpSave ||
  823. ch == LF && GetChar() == CR ||
  824. !(ch == LF || fMulticharBackup &&
  825. (IN_RANGE(0x300, ch, 0x36F) ||
  826. IN_RANGE(0xDC00, ch, 0xDFFF) && IN_RANGE(0xD800, GetPrevChar(), 0xDBFF)) ),
  827. "CTxtPtr::BackupCRLF(): Illegal multichar");
  828. return _cp - cpSave; // - # chars this CTxtPtr moved
  829. }
  830. /*
  831. * CTxtPtr::AdjustCRLF(iDir)
  832. *
  833. * @mfunc
  834. * Adjust the position of this text pointer to the beginning of a CRLF
  835. * or CRCRLF combination, if it is in the middle of such a combination.
  836. * Move text pointer to the beginning/end (for iDir neg/pos) of a Unicode
  837. * surrogate pair or a STARTFIELD/ENDFIELD pair if it is in the middle
  838. * of such a pair.
  839. *
  840. * @rdesc
  841. * Number of characters text pointer has been moved by
  842. *
  843. * @future
  844. * Adjust to beginning of sequence containing Unicode combining marks
  845. */
  846. LONG CTxtPtr::AdjustCRLF(
  847. LONG iDir) //@parm Move forward/backward for iDir = 1/-1, respectively
  848. {
  849. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::AdjustCpCRLF");
  850. _TEST_INVARIANT_
  851. UINT ch = GetChar();
  852. LONG cpSave = _cp;
  853. if(!_cp) // Alignment always correct
  854. return 0; // at cp 0
  855. iDir = iDir < 0 ? -1 : 1;
  856. // Handle Unicode UTF-16 surrogates
  857. if(IN_RANGE(0xDC00, ch, 0xDFFF)) // Landed on UTF-16 trail word
  858. {
  859. AssertSz(IN_RANGE(0xD800, GetPrevChar(), 0xDBFF),
  860. "CTxtPtr::AdjustCRLF: illegal Unicode surrogate combo");
  861. return Move(iDir); // Backup to UTF-16 lead word or
  862. } // move forward to next char
  863. UINT chPrev = GetPrevChar();
  864. if(IN_RANGE(STARTFIELD, chPrev, ENDFIELD) && chPrev != 0xFFFA)
  865. return Move(iDir);
  866. if(!IsASCIIEOP(ch) || IsAfterTRD(0)) // Early out
  867. return 0;
  868. if(ch == LF && chPrev == CR) // Landed on LF preceded by CR:
  869. Move(-1); // move to CR for CRCRLF test
  870. // Leave as adjust-forward only behavior for RE 1.0 compatibility on
  871. // CRCRLF and CRLF
  872. if(GetChar() == CR) // Land on a CR of CRLF or
  873. { // second CR of CRCRLF?
  874. CTxtPtr tp(*this);
  875. if(tp.NextChar() == LF)
  876. {
  877. tp.Move(-2); // First CR of CRCRLF ?
  878. if(tp.GetChar() == CR) // Yes or CRLF is at start of
  879. Move(-1); // story. Try to back up over
  880. } // CR (If at BOS, no effect)
  881. }
  882. return _cp - cpSave;
  883. }
  884. /*
  885. * CTxtPtr::IsAtEOP()
  886. *
  887. * @mfunc
  888. * Return TRUE iff this text pointer is at an end-of-paragraph mark
  889. *
  890. * @rdesc
  891. * TRUE if at EOP
  892. *
  893. * @devnote
  894. * End of paragraph marks for RichEdit 1.0 and the MLE can be CRLF
  895. * and CRCRLF. For RichEdit 2.0, EOPs can also be CR, VT (0xB - Shift-
  896. * Enter), and FF (0xC - page break or form feed).
  897. */
  898. BOOL CTxtPtr::IsAtEOP()
  899. {
  900. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::IsAtEOP");
  901. _TEST_INVARIANT_
  902. unsigned ch = GetChar();
  903. if(IsASCIIEOP(ch)) // See if LF <= ch <= CR
  904. { // Clone tp in case
  905. CTxtPtr tp(*this); // AdjustCpCRLF moves
  906. return !tp.AdjustCRLF(); // Return TRUE unless in
  907. } // middle of CRLF or CRCRLF
  908. return (ch | 1) == PS || ch == CELL; // Allow Unicode 0x2028/9 also
  909. }
  910. /*
  911. * CTxtPtr::IsAfterEOP()
  912. *
  913. * @mfunc
  914. * Return TRUE iff this text pointer is just after an end-of-paragraph
  915. * mark
  916. *
  917. * @rdesc
  918. * TRUE iff text ptr follows an EOP mark
  919. */
  920. BOOL CTxtPtr::IsAfterEOP()
  921. {
  922. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::IsAfterEOP");
  923. _TEST_INVARIANT_
  924. if(IsASCIIEOP(GetChar()))
  925. {
  926. CTxtPtr tp(*this); // If in middle of CRLF
  927. if(tp.AdjustCRLF()) // or CRCRLF, return FALSE
  928. return FALSE;
  929. }
  930. return IsEOP(GetPrevChar()); // After EOP if after Unicode
  931. } // PS or LF, VT, FF, CR, CELL
  932. /*
  933. * CTxtPtr::IsAtTRD(ch)
  934. *
  935. * @mfunc
  936. * Return TRUE iff this text pointer is at a table row delimiter (ch CR).
  937. * If ch = 0, then match both start and end delimiters.
  938. *
  939. * @rdesc
  940. * TRUE iff text ptr is at a table row end delimiter
  941. */
  942. BOOL CTxtPtr::IsAtTRD(
  943. WCHAR ch) //@parm Table Row Delimiter
  944. {
  945. LONG cchValid;
  946. const WCHAR *pch = GetPch(cchValid);
  947. if(cchValid < 1)
  948. return FALSE;
  949. WCHAR chNext;
  950. if(cchValid < 2) // In case gap splits TRD
  951. { // (may happen after undo)
  952. CTxtPtr tp(*this);
  953. tp.Move(1);
  954. chNext = tp.GetChar();
  955. }
  956. else
  957. chNext = *(pch + 1);
  958. if(chNext != CR)
  959. return FALSE;
  960. if(ch)
  961. {
  962. AssertSz(ch == STARTFIELD || ch == ENDFIELD,
  963. "CTxtPtr::IsAtTRD: illegal argument");
  964. return *pch == ch;
  965. }
  966. ch = *pch;
  967. return ch == STARTFIELD || ch == ENDFIELD;
  968. }
  969. /*
  970. * CTxtPtr::IsAfterTRD(ch)
  971. *
  972. * @mfunc
  973. * Return TRUE iff this text pointer immediately follows a table row
  974. * start/end delimiter specified by ch (ch = STARTFIELD/ENDFIELD
  975. * followed by CR). If ch = 0, then match both start and end delims.
  976. *
  977. * @rdesc
  978. * TRUE iff text ptr follows an table row start delimiter
  979. */
  980. BOOL CTxtPtr::IsAfterTRD(
  981. WCHAR ch) //@parm Table Row Delimiter
  982. {
  983. LONG cchValid;
  984. const WCHAR *pch = GetPchReverse(cchValid);
  985. if(cchValid < 1 || *(pch - 1) != CR)
  986. return FALSE;
  987. WCHAR chPrev;
  988. if(cchValid < 2) // In case gap splits TRD
  989. { // (may happen after undo)
  990. CTxtPtr tp(*this);
  991. tp.Move(-1);
  992. chPrev = tp.GetPrevChar();
  993. }
  994. else
  995. chPrev = *(pch - 2);
  996. if(ch)
  997. {
  998. AssertSz(ch == STARTFIELD || ch == ENDFIELD,
  999. "CTxtPtr::IsAfterTRD: illegal argument");
  1000. return chPrev == ch;
  1001. }
  1002. return chPrev == STARTFIELD || chPrev == ENDFIELD;
  1003. }
  1004. /*
  1005. * CTxtPtr::IsAtStartOfCell()
  1006. *
  1007. * @mfunc
  1008. * Return TRUE iff this text pointer immediately follows a table row
  1009. * start delimiter (STARTFIELD CR) or any cell delimiter (CELL) except
  1010. * the last one in a row.
  1011. *
  1012. * @rdesc
  1013. * TRUE iff text ptr follows an table row start delimiter
  1014. */
  1015. BOOL CTxtPtr::IsAtStartOfCell()
  1016. {
  1017. LONG cchValid;
  1018. const WCHAR *pch = GetPchReverse(cchValid);
  1019. return cchValid && *(pch - 1) == CELL && !IsAtTRD(ENDFIELD) ||
  1020. cchValid >= 2 && *(pch - 1) == CR && *(pch - 2) == STARTFIELD;
  1021. }
  1022. // Build-time guard needed for CTxtPtr::ReplaceRange() and InsertRange(): compilation stops unless cchGapInitial is at least 1
  1023. #if cchGapInitial < 1
  1024. #error "cchGapInitial must be at least one"
  1025. #endif
  1026. /*
  1027. * CTxtPtr::MoveWhile(cch, chFirst, chLast, fInRange)
  1028. *
  1029. * @mfunc
  1030. * Move this text ptr 1) to first char (fInRange ? in range : not in range)
  1031. * chFirst thru chLast or 2) cch chars, which ever comes first. Return
  1032. * count of chars left in run on return. E.g., chFirst = 0, chLast = 0x7F
  1033. * and fInRange = TRUE breaks on first nonASCII char.
  1034. *
  1035. * @rdesc
  1036. * cch left in run on return
  1037. */
  1038. LONG CTxtPtr::MoveWhile(
  1039. LONG cchRun, //@parm Max cch to check
  1040. WCHAR chFirst, //@parm First ch in range
  1041. WCHAR chLast, //@parm Last ch in range
  1042. BOOL fInRange) //@parm break on non0/0 high byte for TRUE/FALSE
  1043. {
  1044. LONG cch;
  1045. LONG i;
  1046. const WCHAR *pch;
  1047. while(cchRun)
  1048. {
  1049. pch = GetPch(cch);
  1050. cch = min(cch, cchRun);
  1051. for(i = 0; i < cch; i++)
  1052. {
  1053. if(IN_RANGE(chFirst, *pch++, chLast) ^ fInRange)
  1054. {
  1055. Move(i); // Advance to 1st char with 0/non0 masked
  1056. return cchRun - i; // value
  1057. }
  1058. }
  1059. cchRun -= cch;
  1060. Move(cch); // Advance to next txt bdy
  1061. }
  1062. return 0;
  1063. }
  1064. /*
  1065. * CTxtPtr::FindWordBreak(action, cpMost)
  1066. *
  1067. * @mfunc
  1068. * Find a word break and move this text pointer to it.
  1069. *
  1070. * @rdesc
  1071. * Offset from cp of the word break
  1072. */
  1073. LONG CTxtPtr::FindWordBreak(
  1074. INT action, //@parm See TxWordBreakProc header
  1075. LONG cpMost) //@parm Limiting character position
  1076. {
  1077. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::FindWordBreak");
  1078. _TEST_INVARIANT_
  1079. const INT breakBufSize = 10;
  1080. LONG bufferSize;
  1081. LONG cch;
  1082. LONG cchBuffer;
  1083. LONG cchChunk;
  1084. LONG cchText = GetTextLength();
  1085. WCHAR ch = GetChar();
  1086. WCHAR pchBreakBuf[breakBufSize];
  1087. LONG cpSave = _cp; // For calculating break pt
  1088. LONG ichBreak;
  1089. WCHAR * pBuf;
  1090. WCHAR const * pch;
  1091. LONG t; // Temp for abs() macro
  1092. BOOL b10ModeWordBreak = (_ped->Get10Mode() && _ped->_pfnWB);
  1093. if(action == WB_CLASSIFY || action == WB_ISDELIMITER)
  1094. return ch ? _ped->TxWordBreakProc(&ch, 0, CbOfCch(1), action, GetCp()) : 0;
  1095. if(action & 1) // Searching forward
  1096. { // Easiest to handle EOPs
  1097. if(action == WB_MOVEWORDRIGHT && IsEOP(ch)) // explicitly (spanning
  1098. { // a class can go too
  1099. AdjustCRLF(); // far). Go to end of
  1100. AdvanceCRLF(); // EOP "word"
  1101. goto done;
  1102. }
  1103. // Calc. max search
  1104. if((DWORD)cpMost > (DWORD)cchText) // Bounds check: get < 0
  1105. cpMost = cchText; // as well as too big
  1106. cch = cpMost - _cp;
  1107. while(cch > 0)
  1108. { // The independent buffer
  1109. cchBuffer = min(cch, breakBufSize - 1); // avoids gaps in BS
  1110. cch -= bufferSize = cchBuffer;
  1111. pBuf = pchBreakBuf; // Fill buffer forward
  1112. // Grab the first character in reverse for fnWB that require 2
  1113. // chars. Note, we play with _ich to get single char fnWB
  1114. // to ignore this character.
  1115. pch = GetPchReverse(cchChunk);
  1116. if ( !cchChunk ) pch = L" "; // Any break char
  1117. *pBuf++ = *pch;
  1118. // *pBuf++ = (cchChunk ? *(pch - 1) : L' ');
  1119. while ( cchBuffer ) // Finish filling
  1120. {
  1121. pch = GetPch(cchChunk);
  1122. if (!cchChunk) { Assert(0); break; }
  1123. cchChunk = min(cchBuffer, cchChunk);
  1124. Move(cchChunk);
  1125. wcsncpy(pBuf, pch, cchChunk);
  1126. pBuf += cchChunk;
  1127. cchBuffer -= cchChunk;
  1128. }
  1129. ichBreak = _ped->TxWordBreakProc(pchBreakBuf, 1, // Find the break
  1130. CbOfCch(bufferSize+1), action, GetCp()-bufferSize, GetCp()-bufferSize) - 1;
  1131. // in 1.0 mode some apps will return 0 implying the current cp position is a valid break point
  1132. if (ichBreak == -1 && b10ModeWordBreak)
  1133. ichBreak = 0;
  1134. // Apparently, some fnWBs return ambiguous results
  1135. if(ichBreak >= 0 && ichBreak <= bufferSize)
  1136. {
  1137. // Ambiguous break pt?
  1138. // Due to the imprecise nature of the word break proc spec,
  1139. // we've reached an ambiguous condition where we don't know
  1140. // if this is really a break, or just the end of the data.
  1141. // By backing up or going forward by 2, we'll know for sure.
  1142. // NOTE: we'll always be able to advance or go back by 2
  1143. // because we guarantee that when !cch that we have
  1144. // at least breakBufSize (16) characters in the data stream.
  1145. if (ichBreak < bufferSize || !cch)
  1146. {
  1147. Move( ichBreak - bufferSize );
  1148. break;
  1149. }
  1150. // Need to recalc break pt to disambiguate
  1151. t = Move(ichBreak - bufferSize - 2); // abs() is a
  1152. cch += abs(t); // macro
  1153. }
  1154. }
  1155. }
  1156. else // REVERSE - code dup based on EliK "streams" concept.
  1157. {
  1158. if(!_cp) // Can't go anywhere
  1159. return 0;
  1160. if(action == WB_MOVEWORDLEFT) // Easiest to handle EOPs
  1161. { // here
  1162. if(IsASCIIEOP(ch) && AdjustCRLF()) // In middle of a CRLF or
  1163. goto done; // CRCRLF "word"
  1164. ch = PrevChar(); // Check if previous char
  1165. if(IsEOP(ch)) // is an EOP char
  1166. {
  1167. if(ch == LF) // Backspace to start of
  1168. AdjustCRLF(); // CRLF and CRCRLF
  1169. goto done;
  1170. }
  1171. Move(1); // Move back to start char
  1172. }
  1173. // Calc. max search
  1174. if((DWORD)cpMost > (DWORD)_cp) // Bounds check (also
  1175. cpMost = _cp; // handles cpMost < 0)
  1176. cch = cpMost;
  1177. while(cch > 0)
  1178. { // The independent buffer
  1179. cchBuffer = min(cch, breakBufSize - 1); // avoids gaps in BS
  1180. cch -= bufferSize = cchBuffer;
  1181. pBuf = pchBreakBuf + cchBuffer; // Fill from the end.
  1182. // Grab the first character forward for fnWB that require 2 chars.
  1183. // Note: we play with _ich to get single char fnWB to ignore this
  1184. // character.
  1185. pch = GetPch(cchChunk);
  1186. if ( !cchChunk ) pch = L" "; // Any break char
  1187. *pBuf = *pch;
  1188. while ( cchBuffer > 0 ) // Fill rest of buffer
  1189. { // before going in reverse
  1190. pch = GetPchReverse(cchChunk );
  1191. if (!cchChunk) { Assert(0); break; }
  1192. cchChunk = min(cchBuffer, cchChunk);
  1193. Move(-cchChunk);
  1194. pch -= cchChunk;
  1195. pBuf -= cchChunk;
  1196. wcsncpy(pBuf, pch, cchChunk);
  1197. cchBuffer -= cchChunk;
  1198. }
  1199. // Get break left.
  1200. ichBreak = _ped->TxWordBreakProc(pchBreakBuf, bufferSize,
  1201. CbOfCch(bufferSize+1), action, GetCp(), GetCp()+bufferSize);
  1202. // in 1.0 mode some apps will return 0 implying the current cp position is a valid break point
  1203. if (ichBreak == 0 && b10ModeWordBreak)
  1204. ichBreak = bufferSize;
  1205. // Apparently, some fnWBs return ambiguous results
  1206. if(ichBreak >= 0 && ichBreak <= bufferSize)
  1207. { // Ambiguous break pt?
  1208. // NOTE: when going in reverse, we have >= bufsize - 1
  1209. // because there is a break-after char (hyphen).
  1210. if ( ichBreak > 0 || !cch )
  1211. {
  1212. Move(ichBreak); // Move _cp to break point.
  1213. break;
  1214. }
  1215. cch += Move(2 + ichBreak); // Need to recalc break pt
  1216. } // to disambiguate.
  1217. }
  1218. }
  1219. done:
  1220. return _cp - cpSave; // Offset of where to break
  1221. }
  1222. /*
  1223. * CTxtPtr::TranslateRange(cch, CodePage, fSymbolCharSet, publdr)
  1224. *
  1225. * @mfunc
  1226. * Translate a range of text at this text pointer to...
  1227. *
  1228. * @rdesc
  1229. * Count of new characters added (should be same as count replaced)
  1230. *
  1231. * @devnote
  1232. * Moves this text pointer to end of replaced text.
  1233. * May move text block and formatting arrays.
  1234. */
  1235. LONG CTxtPtr::TranslateRange(
  1236. LONG cch, //@parm length of range to translate
  1237. UINT CodePage, //@parm CodePage for MBTWC or WCTMB
  1238. BOOL fSymbolCharSet, //@parm Target charset
  1239. IUndoBuilder *publdr) //@parm Undo bldr to receive antievents
  1240. {
  1241. CTempWcharBuf twcb;
  1242. CTempCharBuf tcb;
  1243. UINT ch;
  1244. BOOL fAllASCII = TRUE;
  1245. BOOL fNoCodePage;
  1246. BOOL fUsedDef; //@parm Out parm to receive whether default char used
  1247. LONG i;
  1248. char * pastr = tcb.GetBuf(cch);
  1249. WCHAR * pstr = twcb.GetBuf(cch);
  1250. WCHAR * pstrT = pstr;
  1251. i = GetText(cch, pstr);
  1252. Assert(i == cch);
  1253. if(fSymbolCharSet) // Target is SYMBOL_CHARSET
  1254. {
  1255. WCTMB(CodePage, 0, pstr, cch, pastr, cch, "\0", &fUsedDef,
  1256. &fNoCodePage, FALSE);
  1257. if(fNoCodePage)
  1258. return cch;
  1259. for(; i && *pastr; i--) // Break if conversion failed
  1260. { // (NULL default char used)
  1261. if(*pstr >= 128)
  1262. fAllASCII = FALSE;
  1263. *pstr++ = *(BYTE *)pastr++;
  1264. }
  1265. cch -= i;
  1266. if(fAllASCII)
  1267. return cch;
  1268. }
  1269. else // Target isn't SYMBOL_CHARSET
  1270. {
  1271. while(i--)
  1272. {
  1273. ch = *pstr++; // Source is SYMBOL_CHARSET, so
  1274. *pastr++ = (char)ch; // all chars should be < 256
  1275. if(ch >= 128) // In any event, truncate to BYTE
  1276. fAllASCII = FALSE;
  1277. }
  1278. if(fAllASCII) // All ASCII, so no conversion needed
  1279. return cch;
  1280. MBTWC(CodePage, 0, pastr - cch, cch, pstrT, cch, &fNoCodePage);
  1281. if(fNoCodePage)
  1282. return cch;
  1283. }
  1284. return ReplaceRange(cch, cch, pstrT, publdr, NULL, NULL);
  1285. }
  1286. /*
  1287. * CTxtPtr::ReplaceRange(cchOld, cchNew, *pch, publdr, paeCF, paePF)
  1288. *
  1289. * @mfunc
  1290. * replace a range of text at this text pointer.
  1291. *
  1292. * @rdesc
  1293. * count of new characters added
  1294. *
  1295. * @comm SideEffects: <nl>
  1296. * moves this text pointer to end of replaced text <nl>
  1297. * moves text block array <nl>
  1298. */
  1299. LONG CTxtPtr::ReplaceRange(
  1300. LONG cchOld, //@parm length of range to replace
  1301. // (<lt> 0 means to end of text)
  1302. LONG cchNew, //@parm length of replacement text
  1303. WCHAR const *pch, //@parm replacement text
  1304. IUndoBuilder *publdr, //@parm if non-NULL, where to put an
  1305. // anti-event for this action
  1306. IAntiEvent *paeCF, //@parm char format AE
  1307. IAntiEvent *paePF ) //@parm paragraph formatting AE
  1308. {
  1309. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::ReplaceRange");
  1310. _TEST_INVARIANT_
  1311. LONG cchAdded = 0;
  1312. LONG cchInBlock;
  1313. LONG cchNewInBlock;
  1314. if(cchOld < 0)
  1315. cchOld = GetTextLength() - _cp;
  1316. if(publdr)
  1317. HandleReplaceRangeUndo( cchOld, cchNew, publdr, paeCF, paePF);
  1318. // Blocks involving replacement
  1319. while(cchOld > 0 && cchNew > 0)
  1320. {
  1321. CTxtBlk *ptb = GetRun(0);
  1322. // cchOld should never be nonzero if the text run is empty
  1323. AssertSz(ptb,
  1324. "CTxtPtr::Replace() - Pointer to text block is NULL !");
  1325. ptb->MoveGap(_ich);
  1326. cchInBlock = min(cchOld, ptb->_cch - _ich);
  1327. if(cchInBlock > 0)
  1328. {
  1329. cchOld -= cchInBlock;
  1330. ptb->_cch -= cchInBlock;
  1331. ((CTxtArray *)_pRuns)->_cchText -= cchInBlock;
  1332. }
  1333. cchNewInBlock = CchOfCb(ptb->_cbBlock) - ptb->_cch;
  1334. // if there's room for a gap, leave one
  1335. if(cchNewInBlock > cchGapInitial)
  1336. cchNewInBlock -= cchGapInitial;
  1337. if(cchNewInBlock > cchNew)
  1338. cchNewInBlock = cchNew;
  1339. if(cchNewInBlock > 0)
  1340. {
  1341. CopyMemory(ptb->_pch + _ich, pch, CbOfCch(cchNewInBlock));
  1342. cchNew -= cchNewInBlock;
  1343. _cp += cchNewInBlock;
  1344. _ich += cchNewInBlock;
  1345. pch += cchNewInBlock;
  1346. cchAdded += cchNewInBlock;
  1347. ptb->_cch += cchNewInBlock;
  1348. ptb->_ibGap += CbOfCch(cchNewInBlock);
  1349. ((CTxtArray *)_pRuns)->_cchText += cchNewInBlock;
  1350. }
  1351. if(_iRun >= Count() - 1 || !cchOld )
  1352. break;
  1353. // Go to next block
  1354. _iRun++;
  1355. _ich = 0;
  1356. }
  1357. if(cchNew > 0)
  1358. cchAdded += InsertRange(cchNew, pch);
  1359. else if(cchOld > 0)
  1360. DeleteRange(cchOld);
  1361. return cchAdded;
  1362. }
  1363. /*
  1364. * CTxtPtr::HandleReplaceRangeUndo (cchOld, cchNew, publdr, paeCF, paePF)
  1365. *
  1366. * @mfunc
  1367. * worker function for ReplaceRange. Figures out what will happen in
  1368. * the replace range call and creates the appropriate anti-events
  1369. *
  1370. * @devnote
  1371. * We first check to see if our replace range data can be merged into
  1372. * an existing anti-event. If it can, then we just return.
  1373. * Otherwise, we copy the deleted characters into an allocated buffer
  1374. * and then create a ReplaceRange anti-event.
  1375. *
  1376. * In order to handle ordering problems between formatting and text
  1377. * anti-events (that is, text needs to exist before formatting can
  1378. * be applied), we have any formatting anti-events passed to us first.
  1379. */
  1380. void CTxtPtr::HandleReplaceRangeUndo(
  1381. LONG cchOld, //@parm Count of characters to delete
  1382. LONG cchNew, //@parm Count of new characters to add
  1383. IUndoBuilder * publdr, //@parm Undo builder to receive anti-event
  1384. IAntiEvent * paeCF, //@parm char formatting AE
  1385. IAntiEvent * paePF ) //@parm paragraph formatting AE
  1386. {
  1387. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::HandleReplaceRangeUndo");
  1388. _TEST_INVARIANT_
  1389. IAntiEvent *pae = publdr->GetTopAntiEvent();
  1390. WCHAR * pch = NULL;
  1391. if(pae)
  1392. {
  1393. SimpleReplaceRange sr;
  1394. sr.cpMin = _cp;
  1395. sr.cpMax = _cp + cchNew;
  1396. sr.cchDel = cchOld;
  1397. if(pae->MergeData(MD_SIMPLE_REPLACERANGE, &sr) == NOERROR)
  1398. {
  1399. // If the data was merged successfully, then we do
  1400. // not need these anti-events
  1401. if(paeCF)
  1402. DestroyAEList(paeCF);
  1403. if(paePF)
  1404. DestroyAEList(paePF);
  1405. // we've done everything we need to.
  1406. return;
  1407. }
  1408. }
  1409. // Allocate a buffer and grab the soon-to-be deleted
  1410. // text (if necessary)
  1411. if( cchOld > 0 )
  1412. {
  1413. pch = new WCHAR[cchOld];
  1414. if( pch )
  1415. GetText(cchOld, pch);
  1416. else
  1417. cchOld = 0;
  1418. }
  1419. // The new range will exist from our current position plus
  1420. // cchNew (because everything in cchOld gets deleted)
  1421. pae = gAEDispenser.CreateReplaceRangeAE(_ped, _cp, _cp + cchNew,
  1422. cchOld, pch, paeCF, paePF);
  1423. if( !pae )
  1424. delete pch;
  1425. if( pae )
  1426. publdr->AddAntiEvent(pae);
  1427. }
  1428. /*
  1429. * CTxtPtr::InsertRange(cch, pch)
  1430. *
  1431. * @mfunc
  1432. * Insert a range of characters at this text pointer
  1433. *
  1434. * @rdesc
  1435. * Count of characters successfully inserted
  1436. *
  1437. * @comm Side Effects: <nl>
  1438. * moves this text pointer to end of inserted text <nl>
  1439. * moves the text block array <nl>
  1440. */
  1441. LONG CTxtPtr::InsertRange (
  1442. LONG cch, //@parm length of text to insert
  1443. WCHAR const *pch) //@parm text to insert
  1444. {
  1445. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::InsertRange");
  1446. _TEST_INVARIANT_
  1447. LONG cchSave = cch;
  1448. LONG cchInBlock;
  1449. LONG cchFirst;
  1450. LONG cchLast = 0;
  1451. LONG ctbNew;
  1452. CTxtBlk *ptb;
  1453. // Ensure text array is allocated
  1454. if(!Count())
  1455. {
  1456. LONG cbSize = -1;
  1457. // If we don't have any blocks, allocate first block to be big enuf
  1458. // for the inserted text *only* if it's smaller than the normal block
  1459. // size. This allows us to be used efficiently as a display engine
  1460. // for small amounts of text.
  1461. if(cch < CchOfCb(cbBlockInitial))
  1462. cbSize = CbOfCch(cch);
  1463. if(!((CTxtArray *)_pRuns)->AddBlock(0, cbSize))
  1464. {
  1465. _ped->GetCallMgr()->SetOutOfMemory();
  1466. goto done;
  1467. }
  1468. }
  1469. ptb = GetRun(0);
  1470. cchInBlock = CchOfCb(ptb->_cbBlock) - ptb->_cch;
  1471. AssertSz(ptb->_cbBlock <= cbBlockMost, "block too big");
  1472. // Try resizing without splitting...
  1473. if(cch > cchInBlock &&
  1474. cch <= cchInBlock + CchOfCb(cbBlockMost - ptb->_cbBlock))
  1475. {
  1476. if( !ptb->ResizeBlock(min(cbBlockMost,
  1477. CbOfCch(ptb->_cch + cch + cchGapInitial))) )
  1478. {
  1479. _ped->GetCallMgr()->SetOutOfMemory();
  1480. goto done;
  1481. }
  1482. cchInBlock = CchOfCb(ptb->_cbBlock) - ptb->_cch;
  1483. }
  1484. if(cch <= cchInBlock)
  1485. {
  1486. // All fits into block without any hassle
  1487. ptb->MoveGap(_ich);
  1488. CopyMemory(ptb->_pch + _ich, pch, CbOfCch(cch));
  1489. _cp += cch; // *this points at end of
  1490. _ich += cch; // insertion
  1491. ptb->_cch += cch;
  1492. ((CTxtArray *)_pRuns)->_cchText += cch;
  1493. ptb->_ibGap += CbOfCch(cch);
  1494. return cch;
  1495. }
  1496. // Won't all fit in this block, so figure out best division into blocks
  1497. TxDivideInsertion(cch, _ich, ptb->_cch - _ich,&cchFirst, &cchLast);
  1498. // Subtract cchLast up front so return value isn't negative
  1499. // if SplitBlock() fails
  1500. cch -= cchLast; // Don't include last block in count for middle blocks
  1501. // Split block containing insertion point
  1502. // ***** moves _prgtb ***** //
  1503. if(!((CTxtArray *)_pRuns)->SplitBlock(_iRun, _ich, cchFirst, cchLast,
  1504. _ped->IsStreaming()))
  1505. {
  1506. _ped->GetCallMgr()->SetOutOfMemory();
  1507. goto done;
  1508. }
  1509. ptb = GetRun(0); // Recompute ptb after (*_pRuns) moves
  1510. // Copy into first block (first half of split)
  1511. if(cchFirst > 0)
  1512. {
  1513. AssertSz(ptb->_ibGap == CbOfCch(_ich), "split first gap in wrong place");
  1514. AssertSz(cchFirst <= CchOfCb(ptb->_cbBlock) - ptb->_cch, "split first not big enough");
  1515. CopyMemory(ptb->_pch + _ich, pch, CbOfCch(cchFirst));
  1516. cch -= cchFirst;
  1517. pch += cchFirst;
  1518. _ich += cchFirst;
  1519. ptb->_cch += cchFirst;
  1520. ((CTxtArray *)_pRuns)->_cchText += cchFirst;
  1521. ptb->_ibGap += CbOfCch(cchFirst);
  1522. }
  1523. // Copy into middle blocks
  1524. // FUTURE: (jonmat) I increased the size for how large a split block
  1525. // could be and this seems to increase the performance, we should test
  1526. // the block size difference on a retail build, however. 5/15/1995
  1527. ctbNew = cch / cchBlkInsertmGapI /* cchBlkInitmGapI */;
  1528. if(ctbNew <= 0 && cch > 0)
  1529. ctbNew = 1;
  1530. for(; ctbNew > 0; ctbNew--)
  1531. {
  1532. cchInBlock = cch / ctbNew;
  1533. AssertSz(cchInBlock > 0, "nothing to put into block");
  1534. // ***** moves _prgtb ***** //
  1535. if(!((CTxtArray *)_pRuns)->AddBlock(++_iRun,
  1536. CbOfCch(cchInBlock + cchGapInitial)))
  1537. {
  1538. _ped->GetCallMgr()->SetOutOfMemory();
  1539. BindToCp(_cp); //force a rebind;
  1540. goto done;
  1541. }
  1542. // NOTE: next line intentionally advances ptb to next CTxtBlk
  1543. ptb = GetRun(0);
  1544. AssertSz(ptb->_ibGap == 0, "New block not added correctly");
  1545. CopyMemory(ptb->_pch, pch, CbOfCch(cchInBlock));
  1546. cch -= cchInBlock;
  1547. pch += cchInBlock;
  1548. _ich = cchInBlock;
  1549. ptb->_cch = cchInBlock;
  1550. ((CTxtArray *)_pRuns)->_cchText += cchInBlock;
  1551. ptb->_ibGap = CbOfCch(cchInBlock);
  1552. }
  1553. AssertSz(cch == 0, "Didn't use up all text");
  1554. // copy into last block (second half of split)
  1555. if(cchLast > 0)
  1556. {
  1557. AssertSz(_iRun < Count()-1, "no last block");
  1558. ptb = Elem(++_iRun);
  1559. AssertSz(ptb->_ibGap == 0, "split last gap in wrong place");
  1560. AssertSz(cchLast <= CchOfCb(ptb->_cbBlock) - ptb->_cch,
  1561. "split last not big enuf");
  1562. CopyMemory(ptb->_pch, pch, CbOfCch(cchLast));
  1563. // don't subtract cchLast from cch; it's already been done
  1564. _ich = cchLast;
  1565. ptb->_cch += cchLast;
  1566. ((CTxtArray *)_pRuns)->_cchText += cchLast;
  1567. ptb->_ibGap = CbOfCch(cchLast);
  1568. cchLast = 0; // Inserted all requested chars
  1569. }
  1570. done:
  1571. AssertSz(cch + cchLast >= 0, "we should have inserted some characters");
  1572. AssertSz(cch + cchLast <= cchSave, "don't insert more than was asked for");
  1573. cch = cchSave - cch - cchLast; // # chars successfully inserted
  1574. _cp += cch;
  1575. AssertSz (GetTextLength() ==
  1576. ((CTxtArray *)_pRuns)->CalcTextLength(),
  1577. "CTxtPtr::InsertRange(): _pRuns->_cchText screwed up !");
  1578. return cch;
  1579. }
  1580. /*
  1581. * TxDivideInsertion(cch, ichBlock, cchAfter, pcchFirst, pcchLast)
  1582. *
  1583. * @func
  1584. * Find best way to distribute an insertion
  1585. *
  1586. * @rdesc
  1587. * nothing
  1588. */
  1589. static void TxDivideInsertion(
  1590. LONG cch, //@parm length of text to insert
  1591. LONG ichBlock, //@parm offset within block to insert text
  1592. LONG cchAfter, //@parm length of text after insertion in block
  1593. LONG *pcchFirst, //@parm exit: length of text to put in first block
  1594. LONG *pcchLast) //@parm exit: length of text to put in last block
  1595. {
  1596. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "TxDivideInsertion");
  1597. LONG cchFirst = max(0, cchBlkCombmGapI - ichBlock);
  1598. LONG cchLast = max(0, cchBlkCombmGapI - cchAfter);
  1599. LONG cchPartial;
  1600. LONG cchT;
  1601. // Fill first and last blocks to min block size if possible
  1602. cchFirst = min(cch, cchFirst);
  1603. cch -= cchFirst;
  1604. cchLast = min(cch, cchLast);
  1605. cch -= cchLast;
  1606. // How much is left over when we divide up the rest?
  1607. cchPartial = cch % cchBlkInsertmGapI;
  1608. if(cchPartial > 0)
  1609. {
  1610. // Fit as much as the leftover as possible in the first and last
  1611. // w/o growing the first and last over cbBlockInitial
  1612. cchT = max(0, cchBlkInsertmGapI - ichBlock - cchFirst);
  1613. cchT = min(cchT, cchPartial);
  1614. cchFirst += cchT;
  1615. cch -= cchT;
  1616. cchPartial -= cchT;
  1617. if(cchPartial > 0)
  1618. {
  1619. cchT = max(0, cchBlkInsertmGapI - cchAfter - cchLast);
  1620. cchT = min(cchT, cchPartial);
  1621. cchLast += cchT;
  1622. }
  1623. }
  1624. *pcchFirst = cchFirst;
  1625. *pcchLast = cchLast;
  1626. }
  1627. /*
  1628. * CTxtPtr::DeleteRange(cch)
  1629. *
  1630. * @mfunc
  1631. * Delete cch characters starting at this text pointer
  1632. *
  1633. * @rdesc
  1634. * nothing
  1635. *
  1636. * @comm Side Effects: <nl>
  1637. * moves text block array
  1638. */
  1639. void CTxtPtr::DeleteRange(
  1640. LONG cch) //@parm length of text to delete
  1641. {
  1642. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::DeleteRange");
  1643. _TEST_INVARIANT_
  1644. LONG cchInBlock;
  1645. LONG ctbDel = 0; // Default no blocks to delete
  1646. LONG itb;
  1647. CTxtBlk * ptb = GetRun(0);
  1648. LONG cOldRuns = Count();
  1649. AssertSz(ptb,
  1650. "CTxtPtr::DeleteRange: want to delete, but no text blocks");
  1651. if (cch > GetTextLength() - _cp) // Don't delete beyond end of story
  1652. cch = GetTextLength() - _cp;
  1653. ((CTxtArray *)_pRuns)->_cchText -= cch;
  1654. // remove from first block
  1655. ptb->MoveGap(_ich);
  1656. cchInBlock = min(cch, ptb->_cch - _ich);
  1657. cch -= cchInBlock;
  1658. ptb->_cch -= cchInBlock;
  1659. #ifdef DEBUG
  1660. ((CTxtArray *)_pRuns)->Invariant();
  1661. #endif // DEBUG
  1662. for(itb = ptb->_cch ? _iRun + 1 : _iRun;
  1663. cch && cch >= Elem(itb)->_cch; ctbDel++, itb++)
  1664. {
  1665. // More to go: scan for complete blocks to remove
  1666. cch -= Elem(itb)->_cch;
  1667. }
  1668. if(ctbDel)
  1669. {
  1670. // ***** moves (*_pRuns) ***** //
  1671. itb -= ctbDel;
  1672. ((CTxtArray *)_pRuns)->RemoveBlocks(itb, ctbDel);
  1673. }
  1674. // Remove from last block
  1675. if(cch > 0)
  1676. {
  1677. ptb = Elem(itb);
  1678. AssertSz(cch < ptb->_cch, "last block too small");
  1679. ptb->MoveGap(0);
  1680. ptb->_cch -= cch;
  1681. #ifdef DEBUG
  1682. ((CTxtArray *)_pRuns)->Invariant();
  1683. #endif // DEBUG
  1684. }
  1685. ((CTxtArray *)_pRuns)->CombineBlocks(_iRun);
  1686. if(cOldRuns > Count() || _iRun >= Count() || !Elem(_iRun)->_cch)
  1687. BindToCp(_cp); // Empty block: force tp rebind
  1688. AssertSz (GetTextLength() ==
  1689. ((CTxtArray *)_pRuns)->CalcTextLength(),
  1690. "CTxtPtr::DeleteRange(): _pRuns->_cchText screwed up !");
  1691. }
  1692. /*
  1693. * CTxtPtr::FindText (cpLimit, dwFlags, pch, cch)
  1694. *
  1695. * @mfunc
  1696. * Find the text string <p pch> of length <p cch> starting at this
  1697. * text pointer. If found, move this text pointer to the end of the
  1698. * matched string and return the cp of the first character of the matched
  1699. * string. If not found, return -1 and don't change this text ptr.
  1700. *
  1701. * @rdesc
  1702. * character position of first match
  1703. * <lt> 0 if no match
  1704. */
  1705. LONG CTxtPtr::FindText (
  1706. LONG cpLimit, //@parm Limit of search or <lt> 0 for end of text
  1707. DWORD dwFlags, //@parm FR_MATCHCASE case must match <nl>
  1708. // FR_WHOLEWORD match must be a whole word
  1709. const WCHAR *pch, //@parm Text to find
  1710. LONG cch) //@parm Length of text to find
  1711. {
  1712. LONG cpFirst, cpLast;
  1713. CTxtFinder tf;
  1714. if(tf.FindText(*this, cpLimit, dwFlags, pch, cch, cpFirst, cpLast))
  1715. {
  1716. // Set text ptr to char just after last char in found string
  1717. SetCp(cpLast + 1);
  1718. // Return cp of first char in found string
  1719. return cpFirst;
  1720. }
  1721. return -1;
  1722. }
  1723. /*
  1724. * CTxtPtr::FindOrSkipWhiteSpaces (cchMax, dwFlags, pdwResult)
  1725. *
  1726. * @mfunc
  1727. * Find a whitespace or a non-whitespace character (skip all whitespaces).
  1728. *
  1729. * @rdesc
  1730. * Signed number of character this ptr was moved by the operation.
  1731. * In case of moving backward, the return position was already adjusted forward
  1732. * so the caller doesnt need to.
  1733. */
  1734. LONG CTxtPtr::FindOrSkipWhiteSpaces (
  1735. LONG cchMax, //@parm Max signed count of char to search
  1736. DWORD dwFlags, //@parm Input flags
  1737. DWORD* pdwResult) //@parm Flag set if found
  1738. {
  1739. const WCHAR* pch;
  1740. CTxtPtr tp(*this);
  1741. LONG iDir = cchMax < 0 ? -1 : 1;
  1742. LONG cpSave = _cp;
  1743. LONG cchChunk, cch = 0;
  1744. DWORD dwResult = 0;
  1745. BOOL (*pfnIsWhite)(unsigned) = IsWhiteSpace;
  1746. if (dwFlags & FWS_BOUNDTOPARA)
  1747. pfnIsWhite = IsEOP;
  1748. if (cchMax < 0)
  1749. cchMax = -cchMax;
  1750. while (cchMax > 0 && !dwResult)
  1751. {
  1752. pch = iDir > 0 ? tp.GetPch(cch) : tp.GetPchReverse(cch);
  1753. if (!pch)
  1754. break; // No text available
  1755. if (iDir < 0)
  1756. pch--; // Going backward, point at previous char
  1757. cch = min(cch, cchMax);
  1758. for(cchChunk = cch; cch > 0; cch--, pch += iDir)
  1759. {
  1760. if ((dwFlags & FWS_SKIP) ^ pfnIsWhite(*pch))
  1761. {
  1762. dwResult++;
  1763. break;
  1764. }
  1765. }
  1766. cchChunk -= cch;
  1767. cchMax -= cchChunk;
  1768. tp.Move(iDir * cchChunk); // advance to next chunk
  1769. }
  1770. if (pdwResult)
  1771. *pdwResult = dwResult;
  1772. cch = tp.GetCp() - cpSave;
  1773. if (dwFlags & FWS_MOVE)
  1774. Move(cch); // Auto advance if requested
  1775. return cch;
  1776. }
  1777. /*
  1778. * CTxtPtr::FindWhiteSpaceBound (cchMin, cpStart, cpEnd, dwFlags)
  1779. *
  1780. * @mfunc
  1781. * Figure the smallest boundary that covers cchMin and limited by
  1782. * whitespaces (included CR/LF). This is how it works.
  1783. *
  1784. * Text: xxx xxx xxx xxx xxx
  1785. * cp + cchMin: xxxxx
  1786. * Boundary: xxxxxxxxxxxxx
  1787. *
  1788. * @rdesc
  1789. * cch of white space characters
  1790. */
  1791. LONG CTxtPtr::FindWhiteSpaceBound (
  1792. LONG cchMin, // @parm Minimum char count to be covered
  1793. LONG& cpStart, // @parm Boundary start
  1794. LONG& cpEnd, // @parm Boundary end
  1795. DWORD dwFlags) // @parm Input flags
  1796. {
  1797. CTxtPtr tp(*this);
  1798. LONG cch = tp.GetTextLength();
  1799. LONG cp = _cp;
  1800. Assert (cp + cchMin <= cch);
  1801. cpStart = cpEnd = cp;
  1802. cpEnd += max(2, cchMin); // make sure it covers minimum requirement.
  1803. cpEnd = min(cpEnd, cch); // but not too many
  1804. dwFlags &= FWS_BOUNDTOPARA;
  1805. // Figure nearest upper bound
  1806. //
  1807. tp.SetCp(cpEnd);
  1808. cpEnd += tp.FindOrSkipWhiteSpaces(cch - cpEnd, dwFlags | FWS_MOVE); // find a whitespaces
  1809. cpEnd += tp.FindOrSkipWhiteSpaces(cch - cpEnd, dwFlags | FWS_MOVE | FWS_SKIP); // skip whitespaces
  1810. if (!(dwFlags & FWS_BOUNDTOPARA))
  1811. cpEnd += tp.FindOrSkipWhiteSpaces(cch - cpEnd, dwFlags | FWS_MOVE); // find a whitespace
  1812. // Figure nearest lower bound
  1813. //
  1814. tp.SetCp(cpStart);
  1815. cpStart += tp.FindOrSkipWhiteSpaces(-cpStart, dwFlags | FWS_MOVE); // find a whitespace
  1816. cpStart += tp.FindOrSkipWhiteSpaces(-cpStart, dwFlags | FWS_MOVE | FWS_SKIP); // skip whitespaces
  1817. if (!(dwFlags & FWS_BOUNDTOPARA))
  1818. cpStart += tp.FindOrSkipWhiteSpaces(-cpStart, dwFlags | FWS_MOVE); // find a whitespace
  1819. Assert (cpStart <= cpEnd && cpEnd - cpStart >= cchMin);
  1820. return cpEnd - cpStart;
  1821. }
  1822. /*
  1823. * CTxtPtr::FindEOP(cchMax, pResults)
  1824. *
  1825. * @mfunc
  1826. * Find EOP mark in a range within cchMax chars from this text pointer
  1827. * and position *this after it. If no EOP is found and cchMax is not
  1828. * enough to reach the start or end of the story, leave this text ptr
  1829. * alone and return 0. If no EOP is found and cchMax is sufficient to
  1830. * reach the start or end of the story, position this text ptr at the
  1831. * beginning/end of document (BOD/EOD) for cchMax <lt>/<gt> 0,
  1832. * respectively, that is, BOD and EOD are treated as a BOP and an EOP,
  1833. * respectively.
  1834. *
  1835. * @rdesc
  1836. * Return cch this text ptr is moved. Return in *pResults whether a CELL
  1837. * or EOP was found. The low byte gives the cch of the EOP if moving
  1838. * forward (else it's just 1).
  1839. *
  1840. * @devnote
  1841. * This function assumes that this text ptr isn't in middle of a CRLF
  1842. * or CRCRLF (found only in RichEdit 1.0 compatibility mode). Changing
  1843. * the for loop could speed up ITextRange MoveUntil/While substantially.
  1844. */
  1845. LONG CTxtPtr::FindEOP (
  1846. LONG cchMax, //@parm Max signed count of chars to search
  1847. LONG *pResults) //@parm Flags saying if EOP and CELL are found
  1848. {
  1849. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::FindEOP");
  1850. LONG cch = 0, cchStart; // cch's for scans
  1851. unsigned ch; // Current char
  1852. LONG cpSave = _cp; // Save _cp for returning delta
  1853. LONG iDir = 1; // Default forward motion
  1854. const WCHAR*pch; // Used to walk text chunks
  1855. LONG Results = 0; // Nothing found yet
  1856. CTxtPtr tp(*this); // tp to search text with
  1857. if(cchMax < 0) // Backward search
  1858. {
  1859. iDir = -1; // Backward motion
  1860. cchMax = -cchMax; // Make max count positive
  1861. cch = tp.AdjustCRLF(); // If in middle of CRLF or
  1862. if(!cch && IsAfterEOP()) // CRCRLF, or follow any EOP,
  1863. cch = tp.BackupCRLF(); // backup before EOP
  1864. cchMax += cch;
  1865. }
  1866. while(cchMax > 0) // Scan until get out of search
  1867. { // range or match an EOP
  1868. pch = iDir > 0 // Point pch at contiguous text
  1869. ? tp.GetPch(cch) // chunk going forward or
  1870. : tp.GetPchReverse(cch); // going backward
  1871. if(!pch) // No more text to search
  1872. break;
  1873. if(iDir < 0) // Going backward, point at
  1874. pch--; // previous char
  1875. cch = min(cch, cchMax); // Limit scan to cchMax chars
  1876. for(cchStart = cch; cch; cch--) // Scan chunk for EOP
  1877. {
  1878. ch = *pch;
  1879. pch += iDir;
  1880. if(IN_RANGE(CELL, ch, CR) && ch != TAB)
  1881. { // Note that EOP was found
  1882. if(ch == CELL)
  1883. Results |= FEOP_CELL;
  1884. Results |= FEOP_EOP;
  1885. break;
  1886. }
  1887. }
  1888. cchStart -= cch; // Get cch of chars passed by
  1889. cchMax -= cchStart; // Update cchMax
  1890. AssertSz(iDir > 0 && GetCp() + cchStart <= GetTextLength() ||
  1891. iDir < 0 && GetCp() - cchStart >= 0,
  1892. "CTxtPtr::FindEOP: illegal advance");
  1893. tp.Move(iDir*cchStart); // Update tp
  1894. if(Results & FEOP_EOP) // Found an EOP
  1895. break;
  1896. } // Continue with next chunk
  1897. LONG cp = tp.GetCp();
  1898. if ((Results & FEOP_EOP) || !cp || // Found EOP or cp is at story
  1899. cp == GetTextLength()) // beginning or end
  1900. {
  1901. SetCp(cp); // Set _cp = tp._cp
  1902. if(iDir > 0) // Going forward, put ptr just
  1903. Results = (Results & ~255) | AdvanceCRLF(FALSE);// after EOP
  1904. // (going back already there)
  1905. }
  1906. if(pResults) // Report whether EOP and CELL
  1907. *pResults = Results; // were found
  1908. return _cp - cpSave; // Return cch this tp moved
  1909. }
  1910. /*
  1911. * CTxtPtr::FindBOSentence(cch)
  1912. *
  1913. * @mfunc
  1914. * Find beginning of sentence in a range within cch chars from this text
  1915. * pointer and position *this at it. If no sentence beginning is found,
  1916. * position *this at beginning of document (BOD) for cch <lt> 0 and
  1917. * leave *this unchanged for cch >= 0.
  1918. *
  1919. * @rdesc
  1920. * Count of chars moved *this moves
  1921. *
  1922. * @comm
  1923. * This routine defines a sentence as a character string that ends with
  1924. * period followed by at least one whitespace character or the EOD. This
  1925. * should be replacable so that other kinds of sentence endings can be
  1926. * used. This routine also matches initials like "M. " as sentences.
  1927. * We could eliminate those by requiring that sentences don't end with
  1928. * a word consisting of a single capital character. Similarly, common
  1929. * abbreviations like "Mr." could be bypassed. To allow a sentence to
  1930. * end with these "words", two blanks following a period could be used
  1931. * to mean an unconditional end of sentence.
  1932. */
  1933. LONG CTxtPtr::FindBOSentence (
  1934. LONG cch) //@parm max signed count of chars to search
  1935. {
  1936. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::FindBOSentence");
  1937. _TEST_INVARIANT_
  1938. LONG cchWhite = 0; // No whitespace chars yet
  1939. LONG cp;
  1940. LONG cpSave = _cp; // Save value for return
  1941. BOOL fST; // TRUE if sent terminator
  1942. LONG iDir = cch > 0 ? 1 : -1; // Move() increment
  1943. CTxtPtr tp(*this); // tp to search with
  1944. if(iDir > 0) // If going forward in white
  1945. while(IsWhiteSpace(tp.GetChar()) && // space, backup to 1st non
  1946. tp.Move(-1)); // whitespace char (in case
  1947. // inside sentence ending)
  1948. while(iDir > 0 || tp.Move(-1)) // Need to back up if finding
  1949. { // backward
  1950. for(fST = FALSE; cch; cch -= iDir) // Find sentence terminator
  1951. {
  1952. fST = IsSentenceTerminator(tp.GetChar());
  1953. if(fST || !tp.Move(iDir))
  1954. break;
  1955. }
  1956. if(!fST) // If FALSE, we ran out of
  1957. break; // chars
  1958. while(IsWhiteSpace(tp.NextChar()) && cch)
  1959. { // Bypass a span of blank
  1960. cchWhite++; // chars
  1961. cch--;
  1962. }
  1963. if(cchWhite && (cch >= 0 || tp._cp < cpSave))// Matched new sentence
  1964. break; // break
  1965. if(cch < 0) // Searching backward
  1966. {
  1967. tp.Move(-cchWhite - 1); // Back up to terminator
  1968. cch += cchWhite + 1; // Fewer chars to search
  1969. }
  1970. cchWhite = 0; // No whitespace yet for next
  1971. } // iteration
  1972. cp = tp._cp;
  1973. if(cchWhite || !cp || cp == GetTextLength())// If sentence found or got
  1974. SetCp(cp); // start/end of story, set
  1975. // _cp to tp's
  1976. return _cp - cpSave; // Tell caller cch moved
  1977. }
  1978. /*
  1979. * CTxtPtr::IsAtBOSentence()
  1980. *
  1981. * @mfunc
  1982. * Return TRUE iff *this is at the beginning of a sentence (BOS) as
  1983. * defined in the description of the FindBOSentence(cch) routine
  1984. *
  1985. * @rdesc
  1986. * TRUE iff this text ptr is at the beginning of a sentence
  1987. */
  1988. BOOL CTxtPtr::IsAtBOSentence()
  1989. {
  1990. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::IsAtBOSentence");
  1991. if(!_cp) // Beginning of story is an
  1992. return TRUE; // unconditional beginning
  1993. // of sentence
  1994. unsigned ch = GetChar();
  1995. if (IsWhiteSpace(ch) || // Proper sentences don't
  1996. IsSentenceTerminator(ch)) // start with whitespace or
  1997. { // sentence terminators
  1998. return FALSE;
  1999. }
  2000. LONG cchWhite;
  2001. CTxtPtr tp(*this); // tp to walk preceding chars
  2002. for(cchWhite = 0; // Backspace over possible
  2003. IsWhiteSpace(ch = tp.PrevChar()); // span of whitespace chars
  2004. cchWhite++) ;
  2005. return cchWhite && IsSentenceTerminator(ch);
  2006. }
  2007. /*
  2008. * CTxtPtr::IsAtBOWord()
  2009. *
  2010. * @mfunc
  2011. * Return TRUE iff *this is at the beginning of a word, that is,
  2012. * _cp = 0 or the char at _cp is an EOP, or
  2013. * FindWordBreak(WB_MOVEWORDRIGHT) would break at _cp.
  2014. *
  2015. * @rdesc
  2016. * TRUE iff this text ptr is at the beginning of a Word
  2017. */
  2018. BOOL CTxtPtr::IsAtBOWord()
  2019. {
  2020. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::IsAtBOWord");
  2021. if(!_cp || IsAtEOP()) // Story beginning is also
  2022. return TRUE; // a word beginning
  2023. CTxtPtr tp(*this);
  2024. tp.Move(-1);
  2025. tp.FindWordBreak(WB_MOVEWORDRIGHT);
  2026. return _cp == tp._cp;
  2027. }
  2028. /*
  2029. * CTxtPtr::FindExact(cchMax, pch)
  2030. *
  2031. * @mfunc
  2032. * Find exact text match for null-terminated string pch in a range
  2033. * starting at this text pointer. Position this just after matched
  2034. * string and return cp at start of string, i.e., same as FindText().
  2035. *
  2036. * @rdesc
  2037. * Return cp of first char in matched string and *this pointing at cp
  2038. * just following matched string. Return -1 if no match
  2039. *
  2040. * @comm
  2041. * Much faster than FindText, but still a simple search, i.e., could
  2042. * be improved.
  2043. *
  2044. * FindText can delegate to this search for search strings in which
  2045. * each char can only match itself.
  2046. */
  2047. LONG CTxtPtr::FindExact (
  2048. LONG cchMax, //@parm signed max # of chars to search
  2049. WCHAR * pch) //@parm ptr to null-terminated string to find exactly
  2050. {
  2051. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::FindExact");
  2052. _TEST_INVARIANT_
  2053. LONG cch, cchStart;
  2054. LONG cchValid;
  2055. LONG cchText = GetTextLength();
  2056. LONG cpMatch;
  2057. LONG iDir = 1; // Default for forward search
  2058. const WCHAR *pc;
  2059. CTxtPtr tp(*this); // tp to search text with
  2060. if(!*pch)
  2061. return -1; // Signal null string not found
  2062. if(cchMax < 0) // Backward search
  2063. {
  2064. iDir = -1;
  2065. cchMax = -cchMax; // Make count positive
  2066. }
  2067. while(cchMax > 0)
  2068. {
  2069. if(iDir > 0)
  2070. {
  2071. if(tp.GetCp() >= cchText) // Can't go further
  2072. break;
  2073. pc = tp.GetPch(cchValid); // Characters we can search w/o
  2074. cch = cchValid; // encountering block end/gap,
  2075. } // i.e., stay within text chunk
  2076. else
  2077. {
  2078. if(!tp.GetCp()) // Can't back up any more
  2079. break;
  2080. tp.Move(-1);
  2081. pc = tp.GetPchReverse(cchValid);
  2082. cch = cchValid + 1;
  2083. }
  2084. cch = min(cch, cchMax);
  2085. if(!cch || !pc)
  2086. break; // No more text to search
  2087. for(cchStart = cch; // Find first char
  2088. cch && *pch != *pc; cch--) // Most execution time is spent
  2089. { // in this loop going forward or
  2090. pc += iDir; // backward. x86 rep scasb/scasw
  2091. } // are faster
  2092. cchStart -= cch;
  2093. cchMax -= cchStart; // Update cchMax
  2094. tp.Move( iDir*(cchStart)); // Update tp
  2095. if(cch && *pch == *pc) // Matched first char
  2096. { // See if matches up to null
  2097. cpMatch = tp.GetCp(); // Save cp of matched first char
  2098. cch = cchMax;
  2099. for(pc = pch; // Try to match rest of string
  2100. cch && *++pc==tp.NextChar();// Note: this match goes forward
  2101. cch--) ; // for both values of iDir
  2102. if(!cch)
  2103. break; // Not enuf chars for string
  2104. if(!*pc) // Matched null-terminated string
  2105. { // *pch. Set this tp just after
  2106. SetCp(tp.GetCp()); // matched string and return cp
  2107. return cpMatch; // at start
  2108. }
  2109. tp.SetCp(cpMatch + iDir); // Move to char just following or
  2110. } // preceding matched first char
  2111. } // Up-to-date tp: continue search
  2112. return -1; // Signal string not found
  2113. }
  2114. /*
  2115. * CTxtPtr::NextCharCount(&cch)
  2116. *
  2117. * @mfunc
  2118. * Helper function for getting next char and decrementing abs(*pcch)
  2119. *
  2120. * @rdesc
  2121. * Next char
  2122. */
  2123. WCHAR CTxtPtr::NextCharCount (
  2124. LONG& cch) //@parm count to use and decrement
  2125. {
  2126. TRACEBEGIN(TRCSUBSYSTOM, TRCSCOPEINTERN, "CTxtPtr::NextCharCount");
  2127. LONG iDelta = (cch > 0) ? 1 : -1;
  2128. if(!cch || !Move(iDelta))
  2129. return 0;
  2130. cch -= iDelta; // Count down or up
  2131. return GetChar(); // Return char at _cp
  2132. }
  2133. /*
  2134. * CTxtPtr::Zombie ()
  2135. *
  2136. * @mfunc
  2137. * Turn this object into a zombie by NULLing out its _ped member
  2138. */
  2139. void CTxtPtr::Zombie ()
  2140. {
  2141. TRACEBEGIN(TRCSUBSYSBACK, TRCSCOPEINTERN, "CTxtPtr::Zombie");
  2142. _ped = NULL;
  2143. _cp = 0;
  2144. SetToNull();
  2145. }
  2146. /*
  2147. * CTxtIStream::CTxtIStream(tp, iDir)
  2148. *
  2149. * @mfunc
  2150. * Creates from the textptr, <p tp>, a character input stream with which
  2151. * to retrieve characters starting from the cp of the <p tp> and proceeding
  2152. * in the direction indicated by <p iDir>.
  2153. */
  2154. CTxtIStream::CTxtIStream(
  2155. const CTxtPtr &tp,
  2156. int iDir
  2157. ) : CTxtPtr(tp)
  2158. {
  2159. _pfnGetChar = (iDir == DIR_FWD ?
  2160. &CTxtIStream::GetNextChar : &CTxtIStream::GetPrevChar);
  2161. _cch = 0;
  2162. _pch = NULL;
  2163. }
  2164. /*
  2165. * CTxtIStream::GetNextChar()
  2166. *
  2167. * @mfunc
  2168. * Returns the next character in the text stream.
  2169. * Ensures that at least one valid character exists in _pch and then returns
  2170. * the next character in _pch.
  2171. *
  2172. * @rdesc
  2173. * WCHAR the next character in the character input stream
  2174. * 0, if end of text stream
  2175. */
  2176. WCHAR CTxtIStream::GetNextChar()
  2177. {
  2178. if(!_cch)
  2179. FillPchFwd();
  2180. if(_cch)
  2181. {
  2182. _cch--;
  2183. return *_pch++;
  2184. }
  2185. return 0;
  2186. }
  2187. /*
  2188. * CTxtIStream::GetPrevChar()
  2189. *
  2190. * @mfunc
  2191. * Returns the next character in the text stream, where the direction of the
  2192. * stream is reverse.
  2193. * Ensures that at least one valid character exists in _pch and then returns
  2194. * the next character in _pch. Here, _pch points to the end of a string
  2195. * containing _cch valid characters.
  2196. *
  2197. * @rdesc
  2198. * WCHAR the next character in the character input stream (travelling backwards
  2199. * along the string pointed to by _pch)
  2200. * 0, if end of text stream
  2201. */
  2202. WCHAR CTxtIStream::GetPrevChar()
  2203. {
  2204. if(!_cch)
  2205. FillPchRev();
  2206. if(_cch)
  2207. {
  2208. _cch--;
  2209. return *(--_pch);
  2210. }
  2211. return 0;
  2212. }
  2213. /*
  2214. * CTxtIStream::FillPchFwd()
  2215. *
  2216. * @mfunc
  2217. * Gets the next run of characters and Moves the cp of this CTxtPtr (base
  2218. * class) just past the run.
  2219. * This ensures enough chars in _pch to facilitate the next _cch calls to
  2220. * GetNextChar().
  2221. */
  2222. void CTxtIStream::FillPchFwd()
  2223. {
  2224. _pch = GetPch(_cch);
  2225. Move(_cch);
  2226. }
  2227. /*
  2228. * CTxtIStream::FillPchRev()
  2229. *
  2230. * @mfunc
  2231. * Gets the run of characters preceding the one previously pointed to by _pch
  2232. * and moves the cp of this CTxtPtr (base class) to the beginning of the run.
  2233. * This ensures enough chars in _pch to facilitate the next _cch calls to
  2234. * GetPrevChar().
  2235. */
  2236. void CTxtIStream::FillPchRev()
  2237. {
  2238. _pch = GetPchReverse(_cch);
  2239. Move(-_cch);
  2240. }
  2241. /*
  2242. * CTxtFinder::FindText(tp, cpLimit, dwFlags, pchToFind, cchToFind, &cpFirst, &cpLast)
  2243. *
  2244. * @mfunc
  2245. * Find the text string <p pchToFind> of length <p cchToFind> starting at
  2246. * this text pointer. If found, <p cpFirst> and <p cpLast> are set to the
  2247. * cp's of the first and last characters in the matched string (wrt tp).
  2248. * If not found, return FALSE.
  2249. *
  2250. * @rdesc
  2251. * TRUE string matched. First char at tp.GetCp() + cchOffFirst.
  2252. * Last char at tp.GetCp() + cchOffLast.
  2253. * FALSE string not found.
  2254. */
  2255. BOOL CTxtPtr::CTxtFinder::FindText (
  2256. const CTxtPtr &tp,
  2257. LONG cpLimit, //@parm Limit of search or <lt> 0 for end of text
  2258. DWORD dwFlags, //@parm FR_MATCHCASE case must match <nl>
  2259. // FR_WHOLEWORD match must be a whole word
  2260. const WCHAR *pchToFind, //@parm Text to search for
  2261. LONG cchToFind, //@parm Count of chars to search for
  2262. LONG &cpFirst, //@parm If string found, returns cp (wrt tp) of first char
  2263. LONG &cpLast) //@parm If string found, returns cp (wrt tp) of last char
  2264. {
  2265. if(!cchToFind)
  2266. return FALSE;
  2267. _fSearchForward = dwFlags & FR_DOWN;
  2268. // Calculate max number of chars we must search for pchToFind
  2269. if(_fSearchForward)
  2270. {
  2271. const LONG cchText = tp.GetTextLength();
  2272. if((DWORD)cpLimit > (DWORD)cchText) // NB: catches cpLimit < 0 too
  2273. cpLimit = cchText;
  2274. _cchToSearch = cpLimit - tp.GetCp();
  2275. }
  2276. else
  2277. {
  2278. if((DWORD)cpLimit > (DWORD)tp.GetCp()) // NB: catches cpLimit < 0 too
  2279. cpLimit = 0;
  2280. _cchToSearch = tp.GetCp() - cpLimit;
  2281. }
  2282. if(cchToFind > _cchToSearch)
  2283. {
  2284. // Not enough chars in requested direction within which
  2285. // to find string
  2286. return FALSE;
  2287. }
  2288. const BOOL fWholeWord = dwFlags & FR_WHOLEWORD;
  2289. _fIgnoreCase = !(dwFlags & FR_MATCHCASE);
  2290. _fMatchAlefhamza = dwFlags & FR_MATCHALEFHAMZA;
  2291. _fMatchKashida = dwFlags & FR_MATCHKASHIDA;
  2292. _fMatchDiac = dwFlags & FR_MATCHDIAC;
  2293. typedef LONG (CTxtPtr::CTxtFinder::*PFNMATCHSTRING)(WCHAR const *pchToFind,
  2294. LONG cchToFind,
  2295. CTxtIStream &tistr);
  2296. // Setup function pointer appropriate for this type of search
  2297. CTxtEdit* ped = tp._ped;
  2298. PFNMATCHSTRING pfnMatchString;
  2299. #define MATCHARABICSPECIALS (FR_MATCHALEFHAMZA | FR_MATCHKASHIDA | FR_MATCHDIAC)
  2300. // If match all Arabic special characters exactly, then use simpler
  2301. // MatchString routine. If ignore any and BiDi text exists, use
  2302. // MatchStringBiDi.
  2303. pfnMatchString = (ped->IsBiDi() &&
  2304. (dwFlags & MATCHARABICSPECIALS) != MATCHARABICSPECIALS)
  2305. ? &CTxtFinder::MatchStringBiDi
  2306. : &CTxtFinder::MatchString;
  2307. _iDirection = _fSearchForward ? 1 : -1;
  2308. BOOL fFound = FALSE;
  2309. WCHAR chFirst = _fSearchForward ? *pchToFind : pchToFind[cchToFind - 1];
  2310. const WCHAR *pchRemaining = _fSearchForward ?
  2311. &pchToFind[1] : &pchToFind[cchToFind - 2];
  2312. LONG cchRead;
  2313. LONG cchReadToFirst = 0;
  2314. LONG cchReadToLast;
  2315. CTxtIStream tistr(tp,
  2316. _fSearchForward ? CTxtIStream::DIR_FWD : CTxtIStream::DIR_REV);
  2317. while((cchRead = FindChar(chFirst, tistr)) != -1)
  2318. {
  2319. cchReadToFirst += cchRead;
  2320. if(cchToFind == 1) // Only one char in string - we've matched it!
  2321. {
  2322. if (_iDirection > 0) // Searching forward
  2323. {
  2324. Assert(tp.GetCp() + cchReadToFirst - 1 >= 0);
  2325. cpLast = cpFirst = tp.GetCp() + cchReadToFirst - 1;
  2326. }
  2327. else // Searching backward
  2328. {
  2329. Assert(tp.GetCp() - cchReadToFirst >= 0);
  2330. cpLast = cpFirst = tp.GetCp() - cchReadToFirst;
  2331. }
  2332. fFound = TRUE;
  2333. }
  2334. else
  2335. {
  2336. // Check if this first char begins a match of string
  2337. CTxtIStream tistrT(tistr);
  2338. cchRead = (this->*pfnMatchString)(pchRemaining, cchToFind - 1, tistrT);
  2339. if(cchRead != -1)
  2340. {
  2341. cchReadToLast = cchReadToFirst + cchRead;
  2342. if (_iDirection > 0) // Searching forward
  2343. {
  2344. Assert(tp.GetCp() + cchReadToFirst - 1 >= 0);
  2345. Assert(tp.GetCp() + cchReadToLast - 1 >= 0);
  2346. cpFirst = tp.GetCp() + cchReadToFirst - 1;
  2347. cpLast = tp.GetCp() + cchReadToLast - 1;
  2348. }
  2349. else // Searching backward
  2350. {
  2351. Assert(tp.GetCp() - cchReadToFirst >= 0);
  2352. Assert(tp.GetCp() - cchReadToLast >= 0);
  2353. cpFirst = tp.GetCp() - cchReadToFirst;
  2354. cpLast = tp.GetCp() - cchReadToLast;
  2355. }
  2356. fFound = TRUE;
  2357. }
  2358. }
  2359. if(fFound)
  2360. {
  2361. Assert(cpLast < tp.GetTextLength());
  2362. if(!fWholeWord)
  2363. break;
  2364. // Check if matched string is whole word
  2365. LONG cchT;
  2366. LONG cpBefore = (_fSearchForward ? cpFirst : cpLast) - 1;
  2367. LONG cpAfter = (_fSearchForward ? cpLast : cpFirst) + 1;
  2368. if((cpBefore < 0 ||
  2369. (ped->TxWordBreakProc(const_cast<LPTSTR>(CTxtPtr(tp._ped, cpBefore).GetPch(cchT)),
  2370. 0,
  2371. sizeof(WCHAR),
  2372. WB_CLASSIFY, cpBefore) & WBF_CLASS) ||
  2373. ped->_pbrk && ped->_pbrk->CanBreakCp(BRK_WORD, cpBefore + 1))
  2374. &&
  2375. (cpAfter >= tp.GetTextLength() ||
  2376. (ped->TxWordBreakProc(const_cast<LPTSTR>(CTxtPtr(tp._ped, cpAfter).GetPch(cchT)),
  2377. 0,
  2378. sizeof(WCHAR),
  2379. WB_CLASSIFY, cpAfter) & WBF_CLASS) ||
  2380. ped->_pbrk && ped->_pbrk->CanBreakCp(BRK_WORD, cpAfter)))
  2381. {
  2382. break;
  2383. }
  2384. else
  2385. fFound = FALSE;
  2386. }
  2387. }
  2388. if(fFound && !_fSearchForward)
  2389. {
  2390. // For search backwards, first and last are juxtaposed
  2391. LONG cpTemp = cpFirst;
  2392. cpFirst = cpLast;
  2393. cpLast = cpTemp;
  2394. }
  2395. return fFound;
  2396. }
  2397. /*
  2398. * CTxtPtr::CTxtFinder::CharCompMatchCase(ch1, ch2)
  2399. *
  2400. * @func Character comparison function sensitive to case according to parms
  2401. * of current search.
  2402. *
  2403. * @rdesc TRUE iff characters are equal
  2404. */
  2405. inline BOOL CTxtPtr::CTxtFinder::CharComp(
  2406. WCHAR ch1,
  2407. WCHAR ch2) const
  2408. {
  2409. // We compare the characters ourselves if ignore case AND the character isn't a surrogate
  2410. //
  2411. return (_fIgnoreCase && !IN_RANGE(0xD800, ch1, 0xDFFF)) ? CharCompIgnoreCase(ch1, ch2) : (ch1 == ch2);
  2412. }
  2413. /*
  2414. * CTxtPtr::CTxtFinder::CharCompIgnoreCase(ch1, ch2)
  2415. *
  2416. * @func Character comparison function
  2417. *
  2418. * @rdesc TRUE iff characters are equal, ignoring case
  2419. */
  2420. inline BOOL CTxtPtr::CTxtFinder::CharCompIgnoreCase(
  2421. WCHAR ch1,
  2422. WCHAR ch2) const
  2423. {
  2424. return CompareString(LOCALE_USER_DEFAULT,
  2425. NORM_IGNORECASE | NORM_IGNOREWIDTH,
  2426. &ch1, 1, &ch2, 1) == 2;
  2427. }
  2428. /*
  2429. * CTxtPtr::CTxtFinder::FindChar(ch, tistr)
  2430. *
  2431. * @mfunc
  2432. * Steps through the characters returned from <p tistr> until a character is
  2433. * found which matches ch or until _cchToSearch characters have been examined.
  2434. * If found, the return value indicates the number of chars read from <p tistr>.
  2435. * If not found, -1 is returned.
  2436. *
  2437. * @rdesc
  2438. * -1, if char not found
  2439. * n, if char found. n indicates number of chars read from <p tistr>
  2440. * to find the char
  2441. */
  2442. LONG CTxtPtr::CTxtFinder::FindChar(
  2443. WCHAR ch,
  2444. CTxtIStream &tistr)
  2445. {
  2446. LONG cchSave = _cchToSearch;
  2447. while(_cchToSearch)
  2448. {
  2449. _cchToSearch--;
  2450. WCHAR chComp = tistr.GetChar();
  2451. if(CharComp(ch, chComp) ||
  2452. (!_fMatchAlefhamza && IsAlef(ch) && IsAlef(chComp)))
  2453. {
  2454. return cchSave - _cchToSearch;
  2455. }
  2456. }
  2457. return -1;
  2458. }
  2459. /*
  2460. * CTxtPtr::CTxtFinder::MatchString(pchToFind, cchToFind, tistr)
  2461. *
  2462. * @mfunc
  2463. * This method compares the characters returned from <p tistr> against those
  2464. * found in pchToFind. If the string is found, the return value indicates
  2465. * how many characters were read from <p tistr> to match the string.
  2466. * If the string is not found, -1 is returned.
  2467. *
  2468. * @rdesc
  2469. * -1, if string not found
  2470. * n, if string found. n indicates number of chars read from <p tistr>
  2471. * to find string
  2472. */
  2473. LONG CTxtPtr::CTxtFinder::MatchString(
  2474. const WCHAR *pchToFind,
  2475. LONG cchToFind,
  2476. CTxtIStream &tistr)
  2477. {
  2478. if((DWORD)_cchToSearch < (DWORD)cchToFind)
  2479. return -1;
  2480. LONG cchT = cchToFind;
  2481. while(cchT--)
  2482. {
  2483. if(!CharComp(*pchToFind, tistr.GetChar()))
  2484. return -1;
  2485. pchToFind += _iDirection;
  2486. }
  2487. return cchToFind;
  2488. }
  2489. /*
  2490. * CTxtPtr::CTxtFinder::MatchStringBiDi(pchToFind, cchToFind, tistr)
  2491. *
  2492. * @mfunc
  2493. * This method compares the characters returned from <p tistr> against those
  2494. * found in pchToFind. If the string is found, the return value indicates
  2495. * how many characters were read from <p tistr> to match the string.
  2496. * If the string is not found, -1 is returned.
  2497. * Kashida, diacritics and Alefs are matched/not matched according
  2498. * to the type of search requested.
  2499. *
  2500. * @rdesc
  2501. * -1, if string not found
  2502. * n, if string found. n indicates number of chars read from <p tistr>
  2503. * to find string
  2504. */
  2505. LONG CTxtPtr::CTxtFinder::MatchStringBiDi(
  2506. const WCHAR *pchToFind,
  2507. LONG cchToFind,
  2508. CTxtIStream &tistr)
  2509. {
  2510. if((DWORD)_cchToSearch < (DWORD)cchToFind)
  2511. return -1;
  2512. LONG cchRead = 0;
  2513. while(cchToFind)
  2514. {
  2515. WCHAR chComp = tistr.GetChar();
  2516. cchRead++;
  2517. if(!CharComp(*pchToFind, chComp))
  2518. {
  2519. if (!_fMatchKashida && chComp == KASHIDA ||
  2520. !_fMatchDiac && IsBiDiDiacritic(chComp))
  2521. {
  2522. continue;
  2523. }
  2524. if (!_fMatchAlefhamza &&
  2525. IsAlef(*pchToFind) && IsAlef(chComp))
  2526. {
  2527. // Skip *pchToFind
  2528. }
  2529. else
  2530. return -1;
  2531. }
  2532. pchToFind += _iDirection;
  2533. cchToFind--;
  2534. }
  2535. return cchRead;
  2536. }