Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

780 lines
19 KiB

  1. /*
  2. * @doc INTERNAL
  3. *
  4. * @module URLSUP.CPP URL detection support |
  5. *
  6. * Author: alexgo 4/3/96
  7. *
  8. * Copyright (c) 1995-1997, Microsoft Corporation. All rights reserved.
  9. */
  10. #include "_common.h"
  11. #include "_edit.h"
  12. #include "_urlsup.h"
  13. #include "_m_undo.h"
  14. #include "_select.h"
  15. #include "_clasfyc.h"
  16. ASSERTDATA
  17. // Arrays for URL detection. The first array is the protocols
  18. // we support, followed by the "size" of the array.
  19. // NB!! Do _not_ modify these arrays without first making sure
  20. // that the code in ::IsURL is updated appropriately.
  21. /*
  22. FUTURE (keithcu)
  23. We should generalize our support to recognize URLs of the following type:
  24. Maybe we should do autocorrect so that:
  25. keithcu@microsoft.com converts to mailto:keithcu@microsoft.com
  26. Should we put this code in PutChar rather than here?
  27. What about URLs of the form "seattle.sidewalk.com"? Word doesn't support this yet.
  28. It is hard because do you look for the .com? What happens when .com, .edu, .gov,
  29. etc. aren't the only suffixes anymore?
  30. What about the interaction with notifications?
  31. We should add support for purple text. CFE_LINKVISITED
  32. */
  33. //Includes both types of URLs
  34. const int MAXURLHDRSIZE = 9;
  35. //Most of these can just be passed right to the client--but some need a prefix.
  36. //Can we automatically add that tag when it needs it?
  37. const LPCWSTR rgszURL[] = {
  38. L"http:",
  39. L"file:",
  40. L"mailto:",
  41. L"ftp:",
  42. L"https:",
  43. L"gopher:",
  44. L"nntp:",
  45. L"prospero:",
  46. L"telnet:",
  47. L"news:",
  48. L"wais:",
  49. L"outlook:"
  50. };
  51. const char rgcchURL[] = {
  52. 5,
  53. 5,
  54. 7,
  55. 4,
  56. 6,
  57. 7,
  58. 5,
  59. 9,
  60. 7,
  61. 5,
  62. 5,
  63. 8
  64. };
  65. #define NUMURLHDR sizeof(rgcchURL)
  66. //
  67. //The XXX. URLs
  68. //
  69. const LPCWSTR rgszDOTURL[] = {
  70. L"www.",
  71. L"ftp.",
  72. };
  73. const char rgcchDOTURL[] = {
  74. 4,
  75. 4,
  76. };
  77. #define NUMDOTURLHDR sizeof(rgcchDOTURL)
  78. inline BOOL IsURLWhiteSpace(WCHAR ch)
  79. {
  80. if (IsWhiteSpace(ch))
  81. return TRUE;
  82. // See RAID 6304. MSKK doesn't want CJK in URLs. We do what we did in 2.0
  83. if ( ch >= 0x03000 && !IsKorean(ch) )
  84. return TRUE;
  85. INT iset = GetKinsokuClass(ch);
  86. return iset == 10 || (iset == 14 && ch != WCH_EMBEDDING);
  87. }
  88. /*
  89. * CDetectURL::CDetectURL (ped)
  90. *
  91. * @mfunc constructor; registers this class in the notification manager.
  92. *
  93. * @rdesc void
  94. */
  95. CDetectURL::CDetectURL(
  96. CTxtEdit *ped) //@parm edit context to use
  97. {
  98. CNotifyMgr *pnm = ped->GetNotifyMgr();
  99. if(pnm)
  100. pnm->Add((ITxNotify *)this);
  101. _ped = ped;
  102. }
  103. /*
  104. * CDetectURL::~CDetectURL
  105. *
  106. * @mfunc destructor; removes ths class from the notification manager
  107. */
  108. CDetectURL::~CDetectURL()
  109. {
  110. CNotifyMgr *pnm = _ped->GetNotifyMgr();
  111. if(pnm)
  112. pnm->Remove((ITxNotify *)this);
  113. }
  114. //
  115. // ITxNotify methods
  116. //
  117. /*
  118. * CDetectURL::OnPreRelaceRange(cp, cchDel, cchNew, cpFormatMin, cpFormatMax)
  119. *
  120. * @mfunc called before a change is made
  121. */
  122. void CDetectURL::OnPreReplaceRange(
  123. LONG cp, //@parm start of changes
  124. LONG cchDel, //@parm #of chars deleted
  125. LONG cchNew, //@parm #of chars added
  126. LONG cpFormatMin, //@parm min cp of formatting change
  127. LONG cpFormatMax) //@parm max cp of formatting change
  128. {
  129. ; // don't need to do anything here
  130. }
  131. /*
  132. * CDetectURL::OnPostReplaceRange(cp, cchDel, cchNew, cpFormatMin, cpFormatMax)
  133. *
  134. * @mfunc called after a change has been made to the backing store. We
  135. * simply need to accumulate all such changes
  136. */
  137. void CDetectURL::OnPostReplaceRange(
  138. LONG cp, //@parm start of changes
  139. LONG cchDel, //@parm #of chars deleted
  140. LONG cchNew, //@parm #of chars added
  141. LONG cpFormatMin, //@parm min cp of formatting change
  142. LONG cpFormatMax) //@parm max cp of formatting change
  143. {
  144. // We don't need to worry about format changes; just data changes
  145. // to the backing store
  146. if(cp != CP_INFINITE)
  147. {
  148. Assert(cp != CONVERT_TO_PLAIN);
  149. _adc.UpdateRecalcRegion(cp, cchDel, cchNew);
  150. }
  151. }
  152. /*
  153. * CDetectURL::Zombie ()
  154. *
  155. * @mfunc
  156. * Turn this object into a zombie
  157. */
  158. void CDetectURL::Zombie ()
  159. {
  160. }
  161. /*
  162. * CDetectURL::ScanAndUpdate(publdr)
  163. *
  164. * @mfunc scans the affect text, detecting new URL's and removing old ones.
  165. *
  166. * @comm The algorithm we use is straightforward: <nl>
  167. *
  168. * 1. find the update region and expand out to whitespace in either
  169. * direction. <nl>
  170. *
  171. * 2. Scan through region word by word (where word is contiguous
  172. * non-whitespace).
  173. *
  174. * 3. Strip these words off punctuation marks. This may be a bit
  175. * tricky as some of the punctuation may be part of the URL itself.
  176. * We assume that generally it's not, and if it is, one has to enclose
  177. * the URL in quotes, brackets or such. We stop stripping the
  178. * punctuation off the end as soon as we find the matching bracket.
  179. *
  180. * 4. If it's a URL, enable the effects, if it's
  181. * incorrectly labelled as a URL, disabled the effects.
  182. *
  183. * Note that this algorithm will only remove
  184. */
  185. void CDetectURL::ScanAndUpdate(
  186. IUndoBuilder *publdr) //@parm undo context to use
  187. {
  188. LONG cpStart, cpEnd, cp;
  189. CTxtSelection *psel = _ped->GetSel();
  190. CTxtRange rg(*psel);
  191. BOOL fCleanedThisURL;
  192. BOOL fCleanedSomeURL = FALSE;
  193. // Clear away some unnecessary features of the range that will
  194. // just slow us down.
  195. rg.SetIgnoreFormatUpdate(TRUE);
  196. rg._rpPF.SetToNull();
  197. if(!GetScanRegion(cpStart, cpEnd))
  198. return;
  199. rg.Set(cpStart, 0);
  200. while((cp = rg.GetCp()) < cpEnd)
  201. {
  202. Assert(rg.GetCch() == 0);
  203. LONG cchAdvance;
  204. ExpandToURL(rg, cchAdvance);
  205. if(rg.GetCch() == 0)
  206. break;
  207. if(IsURL(rg))
  208. {
  209. SetURLEffects(rg, publdr);
  210. LONG cpNew = rg.GetCp() - rg.GetCch();
  211. // Anything before detected URL did not really belong to it
  212. if (rg.GetCp() > cp)
  213. {
  214. rg.Set(cp, cp - rg.GetCp());
  215. CheckAndCleanBogusURL(rg, fCleanedThisURL, publdr);
  216. fCleanedSomeURL |= fCleanedThisURL;
  217. }
  218. // Collapse to end of URL range so that ExpandToURL will
  219. // find next candidate.
  220. rg.Set(cpNew, 0);
  221. // skip to the end of word; this can't be another URL!
  222. cp = cpNew;
  223. cchAdvance = -MoveByDelimiters(rg._rpTX, 1, URL_STOPATWHITESPACE, 0);
  224. }
  225. if(cchAdvance)
  226. {
  227. rg.Set(cp, cchAdvance);
  228. CheckAndCleanBogusURL(rg, fCleanedThisURL, publdr);
  229. fCleanedSomeURL |= fCleanedThisURL;
  230. // Collapse to end of scanned range so that ExpandToURL will
  231. // find next candidate.
  232. rg.Set(cp - cchAdvance, 0);
  233. }
  234. }
  235. // If we cleaned some URL, we might need to reset the default format
  236. if(fCleanedSomeURL && !psel->GetCch())
  237. psel->Update_iFormat(-1);
  238. }
  239. //
  240. // PRIVATE methods
  241. //
  242. /*
  243. * CDetectURL::GetScanRegion (&rcpStart, &rcpEnd)
  244. *
  245. * @mfunc Gets the region of text to scan for new URLs by expanding the
  246. * changed region to be bounded by whitespace
  247. *
  248. * @rdesc BOOL
  249. */
  250. BOOL CDetectURL::GetScanRegion(
  251. LONG& rcpStart, //@parm where to put start of range
  252. LONG& rcpEnd) //@parm where to put end of range
  253. {
  254. LONG cp, cch;
  255. LONG cchExpand;
  256. WCHAR chBracket;
  257. CRchTxtPtr rtp(_ped, 0);
  258. _adc.GetUpdateRegion(&cp, NULL, &cch);
  259. if(cp == CP_INFINITE)
  260. return FALSE;
  261. // First find start of region
  262. rtp.SetCp(cp);
  263. rcpStart = cp;
  264. rcpEnd = cp + cch;
  265. // Now let's see if we need to expand to the nearest quotation mark
  266. // we do if we have quotes in our region or we have the LINK bit set
  267. // on either side of the region that we might need or not need to clear
  268. BOOL fExpandToBrackets = (rcpEnd - rcpStart ?
  269. GetAngleBracket(rtp._rpTX, rcpEnd - rcpStart) : 0);
  270. BOOL fKeepGoing = TRUE;
  271. while(fKeepGoing)
  272. {
  273. fKeepGoing = FALSE;
  274. // Expand left to the entire word
  275. rtp.SetCp(rcpStart);
  276. rcpStart += MoveByDelimiters(rtp._rpTX, -1, URL_STOPATWHITESPACE, 0);
  277. // Now the other end
  278. rtp.SetCp(rcpEnd);
  279. rcpEnd += MoveByDelimiters(rtp._rpTX, 1, URL_STOPATWHITESPACE, 0);
  280. // If we have LINK formatting around, we'll need to expand to nearest quotes
  281. rtp.SetCp(rcpStart);
  282. rtp._rpCF.AdjustBackward();
  283. fExpandToBrackets = fExpandToBrackets ||
  284. (_ped->GetCharFormat(rtp._rpCF.GetFormat())->_dwEffects & CFE_LINK);
  285. rtp.SetCp(rcpEnd);
  286. rtp._rpCF.AdjustForward();
  287. fExpandToBrackets = fExpandToBrackets ||
  288. (_ped->GetCharFormat(rtp._rpCF.GetFormat())->_dwEffects & CFE_LINK);
  289. if (fExpandToBrackets)
  290. // We have to expand to nearest angle brackets in both directions
  291. {
  292. rtp.SetCp(rcpStart);
  293. chBracket = LEFTANGLEBRACKET;
  294. cchExpand = MoveByDelimiters(rtp._rpTX, -1, URL_STOPATCHAR, &chBracket);
  295. // Did we really hit a bracket?
  296. if(chBracket == LEFTANGLEBRACKET)
  297. {
  298. rcpStart += cchExpand;
  299. fKeepGoing = TRUE;
  300. }
  301. // Same thing, different direction
  302. rtp.SetCp(rcpEnd);
  303. chBracket = RIGHTANGLEBRACKET;
  304. cchExpand = MoveByDelimiters(rtp._rpTX, 1, URL_STOPATCHAR, &chBracket);
  305. if(chBracket == RIGHTANGLEBRACKET)
  306. {
  307. rcpEnd += cchExpand;
  308. fKeepGoing = TRUE;
  309. }
  310. fExpandToBrackets = FALSE;
  311. }
  312. }
  313. LONG cchAdj = _ped->GetAdjustedTextLength();
  314. if(rcpEnd > cchAdj)
  315. rcpEnd = cchAdj;
  316. return TRUE;
  317. }
  318. /*
  319. * CDetectURL::ExpandToURL(&rg, &cchAdvance)
  320. *
  321. * @mfunc skips white space and sets the range to the very next
  322. * block of non-white space text. Strips this block off
  323. * punctuation marks
  324. */
  325. void CDetectURL::ExpandToURL(
  326. CTxtRange& rg, //@parm range to move
  327. LONG &cchAdvance//@parm how much to advance to the next URL from the current cp
  328. )
  329. {
  330. LONG cp;
  331. LONG cch;
  332. Assert(rg.GetCch() == 0);
  333. cp = rg.GetCp();
  334. // Skip white space first, record the advance
  335. cp -= (cchAdvance = -MoveByDelimiters(rg._rpTX, 1,
  336. URL_EATWHITESPACE|URL_STOPATNONWHITESPACE, 0));
  337. rg.Set(cp, 0);
  338. // Strip off punctuation marks
  339. WCHAR chStopChar = URL_INVALID_DELIMITER;
  340. // Skip all punctuation from the beginning of the word
  341. LONG cchHead = MoveByDelimiters(rg._rpTX, 1,
  342. URL_STOPATWHITESPACE|URL_STOPATNONPUNCT,
  343. &chStopChar);
  344. // Now skip up to white space (i.e. expand to the end of the word).
  345. cch = MoveByDelimiters(rg._rpTX, 1, URL_STOPATWHITESPACE|URL_EATNONWHITESPACE, 0);
  346. // This is how much we want to advance to start loking for the next URL
  347. // if this does not turn out to be one: one word
  348. // We increment/decrement the advance so we can accumulate changes in there
  349. cchAdvance -= cch;
  350. WCHAR chLeftDelimiter = chStopChar;
  351. // Check if anything left; if not, it's not interesting -- just return
  352. Assert(cchHead <= cch);
  353. if(cch == cchHead)
  354. {
  355. rg.Set(cp, -cch);
  356. return;
  357. }
  358. // Set to the end of range
  359. rg.Set(cp + cch, 0);
  360. // Get the space after so we always clear white space between words
  361. // cchAdvance -= MoveByDelimiters(rg._rpTX, 1,
  362. // URL_EATWHITESPACE|URL_STOPATNONWHITESPACE, 0);
  363. // and go back while skipping punctuation marks and not finding a match
  364. // to the left-side encloser
  365. chStopChar = BraceMatch(chStopChar);
  366. LONG cchTail = MoveByDelimiters(rg._rpTX, -1,
  367. URL_STOPATWHITESPACE|URL_STOPATNONPUNCT|URL_STOPATCHAR,
  368. &chStopChar);
  369. // Something should be left of the word, assert that
  370. Assert(cch - cchHead + cchTail > 0);
  371. if(chLeftDelimiter == LEFTANGLEBRACKET)
  372. {
  373. //If we stopped at a quote: go forward looking for the enclosing
  374. //quote, even if there are spaces.
  375. // move to the beginning
  376. rg.Set(cp + cchHead, 0);
  377. chStopChar = RIGHTANGLEBRACKET;
  378. if(GetAngleBracket(rg._rpTX) < 0) // closing bracket
  379. {
  380. LONG cchExtend = MoveByDelimiters(rg._rpTX, 1, URL_STOPATCHAR, &chStopChar);
  381. Assert(cchExtend <= URL_MAX_SIZE);
  382. // did we really get the closing bracket?
  383. if(chStopChar == RIGHTANGLEBRACKET)
  384. {
  385. rg.Set(cp + cchHead, -(cchExtend - 1));
  386. return;
  387. }
  388. }
  389. // Otherwise the quotes did not work out; fall through to
  390. // the general case
  391. }
  392. rg.Set(cp + cchHead, -(cch - cchHead + cchTail));
  393. return;
  394. }
  395. /*
  396. * CDetectURL::IsURL(&rg)
  397. *
  398. * @mfunc if the range is over a URL, return TRUE. We assume
  399. * that the range has been preset to cover a block of non-white
  400. * space text.
  401. *
  402. *
  403. * @rdesc TRUE/FALSE
  404. */
  405. BOOL CDetectURL::IsURL(
  406. CTxtRange& rg) //@parm Range of text to check
  407. {
  408. LONG i, j;
  409. TCHAR szBuf[MAXURLHDRSIZE + 1];
  410. LONG cch, rgcch;
  411. // make sure the active end is cpMin
  412. Assert(rg.GetCch() < 0);
  413. cch = rg._rpTX.GetText(MAXURLHDRSIZE, szBuf);
  414. szBuf[cch] = L'\0';
  415. rgcch = -rg.GetCch();
  416. //First, see if the word contains '\\' because that is a UNC
  417. //convention and its cheap to check.
  418. if (szBuf[0] == L'\\' && szBuf[1] == L'\\' && rgcch > 2)
  419. return TRUE;
  420. // Scan the buffer to see if we have one of ':.' since
  421. // all URLs must contain that. wcsnicmp is a fairly expensive
  422. // call to be making frequently.
  423. for(i = 0; i < cch; i++)
  424. {
  425. switch (szBuf[i])
  426. {
  427. default:
  428. break;
  429. case '.':
  430. for(j = 0; j < NUMDOTURLHDR; j++)
  431. {
  432. // The strings must match _and_ we must have at least
  433. // one more character
  434. if(W32->wcsnicmp(szBuf, rgszDOTURL[j], rgcchDOTURL[j]) == 0)
  435. return rgcch > rgcchDOTURL[j];
  436. }
  437. return FALSE;
  438. case ':':
  439. for(j = 0; j < NUMURLHDR; j++)
  440. {
  441. if(W32->wcsnicmp(szBuf, rgszURL[j], rgcchURL[j]) == 0)
  442. return rgcch > rgcchURL[j];
  443. }
  444. return FALSE;
  445. }
  446. }
  447. return FALSE;
  448. }
  449. /*
  450. * CDetectURL::SetURLEffects
  451. *
  452. * @mfunc sets URL effects for the given range.
  453. *
  454. * @comm The URL effects currently are blue text, underline, with
  455. * CFE_LINK.
  456. */
  457. void CDetectURL::SetURLEffects(
  458. CTxtRange& rg, //@parm Range on which to set the effects
  459. IUndoBuilder *publdr) //@parm Undo context to use
  460. {
  461. CCharFormat CF;
  462. CF._dwEffects = CFE_LINK;
  463. // NB! The undo system should have already figured out what should
  464. // happen with the selection by now. We just want to modify the
  465. // formatting and not worry where the selection should go on undo/redo.
  466. rg.SetCharFormat(&CF, SCF_IGNORESELAE, publdr, CFM_LINK, CFM2_CHARFORMAT);
  467. }
  468. /*
  469. * CDetectURL::CheckAndCleanBogusURL(rg, fDidClean, publdr)
  470. *
  471. * @mfunc checks the given range to see if it has CFE_LINK set,
  472. * and if so, removes is. We assume that the range is already
  473. * _not_ a well-formed URL string.
  474. */
  475. void CDetectURL::CheckAndCleanBogusURL(
  476. CTxtRange& rg, //@parm range to use
  477. BOOL &fDidClean, //@parm return TRUE if we actually did some cleaning
  478. IUndoBuilder *publdr) //@parm undo context to use
  479. {
  480. LONG cch = -rg.GetCch();
  481. Assert(cch > 0);
  482. CCharFormat CF;
  483. CFormatRunPtr rp(rg._rpCF);
  484. fDidClean = FALSE;
  485. // If there are no format runs, nothing to do
  486. if(!rp.IsValid())
  487. return;
  488. rp.AdjustForward();
  489. // Run through the format runs in this range; if there is no
  490. // link bit set, then just return.
  491. while(cch > 0)
  492. {
  493. if(_ped->GetCharFormat(rp.GetFormat())->_dwEffects & CFE_LINK)
  494. break;
  495. cch -= rp.GetCchLeft();
  496. rp.NextRun();
  497. }
  498. // If there is no link bit set on any part of the range, just return
  499. if(cch <= 0)
  500. return;
  501. // Uh-oh, it's a bogus link. Turn off the link bit.
  502. fDidClean = TRUE;
  503. CF._dwEffects = 0;
  504. // NB! The undo system should have already figured out what should
  505. // happen with the selection by now. We just want to modify the
  506. // formatting and not worry where the selection should go on undo/redo.
  507. rg.SetCharFormat(&CF, SCF_IGNORESELAE, publdr, CFM_LINK, CFM2_CHARFORMAT);
  508. }
  509. /*
  510. * CDetectURL::MoveByDelimiters(&tpRef, iDir, grfDelimeters, pchStopChar)
  511. *
  512. * @mfunc returns the signed number of characters until the next delimiter
  513. * character in the given direction.
  514. *
  515. * @rdesc signed number of characters until next delimite
  516. */
  517. LONG CDetectURL::MoveByDelimiters(
  518. const CTxtPtr& tpRef, //@parm cp/tp to start looking from
  519. LONG iDir, //@parm Direction to look, must be 1 or -1
  520. DWORD grfDelimiters, //@parm Eat or stop at different types of
  521. // characters. Use one of URL_EATWHITESPACE,
  522. // URL_EATNONWHITESPACE, URL_STOPATWHITESPACE
  523. // URL_STOPATNONWHITESPACE, URL_STOPATPUNCT,
  524. // URL_STOPATNONPUNCT ot URL_STOPATCHAR
  525. WCHAR *pchStopChar) // @parm Out: delimiter we stopped at
  526. // In: additional char that stops us
  527. // when URL_STOPATCHAR is specified
  528. {
  529. LONG cch = 0;
  530. LONG cchMax = (grfDelimiters & URL_EATWHITESPACE) // Use huge # if
  531. ? tomForward : URL_MAX_SIZE; // eating whitesp
  532. LONG cchvalid = 0;
  533. WCHAR chScanned = URL_INVALID_DELIMITER;
  534. LONG i;
  535. const WCHAR *pch;
  536. CTxtPtr tp(tpRef);
  537. // Determine the scan mode: do we stop at white space, at punctuation,
  538. // at a stop character?
  539. BOOL fWhiteSpace = (0 != (grfDelimiters & URL_STOPATWHITESPACE));
  540. BOOL fNonWhiteSpace = (0 != (grfDelimiters & URL_STOPATNONWHITESPACE));
  541. BOOL fPunct = (0 != (grfDelimiters & URL_STOPATPUNCT));
  542. BOOL fNonPunct = (0 != (grfDelimiters & URL_STOPATNONPUNCT));
  543. BOOL fStopChar = (0 != (grfDelimiters & URL_STOPATCHAR));
  544. Assert(iDir == 1 || iDir == -1);
  545. Assert(fWhiteSpace || fNonWhiteSpace || (!fPunct && !fNonPunct));
  546. Assert(!fStopChar || NULL != pchStopChar);
  547. // Break anyway if we scanned more than URL_MAX_SIZE chars
  548. for (LONG cchScanned = 0; cchScanned < cchMax;)
  549. {
  550. // Get the text
  551. if(iDir == 1)
  552. {
  553. i = 0;
  554. pch = tp.GetPch(cchvalid);
  555. }
  556. else
  557. {
  558. i = -1;
  559. pch = tp.GetPchReverse(cchvalid);
  560. // This is a bit odd, but basically compensates for the
  561. // backwards loop running one-off from the forwards loop
  562. cchvalid++;
  563. }
  564. if(!pch)
  565. goto exit;
  566. // Loop until we hit a character within criteria. Note that for
  567. // our purposes, the embedding character counts as whitespace.
  568. while(abs(i) < cchvalid && cchScanned < cchMax
  569. && (IsURLWhiteSpace(pch[i]) ? !fWhiteSpace : !fNonWhiteSpace)
  570. && (IsURLDelimiter(pch[i]) ? !fPunct : !fNonPunct)
  571. && !(fStopChar && (*pchStopChar == chScanned) && (chScanned != URL_INVALID_DELIMITER))
  572. && ((chScanned != CR && chScanned != LF) || fNonWhiteSpace))
  573. {
  574. chScanned = pch[i];
  575. i += iDir;
  576. ++cchScanned;
  577. }
  578. // If we're going backwards, i will be off by one; adjust
  579. if(iDir == -1)
  580. {
  581. Assert(i < 0 && cchvalid > 0);
  582. i++;
  583. cchvalid--;
  584. }
  585. cch += i;
  586. if(abs(i) < cchvalid)
  587. break;
  588. tp.AdvanceCp(i);
  589. }
  590. exit:
  591. // Stop char parameter is present, fill it in
  592. // with the last character scanned and accepted
  593. if (pchStopChar)
  594. *pchStopChar = chScanned;
  595. return cch;
  596. }
  597. /*
  598. * CDetectURL::BraceMatch (chEnclosing)
  599. *
  600. * @mfunc returns the matching bracket to the one passed in.
  601. * if the symbol passed in is not a bracket it returns
  602. * URL_INVALID_DELIMITER
  603. *
  604. * @rdesc returns bracket that matches chEnclosing
  605. */
  606. WCHAR CDetectURL::BraceMatch(
  607. WCHAR chEnclosing)
  608. {
  609. // We're matching "standard" roman braces only. Thus only them may be used
  610. // to enclose URLs. This should be fine (after all, only Latin letters are allowed
  611. // inside URLs, right?).
  612. // I hope that the compiler converts this into some efficient code
  613. switch(chEnclosing)
  614. {
  615. case(TEXT('\"')):
  616. case(TEXT('\'')): return chEnclosing;
  617. case(TEXT('(')): return TEXT(')');
  618. case(TEXT('<')): return TEXT('>');
  619. case(TEXT('[')): return TEXT(']');
  620. case(TEXT('{')): return TEXT('}');
  621. default: return URL_INVALID_DELIMITER;
  622. }
  623. }
  624. /*
  625. * CDetectURL::GetAngleBracket (&tpRef, cchMax)
  626. *
  627. * @mfunc Goes forward as long as the current paragraph
  628. * or URL_SCOPE_MAX not finding quotation marks and counts
  629. * those quotation marks
  630. * returns their parity
  631. *
  632. * @rdesc LONG
  633. */
  634. LONG CDetectURL::GetAngleBracket(
  635. CTxtPtr &tpRef,
  636. LONG cchMax)
  637. {
  638. CTxtPtr tp(tpRef);
  639. LONG cchvalid = 0;
  640. const WCHAR *pch;
  641. Assert (cchMax >= 0);
  642. if(!cchMax)
  643. cchMax = URL_MAX_SCOPE;
  644. // Break anyway if we scanned more than cchLimit chars
  645. for (LONG cchScanned = 0; cchScanned < cchMax; NULL)
  646. {
  647. pch = tp.GetPch(cchvalid);
  648. if(!cchvalid)
  649. return 0;
  650. for (LONG i = 0; (i < cchvalid); ++i)
  651. {
  652. if(pch[i] == CR || pch[i] == LF || cchScanned >= cchMax)
  653. return 0;
  654. if(pch[i] == LEFTANGLEBRACKET)
  655. return 1;
  656. if(pch[i] == RIGHTANGLEBRACKET)
  657. return -1;
  658. ++cchScanned;
  659. }
  660. tp.AdvanceCp(i);
  661. }
  662. return 0;
  663. }