Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

152 lines
4.1 KiB

  1. /*
  2. SVMHANDLER.H
  3. (c) copyright 1998 Microsoft Corp
  4. Contains the class encapsulating the Support Vector Machine used to do on the fly spam detection
  5. Robert Rounthwaite (RobertRo@microsoft.com)
  6. */
  7. #pragma once
  // Floating-point type used for SVM weights, probabilities and cutoffs.
  // Defined as double unless a macro named REAL is already defined.
  #if !defined(REAL)
  typedef double REAL;
  #endif
  // Frees p with free() when p is non-NULL; does nothing otherwise.
  // - The argument is parenthesized so expressions such as `c ? a : b` are
  //   tested as a whole instead of being split by operator precedence.
  // - The body is a braced block so a following `else` cannot bind to the
  //   macro's internal `if`.
  // - Works both as `SAFE_FREE(x);` and as `SAFE_FREE(x)` (the form the old
  //   definition, which ended in ';', also allowed).
  // Note: p is evaluated twice, so do not pass an expression with side effects.
  #define SAFE_FREE( p ) { if ((p) != NULL) free(p); }
  // Boolean operator attached to a feature component; it says how that
  // component is combined with the others that map to the same feature.
  enum boolop
  {
      boolopOr  = 0,
      boolopAnd = 1
  };
  17. #include "svmutil.h"
  18. class MAILFILTER
  19. {
  20. /*
  21. The public interface to the MAILFILTER class is below. Normal use of this class to filter mail
  22. will entail:
  23. Calling the following once: FSetSVMDataLocation() and SetSpamCutoff()
  24. Setting the "Properties of the user"
  25. ...and, for each message you filter
  26. - Calling BCalculateSpamProb()
  27. */
  28. public:
  29. // Sets the location of the SVM Data file(.LKO file). Must be called before calling any other methods
  30. // Data file must be present at time function is called
  31. // returns true if successful, false otherwise
  32. bool FSetSVMDataLocation(char *szFullPath);
  33. // Sets the Spam cutoff percentage. Must be in range from 0 to 100
  34. bool SetSpamCutoff(REAL rCutoff);
  35. // returns value set with SetSpamCutoff. Defaults == DefaultSpamCutoff
  36. // if no value has been set when SVM output file is read
  37. REAL GetSpamCutoff();
  38. // returns default value for SpamCutoff. read from SVM output file.
  39. // should call FSetSVMDataLocation before calling this function
  40. REAL GetDefaultSpamCutoff();
  41. // Properties of the user
  42. void SetFirstName(char *szFirstName);
  43. void SetLastName(char *szLastName);
  44. void SetCompanyName(char *szCompanyName);
  45. // Calculates the probability that the current message (defined by the properties of the message) is spam.
  46. // !Note! that the IN string params may be modified by the function.
  47. // Returns the probability (0 to 1) that the message is spam in prSpamProb
  48. // the boolean return is determined by comparing to the spam cutoff
  49. // if the value of a boolean param is unknown use false, use 0 for unknown time.
  50. bool BCalculateSpamProb(/* IN params */
  51. char *szFrom,
  52. char *szTo,
  53. char *szSubject,
  54. char *szBody,
  55. bool bDirectMessage,
  56. bool bHasAttach,
  57. FILETIME tMessageSent,
  58. /* OUT params */
  59. REAL *prSpamProb,
  60. bool * pbIsSpam);
  61. MAILFILTER();
  62. ~MAILFILTER();
  63. // Reads the default spam cutoff without parsing entire file
  64. // Use GetDefaultSpamCutoff if using FSetSVMDataLocation;
  65. static bool BReadDefaultSpamCutoff(char *szFullPath, REAL *prDefCutoff);
  66. private: // members
  67. struct FeatureComponent
  68. {
  69. FeatureLocation loc;
  70. union
  71. {
  72. char *szFeature;
  73. UINT iRuleNum; // used with locSpecial
  74. };
  75. // map feature to location in dst file/location in SVM output
  76. // more than one feature component may map to the same location, combined with the op
  77. int iFeature;
  78. boolop bop; // first feature in group is alway bopOr
  79. bool fPresent;
  80. FeatureComponent() { loc = locNil; }
  81. ~FeatureComponent()
  82. {
  83. if ((loc>locNil) && (loc < locSpecial))
  84. {
  85. free(szFeature);
  86. }
  87. }
  88. };
  89. FeatureComponent *rgfeaturecomps;
  90. // weights from SVM output
  91. REAL *rgrSVMWeights;
  92. // Other SVM file variables
  93. REAL _rCC;
  94. REAL _rDD;
  95. REAL _rThresh;
  96. REAL _rDefaultThresh;
  97. // Counts
  98. UINT _cFeatures;
  99. UINT _cFeatureComps;
  100. // is Feature present? -1 indicates not yet set, 0 indicates not present, 1 indicates present
  101. int *_rgiFeatureStatus;
  102. // Properties of the user
  103. char *_szFirstName;
  104. char *_szLastName;
  105. char *_szCompanyName;
  106. // Set via FSetSVMDataLocation() and SetSpamCutoff()
  107. CString _strFName;
  108. REAL _rSpamCutoff;
  109. // Properties of the message
  110. char *_szFrom;
  111. char *_szTo;
  112. char *_szSubject;
  113. char *_szBody;
  114. bool _bDirectMessage;
  115. FILETIME _tMessageSent;
  116. bool _bHasAttach;
  117. // Cached special rule results used during spam calculations
  118. bool _bRule14;
  119. bool _bRule17;
  120. private: // methods
  121. bool ReadSVMOutput(LPCTSTR lpszFileName);
  122. void EvaluateFeatureComponents();
  123. void ProcessFeatureComponentPresence();
  124. REAL RDoSVMCalc();
  125. bool FInvokeSpecialRule(UINT iRuleNum);
  126. void HandleCaseSensitiveSpecialRules();
  127. };