Leaked source code of windows server 2003
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
/*
SVMHANDLER.H (c) copyright 1998 Microsoft Corp
Contains the class encapsulating the Support Vector Machine used to do on the fly spam detection
Robert Rounthwaite (RobertRo@microsoft.com)
*/
#pragma once
#ifndef REAL
typedef double REAL; #endif
#define SAFE_FREE( p ) if (p!=NULL) free(p);
enum boolop { boolopOr, boolopAnd };
#include "svmutil.h"
class MAILFILTER { /*
The public interface to the MAILFILTER class is below. Normal use of this class to filter mail will entail: Calling the following once: FSetSVMDataLocation() and SetSpamCutoff() Setting the "Properties of the user" ...and, for each message you filter - Calling BCalculateSpamProb() */ public: // Sets the location of the SVM Data file(.LKO file). Must be called before calling any other methods
// Data file must be present at time function is called
// returns true if successful, false otherwise
bool FSetSVMDataLocation(char *szFullPath);
// Sets the Spam cutoff percentage. Must be in range from 0 to 100
bool SetSpamCutoff(REAL rCutoff); // returns value set with SetSpamCutoff. Defaults == DefaultSpamCutoff
// if no value has been set when SVM output file is read
REAL GetSpamCutoff(); // returns default value for SpamCutoff. read from SVM output file.
// should call FSetSVMDataLocation before calling this function
REAL GetDefaultSpamCutoff();
// Properties of the user
void SetFirstName(char *szFirstName); void SetLastName(char *szLastName); void SetCompanyName(char *szCompanyName);
// Calculates the probability that the current message (defined by the properties of the message) is spam.
// !Note! that the IN string params may be modified by the function.
// Returns the probability (0 to 1) that the message is spam in prSpamProb
// the boolean return is determined by comparing to the spam cutoff
// if the value of a boolean param is unknown use false, use 0 for unknown time.
bool BCalculateSpamProb(/* IN params */ char *szFrom, char *szTo, char *szSubject, char *szBody, bool bDirectMessage, bool bHasAttach, FILETIME tMessageSent, /* OUT params */ REAL *prSpamProb, bool * pbIsSpam);
MAILFILTER(); ~MAILFILTER();
// Reads the default spam cutoff without parsing entire file
// Use GetDefaultSpamCutoff if using FSetSVMDataLocation;
static bool BReadDefaultSpamCutoff(char *szFullPath, REAL *prDefCutoff);
private: // members
struct FeatureComponent { FeatureLocation loc; union { char *szFeature; UINT iRuleNum; // used with locSpecial
}; // map feature to location in dst file/location in SVM output
// more than one feature component may map to the same location, combined with the op
int iFeature; boolop bop; // first feature in group is alway bopOr
bool fPresent; FeatureComponent() { loc = locNil; } ~FeatureComponent() { if ((loc>locNil) && (loc < locSpecial)) { free(szFeature); } } };
FeatureComponent *rgfeaturecomps;
// weights from SVM output
REAL *rgrSVMWeights; // Other SVM file variables
REAL _rCC; REAL _rDD; REAL _rThresh; REAL _rDefaultThresh;
// Counts
UINT _cFeatures; UINT _cFeatureComps;
// is Feature present? -1 indicates not yet set, 0 indicates not present, 1 indicates present
int *_rgiFeatureStatus;
// Properties of the user
char *_szFirstName; char *_szLastName; char *_szCompanyName;
// Set via FSetSVMDataLocation() and SetSpamCutoff()
CString _strFName; REAL _rSpamCutoff;
// Properties of the message
char *_szFrom; char *_szTo; char *_szSubject; char *_szBody; bool _bDirectMessage; FILETIME _tMessageSent; bool _bHasAttach;
// Cached special rule results used during spam calculations
bool _bRule14; bool _bRule17;
private: // methods
bool ReadSVMOutput(LPCTSTR lpszFileName); void EvaluateFeatureComponents(); void ProcessFeatureComponentPresence(); REAL RDoSVMCalc(); bool FInvokeSpecialRule(UINT iRuleNum); void HandleCaseSensitiveSpecialRules(); };
|