|
|
/*************************************************************************
* * * ISEARCH.H * * * * Copyright (C) Microsoft Corporation 1990-1992 * * All Rights reserved. * * * ************************************************************************** * * * Module Intent * * Common defines internal to the searcher. None of this stuff is * * available outside the search engine. * * * ************************************************************************** * * * Current Owner: BinhN * * * ************************************************************************** * * * Released by Development: (date) * * * *************************************************************************/
// Critical structures that gets messed up in /Zp8
#pragma pack(1)
#define occifNONE 0x0000 // No flags.
#define occifEOF 0x0001 // End of file.
#define occifWRITTEN 0x0002 // This occurrence has been written.
#define occifMATCH 0x0004 // This occurrence is a match.
#define occifHAS_MATCH 0x0008 // Don't do combination this pass.
/**************************************************************************
* * SYMBOLS STRUCTURE * **************************************************************************/
#define cWordsPerToken 5 // a guess at average number of stemmed or wildcard variants
typedef struct WORDINFO { struct WORDINFO FAR *pNext;
// Term frequency
DWORD cTopic;
// Word data information
FILEOFFSET foData; DWORD cbData; WORD wRealLength; // Real word length
} WORDINFO, FAR *LPWORDINFO;
typedef struct STRING_TOKEN { struct STRING_TOKEN FAR *pNext; LPB lpString; // String itself
WORD cUsed; // Times this string appears in the query
WORD wWeight; // Weight of the word
WORD wWeightRemain; // Sum of term weights AFTER this one in the list
LPWORDINFO lpwi; // List of word data for this token
DWORD dwTopicCount; } STRING_TOKEN;
/* Set the default size of a string block. Assuming that a word length
* is 6, allocate enough memory for 20 words */
#define STRING_BLOCK_SIZE (sizeof(STRING_TOKEN) + 6) * 20
/* String flags */
#define EXACT_MATCH 0x01
#define WILDCARD_MATCH 0x02
#define TERM_RANGE_MATCH 0x03
/**************************************************************************
* * QUERY TREE STRUCTURE & FUNCTIONS * **************************************************************************/
#define MAX_QUERY_NODE 0xFFFF // Maximum number of tokens in a query
#define TOPICLIST_NODE 0x01 // The node is a topicId node
#define OCCURENCE_NODE 0x02 // The node is an occurrence node
#define NODE_TYPE_MASK 0x0f
#define DONT_FREE 0x10 // Don't free the node after unlink
#define STACK_SIZE 15 // Maximum level of stack
// This is an information buffer structure that you pass to "HitListGetTopic"
// which fills in its fields. You can look at the fields not marked as
// "internal", and also pass it to other API functions.
#define TOPIC_INFO \
WORD wWeight; /* Topicument-weight. */ \ DWORD dwTopicId; /* Topic-ID associated with this hit. */ \ DWORD lcOccur /* Number of occurrences (hits) */
typedef struct SNGLINK { struct SNGLINK FAR *pNext; } SGNLINK, FAR *LPSLINK;
// Internal Occurrence structure
// Be careful when changing the fields of it. See MARKER struct below
typedef struct OCCURENCE { struct OCCURENCE FAR *pNext; WORD fFlag; /* Various flags */ WORD cLength; /* Word length */ DWORD dwCount; /* Word Count, needed for phrase */ DWORD dwOffset; /* Word offset, needed for hilite */ DWORD dwFieldId; /* Field id (_DEBUG only) */ WORD wWeight; /* Hit weight */ LPVOID lpvTerm; /* Pointer to a term in WORD-prefix length
* Unicode format, i.e. a "wide ST". */ #if defined (_MIPS) || defined (ppc)
WORD wPad; DWORD dwPad; #endif
} OCCURENCE, FAR *LPIOCC;
/* Marker node. Be careful when changing the fields of it.
* 1/ The size of it must be <= the size of an OCCURENCE * 2/ Location of pNext and fFlag must be the same between the two * structure */
typedef struct MARKER { struct OCCURENCE FAR *pNext; WORD fFlag; /* Various flags */ WORD Unused; /* Unused */ struct MARKER FAR *pPrevMark; /* Previous marker */ struct MARKER FAR *pNextMark; /* Next marker */ } MARKER, FAR *LPMARKER;
/* Occurrence flags */ #define TO_BE_KEPT 0x01
#define TO_BE_SKIPPED 0x02
#define TO_BE_COMPARED 0x04
#define IS_LAST_NODE 0x08
#define IS_MARKER_NODE 0x10
/* Internal Topic List structure */ typedef struct TOPIC_LIST { struct TOPIC_LIST FAR * pNext; LPIOCC lpOccur; // Pointer to occurrence lists
unsigned short fFlag; // Various flags, such as TO_BE_KEPT
TOPIC_INFO; } TOPIC_LIST;
typedef TOPIC_LIST FAR *LPITOPIC;
/* TopicId node flags */ #define TO_BE_KEPT 0x01
#define HAS_MARKER 0x02
#define IS_MARKER_NODE 0x10
#define WRITTEN_TO_DISK 0x20
#define DL_NEXT(p) (((LPITOPIC)p)->pNext)
#define DL_OCCUR(p) (((LPITOPIC)p)->lpOccur)
/* QueryInfo flags */
#define IN_PHRASE 0x0001
#define FREE_CHARTAB 0x0002
#define FORCED_PHRASE 0x0004
#define CW_PHRASE 0x0010 // Must match COMPOUNDWORD_PHRASE in medv20.h
/* Query info node */ typedef struct QueryInfo { LPB lpbQuery; // Query expression bytes.
LPB pWhitespace; // Working variable for implicit phrase
LPQT lpQueryTree; // Query tree.
LPV lpStack; // Pointer to operator's stack
DWORD dwOffset; // Current offset
BREAKER_FUNC lpfnBreakFunc; LPSIPB lpStopListInfo; // Associated stop list info
LPCHARTAB lpCharTab; // Pointer to character table
LPOPSYM lpOpSymTab; // Operator symbol table
LPERRB lperrb; // Error buffer
DWORD fFlag; // Flag
WORD cOpEntry; // Number of operator entries
WORD Pad; } QUERY_INFO, FAR *LPQI;
/* Parameter of an unary operator */ typedef union NodeParm { VOID FAR *lpStruct; DWORD dwValue; } NODE_PARM;
/* Query Tree nodes */
#define OPERATOR_NODE 1 // Operator
#define TERM_NODE 2 // A string to be searched for
#define NULL_NODE 3 // A string that can't be found
#define EXPRESSION_NODE 4 // The node contains the result
#define STOP_NODE 5 // A stop word
typedef struct QTNODE { struct QTNODE FAR *pNext; struct QTNODE FAR *pPrev; LPITOPIC lpTopicList; /* Topic linked list */ union { STRING_TOKEN FAR *pToken; /* Word associated with this node */ WORD wProxDist; /* Proximity distance */ VOID FAR *lpStruct; /* Structure associated with unary op */ } u; DWORD cTopic; // Number of TopicId lists of this node
// This number will change when merging
// gets involved
// Max and Min topic id. This is useful for speeding up retrieval
DWORD dwMaxTopicId; // Max topic Id in the list
DWORD dwMinTopicId; // Min topic id in the list
// Word data information
FILEOFFSET foData; DWORD cbData;
/* Characteristics associated with the node */
LPB lpHiString; /* Hi limit of the string (for THRU) */ LPGROUP lpGroup; /* Group associated with this node */ DWORD dwFieldId; /* FieldID associated with term */ WORD NodeType; /* What type (operator, term, etc) */ WORD OpVal; /* Operator value */ WORD iCurOff; // Offset to the beginning of the word
WORD wRealLength; // Real word length
LPVOID lpvIndexedTerm; /* Pointer to the term in the index that
* currently matches this node. The term's * string is in WORD-prefix length Unicode * format, i.e. a "wide ST". */ // General info
WORD fFlag; /* Various flags */ WORD Offset; /* Offset from the beginning of the query */ WORD wBrkDtype; /* Breaker's dtype (for THRU) */ WORD Pad; } QTNODE, FAR *_LPQTNODE;
#define QTN_LEFT(p) (((QTNODE FAR *)p)->pPrev)
#define QTN_RIGHT(p) (((QTNODE FAR *)p)->pNext)
#define QTN_PREV(p) (((QTNODE FAR *)p)->pPrev)
#define QTN_NEXT(p) (((QTNODE FAR *)p)->pNext)
#define QTN_NODETYPE(p) (((QTNODE FAR *)p)->NodeType)
#define QTN_OPVAL(p) (((QTNODE FAR *)p)->OpVal)
#define QTN_TOPICLIST(p) (((QTNODE FAR *)p)->lpTopicList)
#define QTN_TOKEN(p) (((QTNODE FAR *)p)->u.pToken)
#define QTN_PARMS(p) (((QTNODE FAR *)p)->u.lpStruct)
#define QTN_FLAG(p) (((QTNODE FAR *)p)->fFlag)
#define QTN_HITERM(p) (((QTNODE FAR *)p)->lpHiString)
#define QTN_OFFSET(p) (((QTNODE FAR *)p)->Offset)
#define QTN_FIELDID(p) (((QTNODE FAR *)p)->dwFieldId)
#define QTN_DTYPE(p) (((QTNODE FAR *)p)->wBrkDtype)
#define QTN_GROUP(p) (((QTNODE FAR *)p)->lpGroup)
/* Block of query's nodes. We allocate 16 nodes per block */ #define QUERY_BLOCK_SIZE sizeof(QTNODE)*16
#define cTOPIC_PER_BLOCK 500
#define cOCC_PER_BLOCK 1000
/* Query tree's flags */
#define TO_BE_SORTED 0x0001
#define HAS_NEAR_RESULT 0x0002
#define ALL_OR 0x0004
#define ALL_AND 0x0008
#define PROCESSED 0x0010
#define ALL_ANDORNOT 0x0020
/* Query tree structure */ typedef struct QTREE { CUSTOMSTRUCT cStruct; /* Structure's handle, MUST BE 1ST FIELD!! */ LONG cQuery; /* Note: this can't be unsigned */ DWORD dwcOccFields; /* Occurence fields count */ DWORD dwOccSize; /* Occurence node size */ /* Unary operator related fields */ LPGROUP lpGroup; /* Group associated with all terms */ DWORD dwFieldId; /* Field-ID assigned to all followed terms.*/ WORD wProxDist; /* Proximity distance */ WORD iDefaultOp; /* Default operator. */ WORD wBrkDtype; /* Breaker's dtype (for THRU) */ WORD fFlag; /* Various querytree flags */
/* String table */ LPV lpStringBlock; /* String's memory block */ STRING_TOKEN FAR *lpStrList;/* Pointer to strings table */
LPV lpWordInfoBlock;
/* Topic list related global variables */ LPV lpTopicMemBlock; /* Pointer to Topic memory block */ LPITOPIC lpTopicStartSearch; /* Starting node for searching */ LPSLINK lpTopicFreeList; /* Pointer to free doc list */ DWORD dwTopicNodeCnt;
/* Occ list related global variables */ LPV lpOccMemBlock; /* Pointer to Occ memory block */ LPIOCC lpOccStartSearch; /* Starting occurrence for searching */ LPSLINK lpOccFreeList; /* Pointer to free occurrence list */ DWORD dwOccNodeCnt; /* Buffer for the tree's nodes */ LPV lpNodeBlock; /* Nodes memory block */ _LPQTNODE lpTopNode; /* Pointer to top node */ /* Index information */ FILEOFFSET foIdxRoot; /* Top node offset */ DWORD dwBlockSize; /* Index block size */ WORD TreeDepth; /* Depth of tree */ WORD cIdxLevels; /* Index's depth */ OCCF occf; IDXF idxf; CKEY ckeyTopicId; // 2-bytes
CKEY ckeyOccCount; // 2-bytes
CKEY ckeyWordCount; CKEY ckeyOffset; /* MAGIC value... */ LONG magic; /* Interrupt flag for online use. Online apps don't have callbacks
* so we have to provide an API to set this flag */ BYTE cInterruptCount; /* Interrupt checking */ BYTE fInterrupt; /* Similarity stuff */ LPV lpDocScores; } QTREE, FAR *_LPQT;
#define HQUERY_MAGIC 0x04121956
// This defines the "type" of word term node.
#define TERM_EXACT 1 // "Standard" term.
#define TERM_PREFIX 2 // "Wildcard" term.
#define TERM_RANGE 3 // Range term. This says "give me everything
// between some low bound and some high
// bound."
/*
* This defines the value or type of operator node. It corresponds to * the OpVal field of struct OPSYM */
#define AND_OP 0 // AND operator
#define OR_OP 1 // OR operator
#define NOT_OP 2 // NOT operator
#define PHRASE_OP 3 // PHRASE operator
#define NEAR_OP 4 // NEAR operator
#define MAX_DEFAULT_OP OR_OP // Maximum value of default operator
#define RANGE_OP 5
#define GROUP_OP 6
#define FIELD_OP 7
#define BRKR_OP 8
#define MAX_OPERATOR 8
#define STOP_OP 9 // stop word
#define QUOTE 50
#define RIGHT_PAREN 51
#define LEFT_PAREN 52
#define TERM_TOKEN 53
/* Operator type */ #define BINARY_OP 0x01
#define UNARY_OP 0x02
#define PARSE_TOKEN 0x04
/* Operator attribute */ #define COMMUTATIVE 0x10 // a * b = b * a
#define ASSOCIATIVE 0x20 // a * (b * c) = (a * b) * c
#define ZERO 0x40 // a * a = a
extern WORD OperatorAttributeTable[]; /*
* Those are properties of a binary node expression. */ #define EXPRESSION_TERM 1 // One branch is an expression, one
// is a term
#define EXPRESSION_EXPRESSION 2 // Both branches are expressions
#if 1
typedef ERR (PASCAL NEAR *FNHANDLER) (LPQT, _LPQTNODE, LPITOPIC, LPV, int); #else
typedef ERR (PASCAL NEAR *FNHANDLER) (LPQT, LPV, LPV, LPV, int); #endif
#define ORDERED_BASED 1 // Based on topicId numbered
#define HIT_COUNT_BASED 2 // Based doc's hit count
#define WEIGHT_BASED 3 // based on doc's weight
typedef struct RetVars { LPQT lpqt; // Pointer to query tree for global variables
LPBYTE pLeadByteTable; // Pointer to lead byte table for DBCS
DWORD dwTopicCount; // Number of topics
DWORD dwFieldID; // Current fieldid
DWORD cOccFields; DWORD dwOccSize; LPB lpbCur; NODEINFO LeafInfo; NODEINFO DataInfo; SRCHINFO SrchInfo; // Search information
LCID lcid; // WIN32 locale ID specified at build time
WORD wWordLength; // Word length
WORD fFlag; // General flags
BYTE pBTreeWord[CB_MAX_WORD_LEN]; // Buffer for the decoded word
BYTE pModifiedWord[CB_MAX_WORD_LEN]; // Buffer for the modified word
BYTE pStemmedBTreeWord[CB_MAX_WORD_LEN]; // Stemmed BTree word
BYTE pStemmedQueryWord[CB_MAX_WORD_LEN]; // Stemmed searched word
BYTE fRank; // If non-zero the result is ranked.
BYTE pNodeBuf[BTREE_NODE_SIZE]; // Generic b-tree node buffer.
BYTE pDataBuf[FILE_BUFFER]; } RETV, FAR *LPRETV; /**************************************************************************
* * OPEN INDEX STRUCTURE * **************************************************************************/
typedef struct Idx { GHANDLE hStruct; // Handle to this structure.
DWORD dwKey; FCALLBACK_MSG Callback;
LPBYTE pLeadByteTable; // Pointer to table of DBCS lead bytes
HANDLE hLeadByteTable; IH20 ih; // Index header.
HFPB hfpbIdxSubFile; // Index file handle. If this is NULL, the
// index isn't open, else it is.
//HFPB hfpbSysFile; // Handle of Index File system
GHANDLE hTopNode; // Handle to "lrgbTopNode".
LRGB lrgbTopNode; // Pointer to the index top node.
FLOAT fSigmaTable; // Sigma table
HANDLE hSigma; // Handle of sigma table
LPERRB lperrb; // Pointer to error block
WORD wSlackSize; // Size of slack in a node
WORD Pad; } IDX, FAR *_LPIDX; // The "_" indicates that this is
// a private structure that needs
// to be available publicly. The
// public will call this an "LPIDX".
/*************************************************************************
* * Hitlist structure * *************************************************************************/
typedef struct HitList { GHANDLE hStruct; // Structure's handle. MUST BE 1ST FIELD
DWORD lcReturnedTopics; // The number of Topics returned. (what the user wants)
DWORD lcTotalNumOfTopics; // The total number of topics hit by the query.
LPITOPIC lpTopicList; // Starting of TopicList
LPBLK lpOccMemBlock; // Pointer to Occ memory block
LPBLK lpTopicMemBlock; // Pointer to Topic memory block
/* All the following fields are for internal use only */
LPVOID lpHttpQ; // for online search only
DWORD lcMaxTopic; // Max TopicId number (internal)
LPITOPIC lpLastTopic; // Last accessed Topic pointer (internal)
DWORD lLastTopicId; // Last accessed TopicId (internal)
/* Topic list cache */ GHANDLE hTopic; // Handle to Topic file
GHANDLE hTopicCache; // Handle to Topic cache
LPITOPIC lpTopicCache; // Cache for Topic info
DWORD dwTopicCacheStart; // Starting Topic number if the cache
DWORD dwTopicInCacheCount; // Number of topic currently in cache
/* Occurrence cache */ GHANDLE hOcc; // Handle to occurrences file
GHANDLE hOccCache; // Handle to Occ cache
DWORD dwCurTopic; // Current doc that the hit list belongs to
LPIOCC lpOccCache; // Cache for Occ info
DWORD dwOccCacheStart; // Starting Occ number if the cache
/* Various hitlist info for hitlist merge */ struct HitList FAR * lpMainHitList; struct HitList FAR * lpUpdateHitList; BYTE lszTopicName[cbMAX_PATH]; // Topic filename
BYTE lszOccName[cbMAX_PATH]; // Occ filename
} HL, FAR *_LPHL;
#define DO_FAST_MERGE(pSrch, lpqt) (((pSrch)->Flag & QUERYRESULT_SKIPOCCINFO) && ((lpqt)->fFlag & ALL_ANDORNOT))
/*************************************************************************
* * Global Variables * * Those variables should be read only *************************************************************************/ extern FNHANDLER HandlerFuncTable[]; extern OPSYM OperatorSymbolTable[]; extern OPSYM FlatOpSymbolTable[]; extern BYTE LigatureTable[];
/*************************************************************************
* * Functions Prototypes * *************************************************************************/
/* qtparse.c */
PUBLIC LPQT PASCAL NEAR QueryTreeAlloc(void); PUBLIC ERR PASCAL NEAR QueryTreeAddToken (_LPQT, int, LST, DWORD, BOOL); PUBLIC LPQT PASCAL NEAR QueryTreeBuild (LPQI); #if defined(_DEBUG) && DOS_ONLY
PUBLIC ERR PASCAL FAR PrintList(LPQT); #endif
/* search.c */ PUBLIC ERR PASCAL NEAR ResolveTree(LPIDX, _LPQTNODE, LPRETV, BOOL); PUBLIC BOOL NEAR PASCAL FGroupLookup(LPGROUP, DWORD);
/* qtlist */ PUBLIC TOPIC_LIST FAR* PASCAL NEAR TopicNodeAllocate(LPQT); PUBLIC VOID PASCAL NEAR TopicNodeFree (LPQT, _LPQTNODE, LPITOPIC, LPITOPIC); PUBLIC ERR PASCAL NEAR TopicNodeInsert (LPQT, _LPQTNODE, LPITOPIC); PUBLIC LPITOPIC PASCAL NEAR TopicNodeSearch(LPQT, _LPQTNODE, DWORD); PUBLIC LPIOCC PASCAL NEAR OccNodeAllocate(LPQT); PUBLIC LPIOCC PASCAL NEAR OccNodeSearch(LPQT, LPITOPIC , LPIOCC ); PUBLIC ERR PASCAL NEAR OccNodeInsert(LPQT, LPITOPIC, LPIOCC); PUBLIC int PASCAL NEAR OccCompare(LPIOCC, LPIOCC); PUBLIC VOID PASCAL NEAR RemoveNode(LPQT, LPV, LPSLINK, LPSLINK, int); PUBLIC VOID PASCAL NEAR FreeTree (_LPQTNODE);
/* combine.c */ PUBLIC VOID PASCAL NEAR RemoveUnmarkedTopicList (LPQT, _LPQTNODE, BOOL); PUBLIC VOID PASCAL NEAR RemoveUnmarkedNearTopicList (_LPQT, _LPQTNODE); PUBLIC VOID PASCAL NEAR MarkTopicList (_LPQTNODE); PUBLIC VOID PASCAL NEAR MergeOccurence(LPQT, LPITOPIC , LPITOPIC); PUBLIC VOID PASCAL NEAR SortResult (LPQT, _LPQTNODE, WORD);
PUBLIC ERR PASCAL NEAR OrHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int); PUBLIC ERR PASCAL NEAR AndHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int); PUBLIC ERR PASCAL NEAR NotHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int); PUBLIC ERR PASCAL NEAR NearHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int); PUBLIC ERR PASCAL NEAR PhraseHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int); PUBLIC VOID PASCAL NEAR NearHandlerCleanUp (LPQT, _LPQTNODE); PUBLIC ERR PASCAL NEAR TopicListSort (_LPQTNODE lpQtNode, BOOL fFlag);
// Critical structures that gets messed up in /Zp8
#pragma pack()
|