windows-server-2003/base/fs/ntfs/workque.c


								/*++


								Copyright (c) 1991  Microsoft Corporation


								Module Name:


								    WorkQue.c


								Abstract:


								    This module implements the Work queue routines for the Ntfs File

								    system.


								Author:


								    Gary Kimura     [GaryKi]        21-May-1991


								Revision History:


								--*/


								#include "NtfsProc.h"


								//
								//  The following constant is the threshold on the number of requests that we
								//  will allow to be posted to ExWorkerThreads for a particular target device
								//  at any one time.  NtfsAddToWorkqueInternal compares the volume's
								//  PostedRequestCount against it with a strict greater-than, so a request
								//  is diverted to the volume's overflow queue only once the posted count
								//  already exceeds this value.
								//


								#define FSP_PER_DEVICE_THRESHOLD         (2)


								//
								//  When ALLOC_PRAGMA is defined, place NtfsOplockComplete in the PAGE
								//  code section.
								//

								#ifdef ALLOC_PRAGMA

								#pragma alloc_text(PAGE, NtfsOplockComplete)

								#endif


								//
								//  Forward declaration of the routine that actually queues an IrpContext
								//  to the Fsp.  It is defined at the end of this file and is called by
								//  NtfsOplockComplete and NtfsAddToWorkque.
								//

								VOID

								NtfsAddToWorkqueInternal (

								    IN PIRP_CONTEXT IrpContext,

								    IN PIRP Irp OPTIONAL,

								    IN BOOLEAN CanBlock

								    );


								VOID
								NtfsOplockComplete (
								    IN PVOID Context,
								    IN PIRP Irp
								    )

								/*++

								Routine Description:

								    Callback invoked by the oplock package once an oplock break has
								    finished, so that the Irp which was waiting on it can make progress.

								    One of three things happens:

								    - If the request carries an event that its originator is waiting on,
								      the IrpContext is cleaned up via NtfsCompleteRequest (no Irp is
								      passed) and the event is signalled.

								    - Otherwise, if the status in the Irp is STATUS_SUCCESS, the request
								      is queued to the Fsp without blocking.

								    - Otherwise the Irp is completed with the status found in the Irp.

								Arguments:

								    Context - Pointer to the IrpContext for the request.

								    Irp - I/O Request Packet; Irp->IoStatus.Status holds the outcome of
								        the oplock break.

								Return Value:

								    None.

								--*/

								{
								    PIRP_CONTEXT IrpContext = (PIRP_CONTEXT) Context;
								    NTSTATUS Status = Irp->IoStatus.Status;
								    PKEVENT SyncEvent = NULL;

								    PAGED_CODE();

								    //
								    //  Look for an event used to signal synchronous completion.  Only two
								    //  kinds of request can carry one:
								    //
								    //  Create - the oplock cleanup structure has a completion context
								    //      (fsp creates have none, which is how they are told apart).
								    //
								    //  Write - the NtfsIoContext is flagged NTFS_IO_CONTEXT_INLINE_OPLOCK.
								    //

								    switch (IrpContext->MajorFunction) {

								    case IRP_MJ_CREATE:

								        if ((IrpContext->Union.OplockCleanup != NULL) &&
								            (IrpContext->Union.OplockCleanup->CompletionContext != NULL)) {

								            SyncEvent = &IrpContext->Union.OplockCleanup->CompletionContext->Event;

								            ASSERT( FlagOn( IrpContext->State, IRP_CONTEXT_STATE_PERSISTENT ) );
								        }
								        break;

								    case IRP_MJ_WRITE:

								        if ((IrpContext->Union.NtfsIoContext != NULL) &&
								            FlagOn( IrpContext->Union.NtfsIoContext->Flags, NTFS_IO_CONTEXT_INLINE_OPLOCK )) {

								            SyncEvent = &IrpContext->Union.NtfsIoContext->Wait.SyncEvent;

								            //
								            //  The cleanup below must not delete the IrpContext or the
								            //  NtfsIoContext, so mark the IrpContext accordingly first.
								            //

								            SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_DONT_DELETE );
								        }
								        break;

								    default:

								        break;
								    }

								    //
								    //  Somebody is waiting inline: clean up the IrpContext first and only
								    //  then wake the waiter.
								    //

								    if (SyncEvent != NULL) {

								        NtfsCompleteRequest( IrpContext, NULL, Status );
								        KeSetEvent( SyncEvent, 0, FALSE );
								        ASSERT( Status != STATUS_PENDING && Status != STATUS_REPARSE );

								        return;
								    }

								    //
								    //  An ordinary successful oplock break: put the IrpContext on the
								    //  workqueue so the request is retried.  We never block here.
								    //

								    if (Status == STATUS_SUCCESS) {

								        NtfsAddToWorkqueInternal( IrpContext, Irp, FALSE );

								        return;
								    }

								    //
								    //  Anything else: finish the Irp with that status and clean up the
								    //  IrpContext.
								    //

								    ASSERT( Status != STATUS_PENDING && Status != STATUS_REPARSE );
								    NtfsCompleteRequest( IrpContext, Irp, Status );

								    return;
								}


								VOID
								NtfsPrePostIrp (
								    IN PVOID Context,
								    IN PIRP Irp OPTIONAL
								    )

								/*++

								Routine Description:

								    Does whatever preparation is required before the Fsd thread returns
								    STATUS_PENDING for a request.  It is called from within the file
								    system and by the oplock package.

								    This is the plain form of NtfsPrePostIrpInternal: the Irp is marked
								    pending and the top level context is not preserved.

								Arguments:

								    Context - Pointer to the IrpContext to be queued to the Fsp

								    Irp - I/O Request Packet (or FileObject in special close path)

								Return Value:

								    None.

								--*/

								{
								    //
								    //  PendIrp == TRUE, SaveContext == FALSE.
								    //

								    NtfsPrePostIrpInternal( Context, Irp, TRUE, FALSE );

								    return;
								}


								VOID
								NtfsWriteOplockPrePostIrp (
								    IN PVOID Context,
								    IN PIRP Irp OPTIONAL
								    )

								/*++

								Routine Description:

								    Pre-post routine handed to the oplock package for write irps.  It does
								    the work needed before STATUS_PENDING is returned from the Fsd thread.

								    Whether the top level context is preserved depends on whether the
								    oplock break is being waited for inline, as indicated by the
								    NTFS_IO_CONTEXT_INLINE_OPLOCK flag in the request's NtfsIoContext.

								Arguments:

								    Context - Pointer to the IrpContext to be queued to the Fsp.  Its
								        Union.NtfsIoContext is dereferenced unconditionally, so it must
								        be valid on entry.

								    Irp - I/O Request Packet (or FileObject in special close path)

								Return Value:

								    None.

								--*/

								{
								    PIRP_CONTEXT IrpContext = (PIRP_CONTEXT)Context;
								    BOOLEAN WaitingInline;

								    if (FlagOn( IrpContext->Union.NtfsIoContext->Flags, NTFS_IO_CONTEXT_INLINE_OPLOCK )) {

								        //
								        //  The caller waits inline and keeps using the io context.
								        //

								        WaitingInline = TRUE;

								    } else {

								        WaitingInline = FALSE;

								        //
								        //  Detach the io context before the oplock is posted, so that if
								        //  it lives on the stack nobody touches it during oplock
								        //  completion.  Give it back to the lookaside list when the
								        //  IrpContext state says it was allocated.
								        //

								        if (FlagOn( IrpContext->State, IRP_CONTEXT_STATE_ALLOC_IO_CONTEXT )) {

								            ExFreeToNPagedLookasideList( &NtfsIoContextLookasideList, IrpContext->Union.NtfsIoContext );
								        }

								        IrpContext->Union.NtfsIoContext = NULL;
								    }

								    NtfsPrePostIrpInternal( Context, Irp, TRUE, WaitingInline );
								}


								VOID
								NtfsPrePostIrpInternal (
								    IN PVOID Context,
								    IN PIRP Irp OPTIONAL,
								    IN BOOLEAN PendIrp,
								    IN BOOLEAN SaveContext
								    )

								/*++

								Routine Description:

								    This routine performs any necessary work before STATUS_PENDING is
								    returned with the Fsd thread.  This routine is called within the
								    filesystem and by the oplock package.

								    In order, it:

								    - cleans up a failed transaction if the IrpContext still has a
								      transaction id,
								    - restores the top level irp when this IrpContext owns it (unless
								      SaveContext is set),
								    - cleans up the IrpContext while keeping it allocated,
								    - records the Irp as the originating Irp,
								    - for a real Irp, locks the user buffer for the request types that
								      need it and optionally marks the Irp pending.

								Arguments:

								    Context - Pointer to the IrpContext to be queued to the Fsp

								    Irp - I/O Request Packet (or FileObject in special close path)

								    PendIrp - if true mark the irp pending as well

								    SaveContext - if true don't restore top level context even if its owned
								        The caller will be waiting on the posted irp inline and continuing processing

								Return Value:

								    None.

								--*/

								{
								    PIRP_CONTEXT IrpContext = (PIRP_CONTEXT) Context;
								    PIO_STACK_LOCATION IrpSp = NULL;

								#if (DBG || defined( NTFS_FREE_ASSERTS ))
								    PUSN_FCB ThisUsn;
								#endif

								    //
								    //  Make sure this is a valid allocated IrpContext.  It's ok for
								    //  this to be allocated on the caller's stack as long as the
								    //  caller's not doing this operation asynchronously.
								    //

								    ASSERT_IRP_CONTEXT( IrpContext );
								    ASSERT((FlagOn( IrpContext->State, IRP_CONTEXT_STATE_ALLOC_FROM_POOL )) ||
								           (IrpContext->NodeTypeCode == NTFS_NTC_IRP_CONTEXT));

								    //
								    //  Just in case we somehow get here with a transaction ID, clear
								    //  it here so we do not loop forever.
								    //

								    if (IrpContext->TransactionId != 0) {

								        NtfsCleanupFailedTransaction( IrpContext );
								    }

								    //
								    //  Restore the thread context pointer if associated with this
								    //  IrpContext, unless the caller asked us to leave it alone.
								    //

								    if (!SaveContext && FlagOn( IrpContext->State, IRP_CONTEXT_STATE_OWNS_TOP_LEVEL )) {

								        NtfsRestoreTopLevelIrp();
								        ClearFlag( IrpContext->State, IRP_CONTEXT_STATE_OWNS_TOP_LEVEL );
								    }

								    //
								    //  Cleanup all of the fields of the IrpContext, but keep the
								    //  IrpContext itself since it is about to be posted.
								    //

								    SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_DONT_DELETE );
								    NtfsCleanupIrpContext( IrpContext, FALSE );

								#if (DBG || defined( NTFS_FREE_ASSERTS ))

								    //
								    //  When a request is posted there must be no new Usn reasons left
								    //  pending, otherwise we could end up writing a Usn Journal record
								    //  for something that did not happen.  Nothing is reset here; we
								    //  only verify, for every Usn block chained off the IrpContext, that
								    //  USN_FCB_FLAG_NEW_REASON is clear.
								    //

								    ThisUsn = &IrpContext->Usn;

								    do {

								        ASSERT( !FlagOn( ThisUsn->UsnFcbFlags, USN_FCB_FLAG_NEW_REASON ));

								        ThisUsn = ThisUsn->NextUsnFcb;

								    } while (ThisUsn != NULL);

								#endif

								    IrpContext->OriginatingIrp = Irp;

								    //
								    //  Note that close.c uses a trick where the "Irp" is really
								    //  a file object.  There is nothing more to do unless we were
								    //  handed a genuine Irp.
								    //

								    if (!ARGUMENT_PRESENT( Irp ) || (Irp->Type != IO_TYPE_IRP)) {

								        return;
								    }

								    IrpSp = IoGetCurrentIrpStackLocation( Irp );

								    switch (IrpContext->MajorFunction) {

								    case IRP_MJ_READ:
								    case IRP_MJ_WRITE:

								        //
								        //  We need to lock the user's buffer, unless this is an MDL
								        //  request, in which case there is no user buffer.
								        //
								        //  **** we need a better test than non-MDL (read or write)!
								        //

								        ClearFlag( IrpContext->MinorFunction, IRP_MN_DPC );

								        if (!FlagOn( IrpContext->MinorFunction, IRP_MN_MDL )) {

								            //
								            //  A read stores into the user buffer, a write only reads
								            //  from it.  Parameters.Write.Length is used for both
								            //  request types.
								            //

								            NtfsLockUserBuffer( IrpContext,
								                                Irp,
								                                (IrpContext->MajorFunction == IRP_MJ_READ) ?
								                                IoWriteAccess : IoReadAccess,
								                                IrpSp->Parameters.Write.Length );
								        }
								        break;

								    case IRP_MJ_DIRECTORY_CONTROL:

								        //
								        //  Query directory fills in the caller's buffer.
								        //

								        if (IrpContext->MinorFunction == IRP_MN_QUERY_DIRECTORY) {

								            NtfsLockUserBuffer( IrpContext,
								                                Irp,
								                                IoWriteAccess,
								                                IrpSp->Parameters.QueryDirectory.Length );
								        }
								        break;

								    case IRP_MJ_FILE_SYSTEM_CONTROL:

								        //
								        //  These two FSCTLs use neither I/O, so check for them and
								        //  lock their output buffer.
								        //

								        if ((IrpContext->MinorFunction == IRP_MN_USER_FS_REQUEST) &&
								            ((IrpSp->Parameters.FileSystemControl.FsControlCode == FSCTL_READ_USN_JOURNAL) ||
								             (IrpSp->Parameters.FileSystemControl.FsControlCode == FSCTL_GET_RETRIEVAL_POINTERS))) {

								            NtfsLockUserBuffer( IrpContext,
								                                Irp,
								                                IoWriteAccess,
								                                IrpSp->Parameters.FileSystemControl.OutputBufferLength );
								        }
								        break;

								    default:

								        break;
								    }

								    //
								    //  Mark that we've already returned pending to the user
								    //

								    if (PendIrp) {

								        IoMarkIrpPending( Irp );
								    }

								    return;
								}


								NTSTATUS
								NtfsPostRequest (
								    IN PIRP_CONTEXT IrpContext,
								    IN PIRP Irp OPTIONAL
								    )

								/*++

								Routine Description:

								    Fsd routine that hands the request described by IrpContext over to
								    the Fsp work queue.  The request is first prepared for posting with
								    NtfsPrePostIrp and then queued with NtfsAddToWorkque.

								Arguments:

								    IrpContext - Pointer to the IrpContext to be queued to the Fsp

								    Irp - I/O Request Packet (or FileObject in special close path)

								Return Value:

								    STATUS_PENDING

								--*/

								{
								    //
								    //  Prepare the request for posting.  A caller that posts directly
								    //  is expected not to have modified any disk structures, so there
								    //  should be nothing to undo.  If instead a status such as
								    //  STATUS_CANT_WAIT was raised, the transaction abort and the restore
								    //  of the Scb values are done on that path.
								    //

								    NtfsPrePostIrp( IrpContext, Irp );

								    //
								    //  Queue it to the Fsp and tell our caller the request is pending.
								    //

								    NtfsAddToWorkque( IrpContext, Irp );

								    return STATUS_PENDING;
								}


								VOID
								NtfsCancelOverflowRequest (
								    IN PDEVICE_OBJECT Device,
								    IN PIRP Irp
								    )

								/*++

								Routine Description:

								    This routine may be called by the I/O system to cancel an outstanding
								    Irp in the overflow queue.  If it is an irp that must be processed
								    (cleanup or close) we move the irp to the head of the queue, otherwise
								    we cancel it directly.

								    The IrpContext for the request is taken from Irp->IoStatus.Information.
								    The routine is entered with the cancel spinlock held and releases it
								    using Irp->CancelIrql before taking the overflow queue spinlock.

								    The dequeuing code guarantees the cancel routine is removed before
								    the irpcontext is dequeued.  It also won't dequeue an irp that is
								    marked with a 1 in the info field.  Note we are guaranteed by the io
								    subsystem that the irp will remain for the lifetime of this call even
								    after we drop the spinlock.

								Arguments:

								    Device - DeviceObject from I/O system; it is the DeviceObject field
								        embedded in the VOLUME_DEVICE_OBJECT that owns the overflow queue.

								    Irp - Supplies the pointer to the Irp being canceled.

								Return Value:

								    None

								--*/

								{
								    PIRP_CONTEXT IrpContext;
								    PVOLUME_DEVICE_OBJECT Vdo;
								    KIRQL SavedIrql;
								    BOOLEAN Cancel;

								    IrpContext = (PIRP_CONTEXT)Irp->IoStatus.Information;

								    //
								    //  Cleanup and close requests are never cancelled.
								    //

								    Cancel = (IrpContext->MajorFunction != IRP_MJ_CLEANUP) &&
								             (IrpContext->MajorFunction != IRP_MJ_CLOSE);

								    ASSERT( Cancel );

								    ASSERT( IrpContext->NodeTypeCode == NTFS_NTC_IRP_CONTEXT );

								    Vdo = CONTAINING_RECORD( Device,
								                             VOLUME_DEVICE_OBJECT,
								                             DeviceObject );

								    IoReleaseCancelSpinLock( Irp->CancelIrql );

								    //
								    //  Gain the critical workqueue spinlock and
								    //  either cancel it or move it to the head of the list
								    //  Note the workqueue code always tests the cancel first before working which
								    //  is what synchronizes this
								    //

								    ExAcquireSpinLock( &Vdo->OverflowQueueSpinLock, &SavedIrql );
								    RemoveEntryList( &IrpContext->WorkQueueItem.List );

								    //
								    //  Reset the shared fields
								    //

								    InitializeListHead( &IrpContext->RecentlyDeallocatedQueue );
								    InitializeListHead( &IrpContext->ExclusiveFcbList );

								    if (!Cancel) {

								        //
								        //  Must be processed: put it back at the front of the overflow
								        //  queue and clear the cancel indication in the Irp.
								        //

								        RtlZeroMemory( &IrpContext->WorkQueueItem, sizeof( WORK_QUEUE_ITEM ));

								        InsertHeadList( &Vdo->OverflowQueue, &IrpContext->WorkQueueItem.List );
								        Irp->Cancel = 0;

								    } else {

								        //
								        //  The request has left the overflow queue for good.
								        //

								        Vdo->OverflowQueueCount -= 1;
								    }

								    ExReleaseSpinLock( &Vdo->OverflowQueueSpinLock, SavedIrql );

								    if (Cancel) {

								        //
								        //  Signal the overflow queue event when the queue is below its
								        //  limit (NtfsAddToWorkqueInternal waits on this event when the
								        //  queue is full), then complete the request as cancelled.
								        //

								        if (Vdo->OverflowQueueCount < OVERFLOW_QUEUE_LIMIT) {

								            KeSetEvent( &Vdo->OverflowQueueEvent, IO_NO_INCREMENT, FALSE );
								        }

								        NtfsCompleteRequest( IrpContext, Irp, STATUS_CANCELLED );
								    }
								}


								VOID
								NtfsAddToWorkque (
								    IN PIRP_CONTEXT IrpContext,
								    IN PIRP Irp OPTIONAL
								    )

								/*++

								Routine Description:

								    Queues the posted request to the Fsp workque.  This is the form of
								    NtfsAddToWorkqueInternal that is allowed to block (CanBlock is TRUE).

								Arguments:

								    IrpContext - Pointer to the IrpContext to be queued to the Fsp

								    Irp - I/O Request Packet.

								Return Value:

								    None.

								--*/

								{
								    NtfsAddToWorkqueInternal( IrpContext, Irp, TRUE );

								    return;
								}


								//

								//  Local support routine.

								//


								VOID
								NtfsAddToWorkqueInternal (
								    IN PIRP_CONTEXT IrpContext,
								    IN PIRP Irp OPTIONAL,
								    IN BOOLEAN CanBlock
								    )

								/*++

								Routine Description:

								    Stores a posted request on the Fsp workque.

								    When an Irp with a file object is supplied, the owning volume device
								    object throttles the request: if more than FSP_PER_DEVICE_THRESHOLD
								    requests are already posted for the volume, the IrpContext is placed
								    on the volume's overflow queue (with NtfsCancelOverflowRequest as its
								    cancel routine) instead of being given to a worker thread.  If the
								    cancel routine cannot be set, the request is completed with
								    STATUS_CANCELLED.

								    In every other case the IrpContext's work item is queued to the
								    critical work queue, to be run by NtfsFspDispatch.

								Arguments:

								    IrpContext - Pointer to the IrpContext to be queued to the Fsp

								    Irp - I/O Request Packet.

								    CanBlock - If TRUE the routine may wait on the volume's overflow
								        queue event when the overflow queue is at its limit.  The wait is
								        only done below DISPATCH_LEVEL and never for a request flagged
								        IRP_CONTEXT_FLAG_DEFERRED_WRITE.

								Return Value:

								    None.

								--*/

								{
								    PIO_STACK_LOCATION IrpSp;
								    PVOLUME_DEVICE_OBJECT Vdo;
								    KIRQL SavedIrql;
								    KIRQL Irql = KeGetCurrentIrql();

								    //
								    //  Only a request that has an associated file object, and thus a
								    //  volume device object, is subject to the per volume throttling.
								    //

								    if (ARGUMENT_PRESENT( Irp )) {

								        IrpSp = IoGetCurrentIrpStackLocation( Irp );

								        if (IrpSp->FileObject != NULL) {

								            Vdo = CONTAINING_RECORD( IrpSp->DeviceObject,
								                                     VOLUME_DEVICE_OBJECT,
								                                     DeviceObject );

								            //
								            //  Block here for non deferred write threads when the
								            //  overflow queue is full and we're not in a dpc (hotfix
								            //  from async completion routine)
								            //

								            if (CanBlock &&
								                (Irql < DISPATCH_LEVEL) &&
								                !FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_DEFERRED_WRITE ) &&
								                (Vdo->OverflowQueueCount >= OVERFLOW_QUEUE_LIMIT)) {

								                KeWaitForSingleObject( &Vdo->OverflowQueueEvent, Executive, KernelMode, FALSE, NULL );
								            }

								            ExAcquireSpinLock( &Vdo->OverflowQueueSpinLock, &SavedIrql );

								            if (Vdo->PostedRequestCount > FSP_PER_DEVICE_THRESHOLD) {

								                //
								                //  Too many requests are already posted for this volume,
								                //  so this one goes to the overflow queue.  It needs a
								                //  cancel routine for as long as it sits there.
								                //

								                if (NtfsSetCancelRoutine( Irp, NtfsCancelOverflowRequest, (ULONG_PTR)IrpContext, TRUE )) {

								                    ASSERT( IsListEmpty( &IrpContext->ExclusiveFcbList ) );
								                    ASSERT( IsListEmpty( &IrpContext->RecentlyDeallocatedQueue ) );
								                    RtlZeroMemory( &IrpContext->WorkQueueItem, sizeof( WORK_QUEUE_ITEM ));

								                    InsertTailList( &Vdo->OverflowQueue, &IrpContext->WorkQueueItem.List );
								                    Vdo->OverflowQueueCount += 1;

								                    ExReleaseSpinLock( &Vdo->OverflowQueueSpinLock, SavedIrql );

								                    return;
								                }

								                //
								                //  The cancel routine could not be set.  Nothing was
								                //  queued; drop the lock and fail the request as
								                //  cancelled.
								                //

								                ExReleaseSpinLock( &Vdo->OverflowQueueSpinLock, SavedIrql );

								                if (Vdo->OverflowQueueCount < OVERFLOW_QUEUE_LIMIT) {

								                    KeSetEvent( &Vdo->OverflowQueueEvent, IO_NO_INCREMENT, FALSE );
								                }

								                NtfsCompleteRequest( IrpContext, Irp, STATUS_CANCELLED );

								                return;
								            }

								            //
								            //  We are going to send this Irp to an ex worker thread so up
								            //  the count.  Signal the overflow event while we are here if
								            //  the overflow queue is below its limit.
								            //

								            if (Vdo->OverflowQueueCount < OVERFLOW_QUEUE_LIMIT) {

								                KeSetEvent( &Vdo->OverflowQueueEvent, IO_NO_INCREMENT, FALSE );
								            }

								            Vdo->PostedRequestCount += 1;

								            ExReleaseSpinLock( &Vdo->OverflowQueueSpinLock, SavedIrql );
								        }
								    }

								    //
								    //  Send it off to a worker thread.
								    //

								    ASSERT( IsListEmpty( &IrpContext->ExclusiveFcbList ) );
								    ASSERT( IsListEmpty( &IrpContext->RecentlyDeallocatedQueue ) );
								    RtlZeroMemory( &IrpContext->WorkQueueItem, sizeof( WORK_QUEUE_ITEM ));

								    ExInitializeWorkItem( &IrpContext->WorkQueueItem,
								                          NtfsFspDispatch,
								                          (PVOID)IrpContext );
								    ExQueueWorkItem( &IrpContext->WorkQueueItem, CriticalWorkQueue );

								    return;
								}