/*++

Copyright (c) 1991  Microsoft Corporation

Module Name:

    WorkQue.c

Abstract:

    This module implements the Work queue routines for the Ntfs File
    system.

Author:

    Gary Kimura     [GaryKi]        21-May-1991

Revision History:

--*/

#include "NtfsProc.h"

//
//  The following constant is the maximum number of ExWorkerThreads that we
//  will allow to be servicing a particular target device at any one time.
//

#define FSP_PER_DEVICE_THRESHOLD         (2)

#ifdef ALLOC_PRAGMA
#pragma alloc_text(PAGE, NtfsOplockComplete)
#endif

VOID
NtfsAddToWorkqueInternal (
    IN PIRP_CONTEXT IrpContext,
    IN PIRP Irp OPTIONAL,
    IN BOOLEAN CanBlock
    );


VOID
NtfsOplockComplete (
    IN PVOID Context,
    IN PIRP Irp
    )

/*++

Routine Description:

    This routine is called by the oplock package when an oplock break has
    completed, allowing an Irp to resume execution.  If the status in the Irp
    is STATUS_SUCCESS, then we either queue the Irp to the Fsp queue or signal
    an event, depending on whether the caller handles oplock completions
    synchronously.  Otherwise we complete the Irp with the status in the Irp.

Arguments:

    Context - Pointer to the IrpContext to be queued to the Fsp

    Irp - I/O Request Packet.

Return Value:

    None.

--*/

{
    NTSTATUS Status = Irp->IoStatus.Status;
    PIRP_CONTEXT IrpContext = (PIRP_CONTEXT) Context;
    PKEVENT Event = NULL;

    PAGED_CODE();

    //
    //  Check for an event that we should signal for synchronous completion.
    //  This exists in 2 cases:
    //
    //  1) Non-fsp creates (fsp creates don't have a completion context, which is
    //     how we distinguish them)
    //
    //  2) Successful non-fsp read/writes (these indicate the NTFS_IO_CONTEXT_INLINE_OPLOCK
    //     flag in their NtfsIoContext)
    //

    if (IrpContext->MajorFunction == IRP_MJ_CREATE) {

        if ((IrpContext->Union.OplockCleanup != NULL) &&
            (IrpContext->Union.OplockCleanup->CompletionContext != NULL)) {

            Event = &IrpContext->Union.OplockCleanup->CompletionContext->Event;
            ASSERT( FlagOn( IrpContext->State, IRP_CONTEXT_STATE_PERSISTENT ) );
        }

    } else if ((IrpContext->MajorFunction == IRP_MJ_WRITE) &&
               (IrpContext->Union.NtfsIoContext != NULL) &&
               FlagOn( IrpContext->Union.NtfsIoContext->Flags, NTFS_IO_CONTEXT_INLINE_OPLOCK )) {

        Event = &IrpContext->Union.NtfsIoContext->Wait.SyncEvent;

        //
        //  Set the irp to not delete itself or the NtfsIoContext when we clean it up.
        //

        SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_DONT_DELETE );
    }

    //
    //  If we have a completion event then we want to clean up the IrpContext
    //  and then signal the event.
    //

    if (Event) {

        NtfsCompleteRequest( IrpContext, NULL, Status );
        KeSetEvent( Event, 0, FALSE );

        ASSERT( Status != STATUS_PENDING && Status != STATUS_REPARSE );

    } else if (Status == STATUS_SUCCESS) {

        //
        //  Insert the Irp context in the workqueue to retry on a regular
        //  successful oplock break.
        //

        NtfsAddToWorkqueInternal( IrpContext, Irp, FALSE );

    } else {

        //
        //  Otherwise complete the Irp and cleanup the IrpContext.
        //

        ASSERT( Status != STATUS_PENDING && Status != STATUS_REPARSE );
        NtfsCompleteRequest( IrpContext, Irp, Status );
    }

    return;
}


VOID
NtfsPrePostIrp (
    IN PVOID Context,
    IN PIRP Irp OPTIONAL
    )

/*++

Routine Description:

    This routine performs any necessary work before STATUS_PENDING is
    returned by the Fsd thread.  This routine is called within the
    filesystem and by the oplock package.

Arguments:

    Context - Pointer to the IrpContext to be queued to the Fsp

    Irp - I/O Request Packet (or FileObject in special close path)

Return Value:

    None.
--*/

{
    NtfsPrePostIrpInternal( Context, Irp, TRUE, FALSE );
}


VOID
NtfsWriteOplockPrePostIrp (
    IN PVOID Context,
    IN PIRP Irp OPTIONAL
    )

/*++

Routine Description:

    This routine performs any necessary work before STATUS_PENDING is
    returned by the Fsd thread.  This routine is called by the oplock
    package for write irps.  We will decide whether to save the top level
    context based on whether the oplock is being handled inline or not.

Arguments:

    Context - Pointer to the IrpContext to be queued to the Fsp

    Irp - I/O Request Packet (or FileObject in special close path)

Return Value:

    None.

--*/

{
    PIRP_CONTEXT IrpContext = (PIRP_CONTEXT)Context;
    BOOLEAN Inline = BooleanFlagOn( IrpContext->Union.NtfsIoContext->Flags,
                                    NTFS_IO_CONTEXT_INLINE_OPLOCK );

    //
    //  Clean up the io context before posting an oplock - so if it's on the stack
    //  we don't attempt to reference it during oplock completion.
    //

    if (!Inline) {

        if (FlagOn( IrpContext->State, IRP_CONTEXT_STATE_ALLOC_IO_CONTEXT )) {

            ExFreeToNPagedLookasideList( &NtfsIoContextLookasideList,
                                         IrpContext->Union.NtfsIoContext );
        }

        IrpContext->Union.NtfsIoContext = NULL;
    }

    NtfsPrePostIrpInternal( Context, Irp, TRUE, Inline );
}


VOID
NtfsPrePostIrpInternal (
    IN PVOID Context,
    IN PIRP Irp OPTIONAL,
    IN BOOLEAN PendIrp,
    IN BOOLEAN SaveContext
    )

/*++

Routine Description:

    This routine performs any necessary work before STATUS_PENDING is
    returned by the Fsd thread.  This routine is called within the
    filesystem and by the oplock package.

Arguments:

    Context - Pointer to the IrpContext to be queued to the Fsp

    Irp - I/O Request Packet (or FileObject in special close path)

    PendIrp - If true, mark the irp pending as well.

    SaveContext - If true, don't restore the top level context even if it's
        owned.  The caller will be waiting on the posted irp inline and
        continuing processing.

Return Value:

    None.

--*/

{
    PIRP_CONTEXT IrpContext;
    PIO_STACK_LOCATION IrpSp = NULL;

#if (DBG || defined( NTFS_FREE_ASSERTS ))
    PUSN_FCB ThisUsn, LastUsn;
#endif

    IrpContext = (PIRP_CONTEXT) Context;

    //
    //  Make sure this is a valid allocated IrpContext.  It's ok for
    //  this to be allocated on the caller's stack as long as the
    //  caller's not doing this operation asynchronously.
    //

    ASSERT_IRP_CONTEXT( IrpContext );
    ASSERT( (FlagOn( IrpContext->State, IRP_CONTEXT_STATE_ALLOC_FROM_POOL )) ||
            (IrpContext->NodeTypeCode == NTFS_NTC_IRP_CONTEXT) );

    //
    //  Make sure if we are posting the request, which may be
    //  because of log file full, that we free any Fcbs or PagingIo
    //  resources which were acquired.
    //

    //
    //  Just in case we somehow get here with a transaction ID, clear
    //  it here so we do not loop forever.
    //

    if (IrpContext->TransactionId != 0) {

        NtfsCleanupFailedTransaction( IrpContext );
    }

    //
    //  Cleanup all of the fields of the IrpContext.
    //  Restore the thread context pointer if associated with this IrpContext.
    //

    if (!SaveContext && FlagOn( IrpContext->State, IRP_CONTEXT_STATE_OWNS_TOP_LEVEL )) {

        NtfsRestoreTopLevelIrp();
        ClearFlag( IrpContext->State, IRP_CONTEXT_STATE_OWNS_TOP_LEVEL );
    }

    SetFlag( IrpContext->Flags, IRP_CONTEXT_FLAG_DONT_DELETE );
    NtfsCleanupIrpContext( IrpContext, FALSE );

#if (DBG || defined( NTFS_FREE_ASSERTS ))

    //
    //  If we are aborting a transaction, then it is important to clear out the
    //  Usn reasons, so we do not try to write a Usn Journal record for
    //  something that did not happen!  Worse yet, if we get a log file full
    //  we fail the abort, which is not allowed.
    //
    //  First, reset the bits in the Fcb, so we will not fail to allow posting
    //  and writing these bits later.
    //  Note that all the reversible changes are done with the Fcb exclusive,
    //  and they are actually backed out anyway.  All the nonreversible ones
    //  (only unnamed and named data overwrite) are forced out first anyway
    //  before the data is actually modified.
    //

    ThisUsn = &IrpContext->Usn;

    do {

        ASSERT( !FlagOn( ThisUsn->UsnFcbFlags, USN_FCB_FLAG_NEW_REASON ));

        if (ThisUsn->NextUsnFcb == NULL) {

            break;
        }

        LastUsn = ThisUsn;
        ThisUsn = ThisUsn->NextUsnFcb;

    } while (TRUE);
#endif

    IrpContext->OriginatingIrp = Irp;

    //
    //  Note that close.c uses a trick where the "Irp" is really
    //  a file object.
    //

    if (ARGUMENT_PRESENT( Irp )) {

        if (Irp->Type == IO_TYPE_IRP) {

            IrpSp = IoGetCurrentIrpStackLocation( Irp );

            //
            //  We need to lock the user's buffer, unless this is an MDL request,
            //  in which case there is no user buffer.
            //
            //  **** we need a better test than non-MDL (read or write)!
            //

            if ((IrpContext->MajorFunction == IRP_MJ_READ) ||
                (IrpContext->MajorFunction == IRP_MJ_WRITE)) {

                ClearFlag( IrpContext->MinorFunction, IRP_MN_DPC );

                //
                //  Lock the user's buffer if this is not an Mdl request.
                //

                if (!FlagOn( IrpContext->MinorFunction, IRP_MN_MDL )) {

                    NtfsLockUserBuffer( IrpContext,
                                        Irp,
                                        (IrpContext->MajorFunction == IRP_MJ_READ) ?
                                            IoWriteAccess : IoReadAccess,
                                        IrpSp->Parameters.Write.Length );
                }

            //
            //  We also need to check whether this is a query directory operation.
            //

            } else if (IrpContext->MajorFunction == IRP_MJ_DIRECTORY_CONTROL &&
                       IrpContext->MinorFunction == IRP_MN_QUERY_DIRECTORY) {

                NtfsLockUserBuffer( IrpContext,
                                    Irp,
                                    IoWriteAccess,
                                    IrpSp->Parameters.QueryDirectory.Length );

            //
            //  These two FSCTLs use neither buffered nor direct I/O, so check for them.
            //

            } else if ((IrpContext->MajorFunction == IRP_MJ_FILE_SYSTEM_CONTROL) &&
                       (IrpContext->MinorFunction == IRP_MN_USER_FS_REQUEST) &&
                       ((IrpSp->Parameters.FileSystemControl.FsControlCode == FSCTL_READ_USN_JOURNAL) ||
                        (IrpSp->Parameters.FileSystemControl.FsControlCode == FSCTL_GET_RETRIEVAL_POINTERS))) {

                NtfsLockUserBuffer( IrpContext,
                                    Irp,
                                    IoWriteAccess,
                                    IrpSp->Parameters.FileSystemControl.OutputBufferLength );
            }

            //
            //  Mark that we've already returned pending to the user.
            //

            if (PendIrp) {

                IoMarkIrpPending( Irp );
            }
        }
    }

    return;
}


NTSTATUS
NtfsPostRequest (
    IN PIRP_CONTEXT IrpContext,
    IN PIRP Irp OPTIONAL
    )

/*++

Routine Description:

    This routine enqueues the request packet specified by IrpContext to the
    work queue associated with the FileSystemDeviceObject.  This is an FSD
    routine.

Arguments:

    IrpContext - Pointer to the IrpContext to be queued to the Fsp

    Irp - I/O Request Packet (or FileObject in special close path)

Return Value:

    STATUS_PENDING

--*/

{
    //
    //  Before posting, free any Scb snapshots.  Note that if someone
    //  is calling this routine directly to post, then he better not
    //  have changed any disk structures, and thus we should have no
    //  work to do.  On the other hand, if someone raised a status
    //  (like STATUS_CANT_WAIT), then we do both a transaction abort
    //  and restore of these Scb values.
    //

    NtfsPrePostIrp( IrpContext, Irp );

    NtfsAddToWorkque( IrpContext, Irp );

    //
    //  And return to our caller
    //

    return STATUS_PENDING;
}


VOID
NtfsCancelOverflowRequest (
    IN PDEVICE_OBJECT Device,
    IN PIRP Irp
    )

/*++

Routine Description:

    This routine may be called by the I/O system to cancel an outstanding
    Irp in the overflow queue.  If it's an irp that must be processed we move
    the irp to the top of the queue, otherwise we cancel it directly.  The
    dequeuing code guarantees the cancel routine is removed before the
    irpcontext is dequeued.
    It also won't dequeue an irp that is marked with a 1 in the info field.

    Note we are guaranteed by the io subsystem that the irp will remain for
    the lifetime of this call even after we drop the spinlock.

Arguments:

    Device - DeviceObject from the I/O system

    Irp - Supplies the pointer to the Irp being canceled.

Return Value:

    None

--*/

{
    PIRP_CONTEXT IrpContext;
    PVOLUME_DEVICE_OBJECT Vdo;
    KIRQL SavedIrql;
    PIO_STACK_LOCATION IrpSp;
    BOOLEAN Cancel;

    IrpContext = (PIRP_CONTEXT)Irp->IoStatus.Information;
    IrpSp = IoGetCurrentIrpStackLocation( Irp );

    Cancel = (IrpContext->MajorFunction != IRP_MJ_CLEANUP) &&
             (IrpContext->MajorFunction != IRP_MJ_CLOSE);

    ASSERT( Cancel );
    ASSERT( IrpContext->NodeTypeCode == NTFS_NTC_IRP_CONTEXT );

    Vdo = CONTAINING_RECORD( Device, VOLUME_DEVICE_OBJECT, DeviceObject );

    IoReleaseCancelSpinLock( Irp->CancelIrql );

    //
    //  Gain the critical workqueue spinlock and either cancel the irp or
    //  move it to the head of the list.  Note the workqueue code always
    //  tests the cancel flag first before working, which is what
    //  synchronizes this.
    //

    ExAcquireSpinLock( &Vdo->OverflowQueueSpinLock, &SavedIrql );

    RemoveEntryList( &IrpContext->WorkQueueItem.List );

    //
    //  Reset the shared fields
    //

    InitializeListHead( &IrpContext->RecentlyDeallocatedQueue );
    InitializeListHead( &IrpContext->ExclusiveFcbList );

    if (!Cancel) {

        RtlZeroMemory( &IrpContext->WorkQueueItem, sizeof( WORK_QUEUE_ITEM ));
        InsertHeadList( &Vdo->OverflowQueue, &IrpContext->WorkQueueItem.List );
        Irp->Cancel = 0;

    } else {

        Vdo->OverflowQueueCount -= 1;
    }

    ExReleaseSpinLock( &Vdo->OverflowQueueSpinLock, SavedIrql );

    if (Cancel) {

        if (Vdo->OverflowQueueCount < OVERFLOW_QUEUE_LIMIT) {

            KeSetEvent( &Vdo->OverflowQueueEvent, IO_NO_INCREMENT, FALSE );
        }

        NtfsCompleteRequest( IrpContext, Irp, STATUS_CANCELLED );
    }
}


VOID
NtfsAddToWorkque (
    IN PIRP_CONTEXT IrpContext,
    IN PIRP Irp OPTIONAL
    )
{
    NtfsAddToWorkqueInternal( IrpContext, Irp, TRUE );
}


//
//  Local support routine.
//

VOID
NtfsAddToWorkqueInternal (
    IN PIRP_CONTEXT IrpContext,
    IN PIRP Irp OPTIONAL,
    IN BOOLEAN CanBlock
    )

/*++

Routine Description:

    This routine is called to actually store the posted Irp to the Fsp
    work queue.

Arguments:

    IrpContext - Pointer to the IrpContext to be queued to the Fsp

    Irp - I/O Request Packet.

    CanBlock - If true, this routine may block waiting for the overflow
        queue to drain when it is full.

Return Value:

    None.

--*/

{
    PIO_STACK_LOCATION IrpSp;
    NTSTATUS Status = STATUS_SUCCESS;
    KIRQL Irql;

    Irql = KeGetCurrentIrql();

    if (ARGUMENT_PRESENT( Irp )) {

        IrpSp = IoGetCurrentIrpStackLocation( Irp );

        //
        //  Check if this request has an associated file object, and thus volume
        //  device object.
        //

        if (IrpSp->FileObject != NULL) {

            KIRQL SavedIrql;
            PVOLUME_DEVICE_OBJECT Vdo;

            Vdo = CONTAINING_RECORD( IrpSp->DeviceObject,
                                     VOLUME_DEVICE_OBJECT,
                                     DeviceObject );

            //
            //  Check to see if this request should be sent to the overflow
            //  queue.  If not, then send it off to an exworker thread.  Block here
            //  for non deferred write threads when the overflow queue is full and
            //  we're not in a dpc (hotfix from async completion routine).
            //

            if ((Vdo->OverflowQueueCount >= OVERFLOW_QUEUE_LIMIT) &&
                CanBlock &&
                !FlagOn( IrpContext->Flags, IRP_CONTEXT_FLAG_DEFERRED_WRITE ) &&
                (Irql < DISPATCH_LEVEL)) {

                KeWaitForSingleObject( &Vdo->OverflowQueueEvent,
                                       Executive,
                                       KernelMode,
                                       FALSE,
                                       NULL );
            }

            ExAcquireSpinLock( &Vdo->OverflowQueueSpinLock, &SavedIrql );

            if (Vdo->PostedRequestCount > FSP_PER_DEVICE_THRESHOLD) {

                //
                //  We cannot currently respond to this IRP so we'll just enqueue it
                //  to the overflow queue on the volume.
                //

                if (NtfsSetCancelRoutine( Irp,
                                          NtfsCancelOverflowRequest,
                                          (ULONG_PTR)IrpContext,
                                          TRUE )) {

                    if (Status == STATUS_SUCCESS) {

                        ASSERT( IsListEmpty( &IrpContext->ExclusiveFcbList ) );
                        ASSERT( IsListEmpty( &IrpContext->RecentlyDeallocatedQueue ) );

                        RtlZeroMemory( &IrpContext->WorkQueueItem, sizeof( WORK_QUEUE_ITEM ));
                        InsertTailList( &Vdo->OverflowQueue, &IrpContext->WorkQueueItem.List );
                        Vdo->OverflowQueueCount += 1;
                    }

                } else {

                    Status = STATUS_CANCELLED;
                }

                ExReleaseSpinLock( &Vdo->OverflowQueueSpinLock, SavedIrql );

                if (Status != STATUS_SUCCESS) {

                    if (Vdo->OverflowQueueCount < OVERFLOW_QUEUE_LIMIT) {

                        KeSetEvent( &Vdo->OverflowQueueEvent, IO_NO_INCREMENT, FALSE );
                    }

                    NtfsCompleteRequest( IrpContext, Irp, Status );
                }

                return;

            } else {

                //
                //  We are going to send this Irp to an ex worker thread so up
                //  the count.
                //

                if (Vdo->OverflowQueueCount < OVERFLOW_QUEUE_LIMIT) {

                    KeSetEvent( &Vdo->OverflowQueueEvent, IO_NO_INCREMENT, FALSE );
                }

                Vdo->PostedRequestCount += 1;

                ExReleaseSpinLock( &Vdo->OverflowQueueSpinLock, SavedIrql );
            }
        }
    }

    //
    //  Send it off.....
    //

    ASSERT( IsListEmpty( &IrpContext->ExclusiveFcbList ) );
    ASSERT( IsListEmpty( &IrpContext->RecentlyDeallocatedQueue ) );

    RtlZeroMemory( &IrpContext->WorkQueueItem, sizeof( WORK_QUEUE_ITEM ));

    ExInitializeWorkItem( &IrpContext->WorkQueueItem,
                          NtfsFspDispatch,
                          (PVOID)IrpContext );

    ExQueueWorkItem( &IrpContext->WorkQueueItem, CriticalWorkQueue );

    return;
}
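
//
//  Illustrative sketch only, not part of the original module: a hypothetical
//  dispatch-path fragment showing how a caller is expected to hand a request
//  to the Fsp via NtfsPostRequest when it cannot be completed in the current
//  thread.  The surrounding function name and the CallerCanWait predicate are
//  assumptions for the example; only NtfsPostRequest and its STATUS_PENDING
//  return come from this module.  Excluded from compilation by #if 0.
//

#if 0
NTSTATUS
NtfsExampleDispatch (                       //  hypothetical caller, for illustration only
    IN PIRP_CONTEXT IrpContext,
    IN PIRP Irp
    )
{
    //
    //  If this thread is not allowed to block, post the request:
    //  NtfsPostRequest runs NtfsPrePostIrp (cleaning up the IrpContext,
    //  marking the Irp pending and locking any user buffer) and then queues
    //  the IrpContext to an ExWorker thread through NtfsAddToWorkque.
    //  The caller simply propagates STATUS_PENDING.
    //

    if (!CallerCanWait( IrpContext )) {     //  hypothetical predicate

        return NtfsPostRequest( IrpContext, Irp );
    }

    //
    //  ... otherwise process the request synchronously in this thread ...
    //

    return STATUS_SUCCESS;
}
#endif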