mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2395 lines
64 KiB
2395 lines
64 KiB
/*++
|
|
|
|
Copyright (c) 1994 Digital Equipment Corporation
|
|
|
|
Module Name:
|
|
|
|
gammaerr.c
|
|
|
|
Abstract:
|
|
|
|
This module implements error handling (machine checks and error
|
|
interrupts) for the Sable platform.
|
|
|
|
Author:
|
|
|
|
Joe Notarangelo 15-Feb-1994
|
|
|
|
Environment:
|
|
|
|
Kernel mode only.
|
|
|
|
Revision History:
|
|
|
|
--*/
|
|
|
|
//jnfix - this module currently only deals with errors initiated by the
|
|
//jnfix - T2, there is nothing completed for CPU Asic errors
|
|
|
|
#include "halp.h"
|
|
#include "gamma.h"
|
|
#include "axp21164.h"
|
|
#include "stdio.h"
|
|
|
|
//
|
|
// Declare the extern variable UncorrectableError declared in
|
|
// inithal.c.
|
|
//
|
|
extern PERROR_FRAME PUncorrectableError;
|
|
|
|
extern ULONG HalDisablePCIParityChecking;
|
|
extern ULONG HalpMemorySlot[];
|
|
extern ULONG HalpCPUSlot[];
|
|
|
|
ULONG SlotToPhysicalCPU[4] = {3, 0, 1, 2};
|
|
|
|
typedef BOOLEAN (*PSECOND_LEVEL_DISPATCH)(
|
|
PKINTERRUPT InterruptObject,
|
|
PVOID ServiceContext
|
|
);
|
|
|
|
ULONG
|
|
HalpTranslateSyndromToECC(
|
|
PULONG Syndrome
|
|
);
|
|
|
|
ULONG SGLCorrectedErrors = 0;
|
|
|
|
//
|
|
// PCI Config space access in progress. The READ_CONFIG_* routines
|
|
// must set this value to the correct return address before performing
|
|
// config space reads. The machine check handler will use this value for
|
|
// the return address if a machine check is detected from a config
|
|
// space read.
|
|
//
|
|
LONG HalpConfigIoAccess = 0; // Machine check return address
|
|
|
|
VOID
|
|
HalpSetMachineCheckEnables(
|
|
IN BOOLEAN DisableMachineChecks,
|
|
IN BOOLEAN DisableProcessorCorrectables,
|
|
IN BOOLEAN DisableSystemCorrectables
|
|
);
|
|
|
|
VOID
|
|
HalpSableReportFatalError(
|
|
VOID
|
|
);
|
|
|
|
#define MAX_ERROR_STRING 128
|
|
|
|
|
|
VOID
|
|
HalpInitializeMachineChecks(
|
|
IN BOOLEAN ReportCorrectableErrors
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine initializes machine check handling for an APECS-based
|
|
system by clearing all pending errors in the COMANCHE and EPIC and
|
|
enabling correctable errors according to the callers specification.
|
|
|
|
Arguments:
|
|
|
|
ReportCorrectableErrors - Supplies a boolean value which specifies
|
|
if correctable error reporting should be
|
|
enabled.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
T2_CERR1 Cerr1;
|
|
T2_PERR1 Perr1;
|
|
T2_IOCSR Iocsr;
|
|
|
|
//
|
|
// Clear any pending CBUS errors.
|
|
//
|
|
|
|
Cerr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Cerr1 );
|
|
WRITE_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Cerr1, Cerr1.all );
|
|
|
|
//
|
|
// Clear any pending PCI errors.
|
|
//
|
|
|
|
Perr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Perr1 );
|
|
|
|
Perr1.ForceReadDataParityError64 = 0;
|
|
Perr1.ForceAddressParityError64 = 0;
|
|
Perr1.ForceWriteDataParityError64 = 0;
|
|
Perr1.DetectTargetAbort = 1;
|
|
|
|
WRITE_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Perr1, Perr1.all );
|
|
|
|
//
|
|
// Enable the errors we want to handle in the T2 via the Iocsr,
|
|
// must read-modify-write Iocsr as it contains values we want to
|
|
// preserve.
|
|
//
|
|
|
|
Iocsr.all = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Iocsr );
|
|
|
|
//
|
|
// Enable all of the hard error checking and error interrupts.
|
|
//
|
|
|
|
Iocsr.EnableTlbErrorCheck = 1;
|
|
Iocsr.EnableCxAckCheckForDma = 1;
|
|
// Iocsr.EnableCommandOutOfSyncCheck = 1;
|
|
Iocsr.EnableCbusErrorInterrupt = 1;
|
|
Iocsr.EnableCbusParityCheck = 1;
|
|
|
|
#if 0
|
|
//
|
|
// T3 Bug: There are 2 write buffers which can be used for PIO or
|
|
// PPC. By default they are initialized to PIO. However, using
|
|
// them for PIO causes T3 state machine errors. To work around this
|
|
// problem convert them to PPC buffers, instead. This decreases PIO
|
|
// performance.
|
|
//
|
|
|
|
if (Iocsr.T2RevisionNumber >= 4) {
|
|
|
|
Iocsr.EnablePpc1 = 1;
|
|
Iocsr.EnablePpc2 = 1;
|
|
|
|
}
|
|
#endif // wkc - the SRM sets this now....
|
|
|
|
Iocsr.ForcePciRdpeDetect = 0;
|
|
Iocsr.ForcePciApeDetect = 0;
|
|
Iocsr.ForcePciWdpeDetect = 0;
|
|
Iocsr.EnablePciNmi = 1;
|
|
Iocsr.EnablePciDti = 1;
|
|
Iocsr.EnablePciSerr = 1;
|
|
|
|
if (HalDisablePCIParityChecking == 0xffffffff) {
|
|
|
|
//
|
|
// Disable PCI Parity Checking
|
|
//
|
|
|
|
Iocsr.EnablePciPerr = 0;
|
|
Iocsr.EnablePciRdp = 0;
|
|
Iocsr.EnablePciAp = 0;
|
|
Iocsr.EnablePciWdp = 0;
|
|
|
|
} else {
|
|
|
|
Iocsr.EnablePciPerr = !HalDisablePCIParityChecking;
|
|
Iocsr.EnablePciRdp = !HalDisablePCIParityChecking;
|
|
Iocsr.EnablePciAp = !HalDisablePCIParityChecking;
|
|
Iocsr.EnablePciWdp = !HalDisablePCIParityChecking;
|
|
|
|
}
|
|
|
|
WRITE_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Iocsr, Iocsr.all );
|
|
|
|
//
|
|
// Ascertain whether this is a Gamma or Lynx platform.
|
|
//
|
|
|
|
if( Iocsr.T2RevisionNumber >= 4 ){
|
|
|
|
HalpLynxPlatform = TRUE;
|
|
|
|
}
|
|
|
|
//
|
|
// Set the machine check enables within the EV5.
|
|
//
|
|
|
|
if( ReportCorrectableErrors == TRUE ){
|
|
HalpSetMachineCheckEnables( FALSE, FALSE, FALSE );
|
|
} else {
|
|
HalpSetMachineCheckEnables( FALSE, TRUE, TRUE );
|
|
}
|
|
|
|
{
|
|
//
|
|
// Clear any existing Rattler errors:
|
|
//
|
|
|
|
RATTLER_ESREG_CSR Esreg;
|
|
RATTLER_SIC_CSR Sicr;
|
|
|
|
Esreg.all =
|
|
READ_CPU_REGISTER(&((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Esreg);
|
|
WRITE_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Esreg,
|
|
Esreg.all);
|
|
|
|
|
|
Sicr.all = 0;
|
|
Sicr.SystemEventClear = 1;
|
|
Sicr.SystemBusErrorInterruptClear0 = 1;
|
|
Sicr.SystemBusErrorInterruptClear1 = 1;
|
|
|
|
WRITE_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Sicr,
|
|
Sicr.all);
|
|
|
|
}
|
|
|
|
#if defined(XIO_PASS1) || defined(XIO_PASS2)
|
|
|
|
//
|
|
// The next line *may* generate a machine check. This would happen
|
|
// if an XIO module is not present in the system. It should be safe
|
|
// to take machine checks now. Here goes nothing...
|
|
//
|
|
|
|
Iocsr.all = READ_T2_REGISTER( &((PT2_CSRS)(T4_CSRS_QVA))->Iocsr );
|
|
|
|
if( Iocsr.all != (ULONGLONG)-1 ){
|
|
|
|
HalpXioPresent = TRUE;
|
|
|
|
//
|
|
// Clear any pending CBUS errors.
|
|
//
|
|
|
|
Cerr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T4_CSRS_QVA))->Cerr1 );
|
|
WRITE_T2_REGISTER( &((PT2_CSRS)(T4_CSRS_QVA))->Cerr1, Cerr1.all );
|
|
|
|
//
|
|
// Clear any pending PCI errors.
|
|
//
|
|
|
|
Perr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T4_CSRS_QVA))->Perr1 );
|
|
|
|
Perr1.ForceReadDataParityError64 = 0;
|
|
Perr1.ForceAddressParityError64 = 0;
|
|
Perr1.ForceWriteDataParityError64 = 0;
|
|
Perr1.DetectTargetAbort = 1;
|
|
|
|
WRITE_T2_REGISTER( &((PT2_CSRS)(T4_CSRS_QVA))->Perr1, Perr1.all );
|
|
|
|
Iocsr.EnableTlbErrorCheck = 1;
|
|
Iocsr.EnableCxAckCheckForDma = 1;
|
|
// Iocsr.EnableCommandOutOfSyncCheck = 1;
|
|
Iocsr.EnableCbusErrorInterrupt = 1;
|
|
Iocsr.EnableCbusParityCheck = 1;
|
|
|
|
//
|
|
// T3 Bug: There are 2 write buffers which can be used for PIO or
|
|
// PPC. By default they are initialized to PIO. However, using
|
|
// them for PIO causes T3 state machine errors. To work around
|
|
// this problem convert them to PPC buffers, instead. This
|
|
// decreases PIO performance.
|
|
//
|
|
|
|
Iocsr.EnablePpc1 = 1;
|
|
Iocsr.EnablePpc2 = 1;
|
|
|
|
Iocsr.EnablePciStall = 0;
|
|
Iocsr.ForcePciRdpeDetect = 0;
|
|
Iocsr.ForcePciApeDetect = 0;
|
|
Iocsr.ForcePciWdpeDetect = 0;
|
|
Iocsr.EnablePciNmi = 1;
|
|
Iocsr.EnablePciDti = 1;
|
|
Iocsr.EnablePciSerr = 1;
|
|
|
|
if (HalDisablePCIParityChecking == 0xffffffff) {
|
|
|
|
//
|
|
// Disable PCI Parity Checking
|
|
//
|
|
|
|
Iocsr.EnablePciRdp64 = 0;
|
|
Iocsr.EnablePciAp64 = 0;
|
|
Iocsr.EnablePciWdp64 = 0;
|
|
Iocsr.EnablePciPerr = 0;
|
|
Iocsr.EnablePciRdp = 0;
|
|
Iocsr.EnablePciAp = 0;
|
|
Iocsr.EnablePciWdp = 0;
|
|
|
|
} else {
|
|
|
|
Iocsr.EnablePciRdp64 = !HalDisablePCIParityChecking;
|
|
Iocsr.EnablePciAp64 = !HalDisablePCIParityChecking;
|
|
Iocsr.EnablePciWdp64 = !HalDisablePCIParityChecking;
|
|
Iocsr.EnablePciPerr = !HalDisablePCIParityChecking;
|
|
Iocsr.EnablePciRdp = !HalDisablePCIParityChecking;
|
|
Iocsr.EnablePciAp = !HalDisablePCIParityChecking;
|
|
Iocsr.EnablePciWdp = !HalDisablePCIParityChecking;
|
|
|
|
}
|
|
|
|
WRITE_T2_REGISTER( &((PT2_CSRS)(T4_CSRS_QVA))->Iocsr,
|
|
Iocsr.all );
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HALDBG
|
|
if (HalDisablePCIParityChecking == 0) {
|
|
DbgPrint("gammaerr: PCI Parity Checking ON\n");
|
|
} else if (HalDisablePCIParityChecking == 1) {
|
|
DbgPrint("gammaerr: PCI Parity Checking OFF\n");
|
|
} else {
|
|
DbgPrint("gammaerr: PCI Parity Checking OFF - not set by ARC yet\n");
|
|
}
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
VOID
|
|
HalpGammaCorrectableInterrupt(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is the interrupt handler for an Gamma Correctable errors.
|
|
This function does nothing.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
return;
|
|
}
|
|
|
|
|
|
VOID
|
|
HalpBuildGammaUncorrectableErrorFrame(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is called when an uncorrectable error occurs.
|
|
This routine builds the global Sable Uncorrectable Error frame.
|
|
|
|
Arguments:
|
|
|
|
|
|
Return Value:
|
|
|
|
|
|
--*/
|
|
{
|
|
//
|
|
// We will *try* to get the CPU module information that was active at the
|
|
// time of the machine check.
|
|
// We will *try* to get as much information about the system, the CPU
|
|
// modules and the memory modules at the time of the crash.
|
|
//
|
|
extern ULONG HalpLogicalToPhysicalProcessor[HAL_MAXIMUM_PROCESSOR+1];
|
|
|
|
//
|
|
// SABLE_CPU_CSRS is defined to be RATTLER_CPU_CSRS in gamma.h
|
|
//
|
|
extern PSABLE_CPU_CSRS HalpSableCpuCsrs[HAL_MAXIMUM_PROCESSOR+1];
|
|
extern KAFFINITY HalpActiveProcessors;
|
|
|
|
PSABLE_CPU_CSRS CpuCsrsQva;
|
|
PGAMMA_UNCORRECTABLE_FRAME gammauncorrerr = NULL;
|
|
PEXTENDED_ERROR PExtErr;
|
|
ULONG LogicalCpuNumber;
|
|
ULONG i = 0;
|
|
ULONG TotalNumberOfCpus = 0;
|
|
T2_IOCSR Iocsr;
|
|
T2_PERR1 Perr1;
|
|
T2_PERR2 Perr2;
|
|
|
|
if(PUncorrectableError){
|
|
gammauncorrerr = (PGAMMA_UNCORRECTABLE_FRAME)
|
|
PUncorrectableError->UncorrectableFrame.RawSystemInformation;
|
|
PExtErr = &PUncorrectableError->UncorrectableFrame.ErrorInformation;
|
|
}
|
|
|
|
if(gammauncorrerr){
|
|
//
|
|
// Get the Error registers from all the CPU modules.
|
|
// Although called CPU error this is sable specific and not CPU
|
|
// specific the CPU error itself will be logged in the EV4 error frame.
|
|
// HalpActiveProcessors is a mask of all processors that are active.
|
|
// 8 bits per byte to get the total number of bits in KAFFINITY
|
|
//
|
|
DbgPrint("gammaerr.c - HalpBuildGammaUncorrectableErrorFrame :\n");
|
|
for(i = 0 ; i < sizeof(KAFFINITY)*8 ; i++ ) {
|
|
if( (HalpActiveProcessors >> i) & 0x1UL) {
|
|
LogicalCpuNumber = i;
|
|
TotalNumberOfCpus++;
|
|
}
|
|
else
|
|
continue;
|
|
|
|
CpuCsrsQva = HalpSableCpuCsrs[LogicalCpuNumber];
|
|
|
|
DbgPrint("\tCurrent CPU Module's[LN#=%d] CSRS QVA = %08lx\n",
|
|
LogicalCpuNumber, CpuCsrsQva);
|
|
DbgPrint("\n\t CPU Module Error Log : \n");
|
|
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Esreg =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Esreg);
|
|
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuer =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Evbuer);
|
|
|
|
DbgPrint("\t\tEvbuer = %016Lx\n",
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuer);
|
|
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuear =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Evbuear);
|
|
|
|
PUncorrectableError->UncorrectableFrame.PhysicalAddress =
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuear;
|
|
PUncorrectableError->UncorrectableFrame.Flags.
|
|
PhysicalAddressValid = 1;
|
|
|
|
DbgPrint("\t\tEvbuear = %016Lx\n",
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuear);
|
|
|
|
//
|
|
// If the Parity Error Bit (bit 5 and bit 37) is Set then
|
|
// read the victim address.
|
|
//
|
|
|
|
if(
|
|
( gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuer
|
|
& ((ULONGLONG)1 << 5) ) ||
|
|
( gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuer
|
|
& ((ULONGLONG)1 << 37) )
|
|
){
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Vear =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Evbvear);
|
|
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid
|
|
= 1;
|
|
sprintf(PUncorrectableError->UncorrectableFrame.ErrorString,
|
|
"Parity Error on Victim Address");
|
|
PUncorrectableError->UncorrectableFrame.Flags.
|
|
MemoryErrorSource = SYSTEM_CACHE;
|
|
PUncorrectableError->UncorrectableFrame.PhysicalAddress =
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuear;
|
|
PUncorrectableError->UncorrectableFrame.Flags.
|
|
PhysicalAddressValid = 1;
|
|
PExtErr->CacheError.Flags.CacheBoardValid = 1;
|
|
PExtErr->CacheError.CacheBoardNumber = LogicalCpuNumber;
|
|
HalpGetProcessorInfo(&PExtErr->CacheError.ProcessorInfo);
|
|
|
|
}
|
|
if(
|
|
( gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuer
|
|
& ((ULONGLONG)1 << 4) ) ||
|
|
( gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuer
|
|
& ((ULONGLONG)1 << 36) )
|
|
){
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid
|
|
= 1;
|
|
sprintf(PUncorrectableError->UncorrectableFrame.ErrorString,
|
|
"Parity Error on Address-Cmd Bus");
|
|
PUncorrectableError->UncorrectableFrame.Flags.
|
|
MemoryErrorSource = SYSTEM_CACHE;
|
|
PUncorrectableError->UncorrectableFrame.PhysicalAddress =
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Uncorrectable.Evbuear;
|
|
PUncorrectableError->UncorrectableFrame.Flags.
|
|
PhysicalAddressValid = 1;
|
|
|
|
PExtErr->CacheError.Flags.CacheBoardValid = 1;
|
|
PExtErr->CacheError.CacheBoardNumber = LogicalCpuNumber;
|
|
HalpGetProcessorInfo(&PExtErr->CacheError.ProcessorInfo);
|
|
}
|
|
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Dter =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Dter);
|
|
DbgPrint("\t\tDter = %016Lx\n",
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Dter);
|
|
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Cberr =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Cber);
|
|
DbgPrint("\t\tCberr = %016Lx\n",
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Cberr);
|
|
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Cbeal =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Cbealr);
|
|
DbgPrint("\t\tCbeal = %016Lx\n",
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Cbeal);
|
|
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Cbeah =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Cbeahr);
|
|
DbgPrint("\t\tCbeah = %016Lx\n",
|
|
gammauncorrerr->CpuError[LogicalCpuNumber].Cbeah);
|
|
|
|
|
|
//
|
|
// Fill in some of the control registers in the configuration
|
|
// structures.
|
|
//
|
|
DbgPrint("\n\t CPU Module Configuration : \n");
|
|
gammauncorrerr->Configuration.CpuConfigs[LogicalCpuNumber].Creg =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Creg);
|
|
DbgPrint("\t\tCreg = %016Lx\n",
|
|
gammauncorrerr->Configuration.CpuConfigs[LogicalCpuNumber].Creg);
|
|
|
|
gammauncorrerr->Configuration.CpuConfigs[LogicalCpuNumber].Cbctl =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Cbcr);
|
|
DbgPrint("\t\tCbctl = %016Lx\n",
|
|
gammauncorrerr->Configuration.CpuConfigs[LogicalCpuNumber].Cbctl);
|
|
|
|
gammauncorrerr->Configuration.CpuConfigs[LogicalCpuNumber].Dtctr =
|
|
READ_CPU_REGISTER(&((PSABLE_CPU_CSRS)CpuCsrsQva)->Dtctr);
|
|
DbgPrint("\t\tDtctr = %016Lx\n",
|
|
gammauncorrerr->Configuration.CpuConfigs[LogicalCpuNumber].Dtctr);
|
|
|
|
}
|
|
|
|
gammauncorrerr->Configuration.NumberOfCpus = TotalNumberOfCpus;
|
|
DbgPrint("\tTotalNumberOfCpus = %d\n", TotalNumberOfCpus);
|
|
|
|
//
|
|
// Since I dont know how to get how many memory modules
|
|
// are available and which slots they are in we will skip
|
|
// the memory error logging. When we do this we will also fill in
|
|
// the memory configuration details.
|
|
//
|
|
|
|
//
|
|
// Get T2 errors.
|
|
//
|
|
DbgPrint("\n\tT2 Error Log :\n");
|
|
gammauncorrerr->IoChipsetError.Cerr1 =
|
|
READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Cerr1 );
|
|
DbgPrint("\t\tCerr1 = %016Lx\n",
|
|
gammauncorrerr->IoChipsetError.Cerr1);
|
|
|
|
Perr1.all = gammauncorrerr->IoChipsetError.Perr1 =
|
|
READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Perr1 );
|
|
DbgPrint("\t\tPerr1 = %016Lx\n",
|
|
gammauncorrerr->IoChipsetError.Perr1);
|
|
|
|
gammauncorrerr->IoChipsetError.Cerr2 =
|
|
READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Cerr2 );
|
|
DbgPrint("\t\tCerr2 = %016Lx\n",
|
|
gammauncorrerr->IoChipsetError.Cerr2);
|
|
|
|
gammauncorrerr->IoChipsetError.Cerr3 =
|
|
READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Cerr3 );
|
|
DbgPrint("\t\tCerr3 = %016Lx\n",
|
|
gammauncorrerr->IoChipsetError.Cerr3);
|
|
|
|
Perr2.all = gammauncorrerr->IoChipsetError.Perr2 =
|
|
READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Perr2 );
|
|
DbgPrint("\t\tPerr2 = %016Lx\n",
|
|
gammauncorrerr->IoChipsetError.Perr2);
|
|
|
|
if( (Perr1.WriteDataParityError == 1) ||
|
|
(Perr1.AddressParityError == 1) ||
|
|
(Perr1.ReadDataParityError == 1) ||
|
|
(Perr1.ParityError == 1) ||
|
|
(Perr1.SystemError == 1) ||
|
|
(Perr1.NonMaskableInterrupt == 1) ){
|
|
|
|
PUncorrectableError->UncorrectableFrame.PhysicalAddress =
|
|
Perr2.ErrorAddress;
|
|
PUncorrectableError->UncorrectableFrame.Flags.
|
|
PhysicalAddressValid = 1;
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// T2 Configurations
|
|
//
|
|
DbgPrint("\n\tT2 Configuration :\n");
|
|
Iocsr.all = gammauncorrerr->Configuration.T2IoCsr =
|
|
READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Iocsr );
|
|
DbgPrint("\t\tIocsr = %016Lx\n",
|
|
gammauncorrerr->Configuration.T2IoCsr);
|
|
|
|
gammauncorrerr->Configuration.T2Revision = Iocsr.T2RevisionNumber;
|
|
DbgPrint("\t\tT2 Revision = %d\n",
|
|
gammauncorrerr->Configuration.T2Revision);
|
|
|
|
|
|
}
|
|
|
|
//
|
|
// Now fill in the Extended error information.
|
|
//
|
|
return;
|
|
}
|
|
|
|
|
|
VOID
|
|
HalpGammaErrorInterrupt(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is the interrupt handler for an Gamma machine check interrupt
|
|
The function calls HalpSableReportFatalError()
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
None. If a Fatal Error is detected the system is crashed.
|
|
|
|
--*/
|
|
{
|
|
RATTLER_ESREG_CSR Esreg;
|
|
RATTLER_SIC_CSR Sicr;
|
|
|
|
Esreg.all =
|
|
READ_CPU_REGISTER(&((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Esreg);
|
|
|
|
if( TRUE ){ //Esreg.EvNoResponse1 == 1 ){
|
|
|
|
//
|
|
// Dismiss the CBUS timeout errors and return. Let the machine check
|
|
// handler handle the PCI fixup.
|
|
//
|
|
|
|
WRITE_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Esreg,
|
|
Esreg.all
|
|
);
|
|
|
|
Sicr.all = 0;
|
|
Sicr.SystemBusErrorInterruptClear0 = 1;
|
|
Sicr.SystemBusErrorInterruptClear1 = 1;
|
|
|
|
WRITE_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Sicr,
|
|
Sicr.all
|
|
);
|
|
|
|
//
|
|
// Read the SICR to force the write to complete. Otherwise the CPU
|
|
// can unwind from the interrupt before the wrattler completes
|
|
// the processing of the write, causing another interrupt to be
|
|
// taken. This read forces the write to fully complete before
|
|
// proceeding.
|
|
//
|
|
Sicr.all =
|
|
READ_CPU_REGISTER(&((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Sicr);
|
|
|
|
return;
|
|
}
|
|
|
|
//
|
|
// Report the error and crash the system
|
|
//
|
|
HalpBuildGammaUncorrectableErrorFrame();
|
|
|
|
if(PUncorrectableError) {
|
|
PUncorrectableError->UncorrectableFrame.Flags.SystemInformationValid =
|
|
1;
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid = 1;
|
|
sprintf(PUncorrectableError->UncorrectableFrame.ErrorString,
|
|
"Gamma: Uncorrectable Error interrupt from T2");
|
|
}
|
|
|
|
|
|
HalpSableReportFatalError();
|
|
|
|
KeBugCheckEx( DATA_BUS_ERROR,
|
|
0xfacefeed, //jnfix - quick error interrupt id
|
|
0,
|
|
0,
|
|
(ULONG)PUncorrectableError );
|
|
|
|
|
|
return; // never
|
|
}
|
|
|
|
|
|
BOOLEAN
|
|
HalpIsaLegacyMemoryAccess(
|
|
ULONGLONG PA
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Checks for ISA legacy memory access on PCI bus1
|
|
|
|
Arguments:
|
|
|
|
PA Physical Address
|
|
|
|
Return Value:
|
|
|
|
true/false
|
|
|
|
--*/
|
|
|
|
{
|
|
|
|
if ( PA >= GAMMA_PCI1_SPARSE_MEMORY_PHYSICAL &&
|
|
PA < GAMMA_PCI1_SPARSE_ISA_LEGACY_MEMORY_PHYSICAL) {
|
|
return TRUE;
|
|
} else {
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
|
|
BOOLEAN
|
|
HalpIsaLegacyIOAccess(
|
|
ULONGLONG PA
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Checks for ISA legacy I/O access on PCI bus1
|
|
|
|
The rules are:
|
|
|
|
The first 4K of IO space is ISA legacy.
|
|
Each 1K after that, the first 256 bytes (of that 1K) is NOT ISA legacy, but
|
|
the other section (768 bytes) is.
|
|
|
|
|
|
Arguments:
|
|
|
|
PA Physical Address
|
|
|
|
Return Value:
|
|
|
|
true/false
|
|
|
|
--*/
|
|
|
|
{
|
|
ULONG LowOrder;
|
|
|
|
//
|
|
// First check the range...
|
|
//
|
|
|
|
if ( PA >= GAMMA_PCI1_SPARSE_IO_PHYSICAL &&
|
|
PA < GAMMA_PCI1_SPARSE_ISA_LEGACY_IO_PHYSICAL) {
|
|
|
|
//
|
|
// Whack off high order physical address bits and shift down
|
|
// by the IO bit shift
|
|
//
|
|
|
|
LowOrder = ((ULONG)(PA)) >> IO_BIT_SHIFT;
|
|
|
|
//
|
|
// Less than 4K?
|
|
//
|
|
|
|
if (LowOrder < 0x1000) {
|
|
return TRUE;
|
|
}
|
|
|
|
//
|
|
// Modulo 1K (0400)
|
|
//
|
|
|
|
LowOrder &= 0x3ff;
|
|
|
|
//
|
|
// first 256 bytes?
|
|
//
|
|
|
|
if (LowOrder < 0x100) {
|
|
return FALSE;
|
|
} else {
|
|
return TRUE;
|
|
}
|
|
} else {
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
|
|
BOOLEAN
|
|
HalpPlatformMachineCheck(
|
|
IN PEXCEPTION_RECORD ExceptionRecord,
|
|
IN PKEXCEPTION_FRAME ExceptionFrame,
|
|
IN PKTRAP_FRAME TrapFrame
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is given control when an hard error is acknowledged
|
|
by the APECS chipset. The routine is given the chance to
|
|
correct and dismiss the error.
|
|
|
|
Arguments:
|
|
|
|
ExceptionRecord - Supplies a pointer to the exception record generated
|
|
at the point of the exception.
|
|
|
|
ExceptionFrame - Supplies a pointer to the exception frame generated
|
|
at the point of the exception.
|
|
|
|
TrapFrame - Supplies a pointer to the trap frame generated
|
|
at the point of the exception.
|
|
|
|
Return Value:
|
|
|
|
TRUE is returned if the machine check has been handled and dismissed -
|
|
indicating that execution can continue. FALSE is return otherwise.
|
|
|
|
--*/
|
|
{
|
|
T2_CERR1 Cerr1;
|
|
T2_PERR1 Perr1;
|
|
T2_PERR2 Perr2;
|
|
PLOGOUT_FRAME_21164 LogoutFrame;
|
|
ULONGLONG PA;
|
|
enum {
|
|
Pci0ConfigurationSpace,
|
|
Pci1ConfigurationSpace,
|
|
MemCsrSpace,
|
|
CPUCsrSpace,
|
|
#if defined(XIO_PASS1) || defined(XIO_PASS2)
|
|
T4CsrSpace
|
|
#endif
|
|
} AddressSpace;
|
|
PVOID TxCsrQva;
|
|
PALPHA_INSTRUCTION FaultingInstruction;
|
|
|
|
RATTLER_ESREG_CSR Esreg;
|
|
RATTLER_SIC_CSR Sicr;
|
|
CHAR ErrSpace[32];
|
|
|
|
//
|
|
// Check if there are any CBUS errors pending. Any of these errors
|
|
// are fatal.
|
|
//
|
|
|
|
Cerr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Cerr1 );
|
|
|
|
if( (Cerr1.UncorrectableReadError == 1) ||
|
|
(Cerr1.NoAcknowledgeError == 1) ||
|
|
(Cerr1.CommandAddressParityError == 1) ||
|
|
(Cerr1.MissedCommandAddressParity == 1) ||
|
|
(Cerr1.ResponderWriteDataParityError == 1) ||
|
|
(Cerr1.MissedRspWriteDataParityError == 1) ||
|
|
(Cerr1.ReadDataParityError == 1) ||
|
|
(Cerr1.MissedReadDataParityError == 1) ||
|
|
(Cerr1.CmdrWriteDataParityError == 1) ||
|
|
(Cerr1.BusSynchronizationError == 1) ||
|
|
(Cerr1.InvalidPfnError == 1) ){
|
|
|
|
#if HALDBG
|
|
DbgPrint("HalpPlatformMachineCheck: T2 CERR1 = %Lx\n", Cerr1.all);
|
|
#endif
|
|
sprintf(ErrSpace,"System Bus");
|
|
PUncorrectableError->UncorrectableFrame.Flags.AddressSpace =
|
|
IO_SPACE;
|
|
PUncorrectableError->UncorrectableFrame.ErrorInformation.
|
|
IoError.Interface = CBus;
|
|
|
|
goto FatalError;
|
|
|
|
}
|
|
|
|
#if defined(XIO_PASS1) || defined(XIO_PASS2)
|
|
|
|
if( HalpXioPresent ){
|
|
|
|
Cerr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T4_CSRS_QVA))->Cerr1);
|
|
|
|
if( (Cerr1.UncorrectableReadError == 1) ||
|
|
(Cerr1.NoAcknowledgeError == 1) ||
|
|
(Cerr1.CommandAddressParityError == 1) ||
|
|
(Cerr1.MissedCommandAddressParity == 1) ||
|
|
(Cerr1.ResponderWriteDataParityError == 1) ||
|
|
(Cerr1.MissedRspWriteDataParityError == 1) ||
|
|
(Cerr1.ReadDataParityError == 1) ||
|
|
(Cerr1.MissedReadDataParityError == 1) ||
|
|
(Cerr1.CmdrWriteDataParityError == 1) ||
|
|
(Cerr1.BusSynchronizationError == 1) ||
|
|
(Cerr1.InvalidPfnError == 1) ){
|
|
|
|
#if HALDBG
|
|
DbgPrint("HalpPlatformMachineCheck: T4 CERR1 = %Lx\n",
|
|
Cerr1.all);
|
|
#endif
|
|
|
|
sprintf(ErrSpace,"System Bus");
|
|
PUncorrectableError->UncorrectableFrame.Flags.AddressSpace =
|
|
IO_SPACE;
|
|
PUncorrectableError->UncorrectableFrame.ErrorInformation.
|
|
IoError.Interface = CBus;
|
|
goto FatalError;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
//
|
|
// Check if there are any non-recoverable PCI errors.
|
|
//
|
|
|
|
Perr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Perr1 );
|
|
|
|
if( (Perr1.WriteDataParityError == 1) ||
|
|
(Perr1.AddressParityError == 1) ||
|
|
(Perr1.ReadDataParityError == 1) ||
|
|
(Perr1.ParityError == 1) ||
|
|
(Perr1.SystemError == 1) ||
|
|
(Perr1.NonMaskableInterrupt == 1) ){
|
|
|
|
#if HALDBG
|
|
DbgPrint("HalpPlatformMachineCheck: T2 PERR1 = %Lx\n", Perr1.all);
|
|
#endif
|
|
|
|
sprintf(ErrSpace,"PCI Bus");
|
|
PUncorrectableError->UncorrectableFrame.Flags.AddressSpace =
|
|
IO_SPACE;
|
|
PUncorrectableError->UncorrectableFrame.ErrorInformation.
|
|
IoError.Interface = PCIBus;
|
|
goto FatalError;
|
|
|
|
}
|
|
|
|
#if defined(XIO_PASS1) || defined(XIO_PASS2)
|
|
|
|
if( HalpXioPresent ){
|
|
|
|
Perr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T4_CSRS_QVA))->Perr1 );
|
|
|
|
if( (Perr1.WriteDataParityError == 1) ||
|
|
(Perr1.AddressParityError == 1) ||
|
|
(Perr1.ReadDataParityError == 1) ||
|
|
(Perr1.ParityError == 1) ||
|
|
(Perr1.SystemError == 1) ||
|
|
(Perr1.NonMaskableInterrupt == 1) ||
|
|
(Perr1.PpcSizeError == 1) ||
|
|
(Perr1.WriteDataParityError64 == 1) ||
|
|
(Perr1.AddressParityError64 == 1) ||
|
|
(Perr1.ReadDataParityError64 == 1) ||
|
|
(Perr1.TargetAbort == 1) ){
|
|
|
|
#if HALDBG
|
|
DbgPrint("HalpPlatformMachineCheck: T4 PERR1 = %Lx\n",
|
|
Perr1.all);
|
|
#endif
|
|
|
|
sprintf(ErrSpace,"PCI Configuration");
|
|
PUncorrectableError->UncorrectableFrame.Flags.AddressSpace =
|
|
IO_SPACE;
|
|
PUncorrectableError->UncorrectableFrame.ErrorInformation.
|
|
IoError.Interface = PCIBus;
|
|
|
|
goto FatalError;
|
|
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
//
|
|
// Get a pointer to the EV5 machine check logout frame.
|
|
//
|
|
|
|
LogoutFrame =
|
|
(PLOGOUT_FRAME_21164)ExceptionRecord->ExceptionInformation[1];
|
|
|
|
//
|
|
// Get the physical address which caused the machine check.
|
|
//
|
|
|
|
PA = LogoutFrame->EiAddr.EiAddr << 4;
|
|
|
|
//
|
|
// We handle and dismiss 3 classes of machine checks:
|
|
//
|
|
// - Read accesses from PCI 0 configuration space
|
|
// - Read accesses from PCI 1 configuration space
|
|
// - Read accesses from T4 CSR space
|
|
//
|
|
// Any other type of machine check is fatal.
|
|
//
|
|
// The following set of conditionals check which address space the
|
|
// machine check occured in, to decide how to handle it.
|
|
//
|
|
|
|
if( (PA >= GAMMA_PCI0_CONFIGURATION_PHYSICAL) &&
|
|
(PA < GAMMA_PCI1_CONFIGURATION_PHYSICAL) ){
|
|
|
|
//
|
|
// The machine check occured in PCI 0 configuration space. Save
|
|
// the address space and a QVA to T2 CSR space, we'll need them
|
|
// below.
|
|
//
|
|
|
|
AddressSpace = Pci0ConfigurationSpace;
|
|
TxCsrQva = (PVOID)T2_CSRS_QVA;
|
|
|
|
} else if( (PA >= GAMMA_PCI1_CONFIGURATION_PHYSICAL) &&
|
|
(PA < GAMMA_PCI0_SPARSE_IO_PHYSICAL) ){
|
|
|
|
//
|
|
// The machine check occured in PCI 1 configuration space.
|
|
// Save the address space and a QVA to T2 CSR space, we'll
|
|
// need them below.
|
|
//
|
|
|
|
AddressSpace = Pci1ConfigurationSpace;
|
|
TxCsrQva = (PVOID)T4_CSRS_QVA;
|
|
|
|
} else if( (PA >= GAMMA_CPU0_CSRS_PHYSICAL) &&
|
|
(PA <= GAMMA_CPU3_SICR_PHYSICAL)) {
|
|
|
|
//
|
|
// The machine check occured within CPU CSR space. Save
|
|
// the addres space, w'll need it below.
|
|
//
|
|
|
|
AddressSpace = CPUCsrSpace;
|
|
|
|
} else if( (PA >= GAMMA_MEM0_CSRS_PHYSICAL) &&
|
|
(PA < GAMMA_T2_CSRS_PHYSICAL)) {
|
|
|
|
//
|
|
// The machine check occured within MEM CSR space. Save
|
|
// the addres space, w'll need it below.
|
|
//
|
|
|
|
AddressSpace = MemCsrSpace;
|
|
|
|
} else
|
|
|
|
#if defined(XIO_PASS1) || defined(XIO_PASS2)
|
|
|
|
if( (PA >= GAMMA_T4_CSRS_PHYSICAL) &&
|
|
(PA < GAMMA_PCI0_CONFIGURATION_PHYSICAL) ){
|
|
|
|
//
|
|
// The machine check occured within T4 CSR space. Save
|
|
// the address space, we'll need it below.
|
|
//
|
|
|
|
AddressSpace = T4CsrSpace;
|
|
|
|
} else if (HalpIsaLegacyMemoryAccess(PA) ||
|
|
HalpIsaLegacyIOAccess(PA)) {
|
|
|
|
#if HALDBG
|
|
if (HalpIsaLegacyMemoryAccess(PA)) {
|
|
DbgPrint("Isa Legacy Memory access on PCI1: PA: %Lx \n", PA);
|
|
} else if (HalpIsaLegacyIOAccess(PA)) {
|
|
DbgPrint("Isa Legacy I/O access on PCI1: PA: %Lx \n", PA);
|
|
}
|
|
#endif
|
|
//
|
|
// Check for a master abort under within the first 1Mb of
|
|
// sparse space to fix broken drivers that sniff ISA legacy
|
|
// space on PCI slot 1 (which has no ISA address space...)
|
|
// This happens when a dorky driver was pokes around ISA legacy space
|
|
// on our second (peer) PCI bus -- which has no ISA legacy space.
|
|
// We attempt to silently return all ff's -- and look like there
|
|
// is non-responding ISA space...
|
|
//
|
|
|
|
Esreg.all = READ_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Esreg );
|
|
|
|
WRITE_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Esreg, Esreg.all );
|
|
|
|
Sicr.all = 0;
|
|
Sicr.SystemBusErrorInterruptClear0 = 1;
|
|
Sicr.SystemBusErrorInterruptClear1 = 1;
|
|
|
|
WRITE_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Sicr, Sicr.all );
|
|
|
|
//
|
|
// Read the SICR to force the write to complete. Otherwise the CPU
|
|
// can unwind from the machine check before the rattler completes
|
|
// the processing of the write. This read forces the write to
|
|
// fully complete before proceeding.
|
|
//
|
|
|
|
Sicr.all = READ_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Sicr );
|
|
|
|
|
|
//
|
|
// Our I/O access routines preload -1 into V0, so if we simply return,
|
|
// V0 should correctly be set to all ff's
|
|
//
|
|
|
|
return TRUE;
|
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
{
|
|
//
|
|
// Just based on the physical address, we have determined
|
|
// we cannot handle this machine check.
|
|
//
|
|
|
|
goto FatalError;
|
|
|
|
}
|
|
|
|
//
|
|
// The configuration space and T2 register access routines set
|
|
// HalpConfigIoAccess to point to the faulting instruction. If it
|
|
// is non-NULL update the exception frame to reflect the real
|
|
// address of the faulting instruction.
|
|
//
|
|
|
|
if( HalpConfigIoAccess != 0 ){
|
|
|
|
TrapFrame->Fir = (LONGLONG)HalpConfigIoAccess;
|
|
|
|
}
|
|
|
|
//
|
|
// Get a pointer to the faulting instruction. (It is possible
|
|
// that the exception address is actually an instruction or two
|
|
// beyond the instruction which actually caused the machine check.)
|
|
//
|
|
|
|
FaultingInstruction = (PALPHA_INSTRUCTION)TrapFrame->Fir;
|
|
|
|
//
|
|
// There are typically 2 MBs which follow the load which caused the
|
|
// machine check. The exception address could be one of them.
|
|
// If it is, advance the instruction pointer ahead of them.
|
|
//
|
|
|
|
while( (FaultingInstruction->Memory.Opcode == MEMSPC_OP) &&
|
|
(FaultingInstruction->Memory.MemDisp == MB_FUNC) ){
|
|
|
|
FaultingInstruction--;
|
|
|
|
}
|
|
|
|
//
|
|
// If the instruction uses v0 as Ra (i.e. v0 is the target register
|
|
// of the instruction) then this would typically indicate an T2 or
|
|
// configuration space access routine, and getting a machine check
|
|
// therein is acceptable. Otherwise, we took it someplace else, and
|
|
// it is fatal.
|
|
//
|
|
|
|
if( FaultingInstruction->Memory.Ra != V0_REG ){
|
|
|
|
goto FatalError;
|
|
|
|
}
|
|
|
|
//
|
|
// Perform address space-dependent handling.
|
|
//
|
|
|
|
switch( AddressSpace ){
|
|
|
|
#if defined(XIO_PASS1) || defined(XIO_PASS2)
|
|
|
|
case Pci1ConfigurationSpace:
|
|
|
|
//
|
|
// If no XIO module is present then we do not fix-up read accesses
|
|
// from PCI 1 configuration space. (This should never happen.)
|
|
//
|
|
|
|
if( !HalpXioPresent ){
|
|
|
|
goto FatalError;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
case Pci0ConfigurationSpace:
|
|
|
|
//
|
|
// Read the state of the T2/T4.
|
|
//
|
|
|
|
Perr1.all = READ_T2_REGISTER( &((PT2_CSRS)(TxCsrQva))->Perr1 );
|
|
Perr2.all = READ_T2_REGISTER( &((PT2_CSRS)(TxCsrQva))->Perr2 );
|
|
Cerr1.all = READ_T2_REGISTER( &((PT2_CSRS)(TxCsrQva))->Cerr1 );
|
|
|
|
//
|
|
// The T2/T4 responds differently when an error was received
|
|
// on type 0 and type 1 configuration cycles. For type 0 the
|
|
// T2/T4 detects and reports the device timeout. For type 1
|
|
// the PPB detects the timeout. Type 0 cycles error with
|
|
// the DeviceTimeout bit set. Type 1 cycles look just like
|
|
// NXM. Thus, the code below requires both checks.
|
|
//
|
|
|
|
if( (Perr1.DeviceTimeoutError != 1) &&
|
|
((Perr1.all != 0) ||
|
|
(Cerr1.all != 0) ||
|
|
(Perr2.PciCommand != 0xA)) ){
|
|
|
|
goto FatalError;
|
|
|
|
}
|
|
|
|
//
|
|
// Clear any PCI or Cbus errors which may have been latched.
|
|
//
|
|
|
|
WRITE_T2_REGISTER( &((PT2_CSRS)(TxCsrQva))->Perr1, Perr1.all );
|
|
|
|
break;
|
|
|
|
#if defined(XIO_PASS1) || defined(XIO_PASS2)
|
|
|
|
case T4CsrSpace:
|
|
|
|
//
|
|
// A read was performed from T4 CSR space when no XIO module was
|
|
// present. This was done, presumably, to detect the presence of
|
|
// the T4, and correspondingly, the XIO module. There is nothing
|
|
// special to do in this case, just fix-up the reference and
|
|
// dismiss the machine check.
|
|
//
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
case MemCsrSpace:
|
|
case CPUCsrSpace:
|
|
|
|
//
|
|
// A read was performed from the Mem CSR space when no memory module was
|
|
// present. This was done, presumably, to detect the presence of
|
|
// a memory board.
|
|
//
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
//
|
|
// Dismiss the CBUS timeout errors and return. Let the machine
|
|
// check handler handle the PCI fixup.
|
|
//
|
|
// The Esreg.EVNoResponse bits get set on a PCI bus timeout. This
|
|
// generates an Error interrupt, which must be cleared in the Sicr.
|
|
// Clear the error bit here, dismissing the interrupt.
|
|
//
|
|
|
|
Esreg.all = READ_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Esreg );
|
|
|
|
WRITE_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Esreg, Esreg.all );
|
|
|
|
Sicr.all = 0;
|
|
Sicr.SystemBusErrorInterruptClear0 = 1;
|
|
Sicr.SystemBusErrorInterruptClear1 = 1;
|
|
|
|
WRITE_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Sicr, Sicr.all );
|
|
|
|
//
|
|
// Read the SICR to force the write to complete. Otherwise the CPU
|
|
// can unwind from the machine check before the rattler completes
|
|
// the processing of the write. This read forces the write to
|
|
// fully complete before proceeding.
|
|
//
|
|
|
|
Sicr.all = READ_CPU_REGISTER( &((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Sicr );
|
|
|
|
//
|
|
// Advance the instruction pointer.
|
|
//
|
|
|
|
TrapFrame->Fir += 4;
|
|
|
|
//
|
|
// Make it appear as if the load instruction read all ones.
|
|
//
|
|
|
|
TrapFrame->IntV0 = (ULONGLONG)-1;
|
|
|
|
//
|
|
// Dismiss the machine check.
|
|
//
|
|
|
|
return TRUE;
|
|
|
|
//
|
|
// The system is not well and cannot continue reliable execution.
|
|
// Print some useful messages and return FALSE to indicate that the
|
|
// error was not handled.
|
|
//
|
|
|
|
FatalError:
|
|
//
|
|
// Build the error frame. Later may be move it in front and use
|
|
// the field in the error frame rather than reading the error registers
|
|
// twice.
|
|
//
|
|
|
|
HalpBuildGammaUncorrectableErrorFrame();
|
|
|
|
if(PUncorrectableError) {
|
|
PUncorrectableError->UncorrectableFrame.Flags.SystemInformationValid =
|
|
1;
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid = 1;
|
|
sprintf(PUncorrectableError->UncorrectableFrame.ErrorString,
|
|
"Gamma: Uncorrectable Error detected in %s", ErrSpace);
|
|
}
|
|
|
|
|
|
HalpSableReportFatalError();
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
ULONG
|
|
HalpTranslateSyndromToECC(
|
|
IN OUT PULONG Syndrome
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Translate the syndrome to a particular bit. If the syndrome indicates
|
|
a data bit, then return 0, if a check bit, then return 1.
|
|
|
|
In the place of the incoming syndrome, stuff the resulting bit.
|
|
|
|
Arguments:
|
|
|
|
Syndrome Pointer to the syndrome
|
|
|
|
Return Value:
|
|
|
|
0 for data bit
|
|
1 for check bit
|
|
|
|
--*/
|
|
{
|
|
|
|
static UCHAR SyndromeToECCTable[0xff] = {0, };
|
|
static BOOLEAN SyndromeToECCTableInitialized = FALSE;
|
|
|
|
ULONG Temp = *Syndrome;
|
|
|
|
//
|
|
// Initialize the table.
|
|
//
|
|
|
|
if (!SyndromeToECCTableInitialized) {
|
|
SyndromeToECCTableInitialized = TRUE;
|
|
|
|
//
|
|
// fill in the table
|
|
//
|
|
|
|
SyndromeToECCTable[0x1] = 0;
|
|
SyndromeToECCTable[0x2] = 1;
|
|
SyndromeToECCTable[0x4] = 2;
|
|
SyndromeToECCTable[0x8] = 3;
|
|
SyndromeToECCTable[0x10] = 4;
|
|
SyndromeToECCTable[0x20] = 5;
|
|
SyndromeToECCTable[0x40] = 6;
|
|
|
|
SyndromeToECCTable[0x4F] = 0;
|
|
SyndromeToECCTable[0x4A] = 1;
|
|
SyndromeToECCTable[0x52] = 2;
|
|
SyndromeToECCTable[0x54] = 3;
|
|
SyndromeToECCTable[0x57] = 4;
|
|
SyndromeToECCTable[0x58] = 5;
|
|
SyndromeToECCTable[0x5B] = 6;
|
|
SyndromeToECCTable[0x5D] = 7;
|
|
SyndromeToECCTable[0x23] = 8;
|
|
SyndromeToECCTable[0x25] = 9;
|
|
SyndromeToECCTable[0x26] = 10;
|
|
SyndromeToECCTable[0x29] = 11;
|
|
SyndromeToECCTable[0x2A] = 12;
|
|
SyndromeToECCTable[0x2C] = 13;
|
|
SyndromeToECCTable[0x31] = 14;
|
|
SyndromeToECCTable[0x34] = 15;
|
|
SyndromeToECCTable[0x0E] = 16;
|
|
SyndromeToECCTable[0x0B] = 17;
|
|
SyndromeToECCTable[0x13] = 18;
|
|
SyndromeToECCTable[0x15] = 19;
|
|
SyndromeToECCTable[0x16] = 20;
|
|
SyndromeToECCTable[0x19] = 21;
|
|
SyndromeToECCTable[0x1A] = 22;
|
|
SyndromeToECCTable[0x1C] = 23;
|
|
SyndromeToECCTable[0x62] = 24;
|
|
SyndromeToECCTable[0x64] = 25;
|
|
SyndromeToECCTable[0x67] = 26;
|
|
SyndromeToECCTable[0x68] = 27;
|
|
SyndromeToECCTable[0x6B] = 28;
|
|
SyndromeToECCTable[0x6D] = 29;
|
|
SyndromeToECCTable[0x70] = 30;
|
|
SyndromeToECCTable[0x75] = 31;
|
|
}
|
|
|
|
*Syndrome = SyndromeToECCTable[Temp];
|
|
|
|
if (Temp == 0x01 || Temp == 0x02 || Temp == 0x04 || Temp == 0x08 ||
|
|
Temp == 0x10 || Temp == 0x20 || Temp == 0x40) {
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
VOID
|
|
HalpCPUCorrectableError(
|
|
IN ULONG PhysicalSlot,
|
|
IN OUT PCORRECTABLE_ERROR CorrPtr
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
We have determined that a correctable error has occurred on a CPU
|
|
module -- the only thing this can be is a Bcache error. Populate the
|
|
correctable error frame.
|
|
|
|
Arguments:
|
|
|
|
PhysicalSlot Physical CPU slot number
|
|
CorrPtr A pointer to the correctable error frame
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
|
|
GAMMA_ESREG_CSR1 CSR1;
|
|
GAMMA_EVBCER_CSR4 CSR4;
|
|
ULONG CERBase;
|
|
|
|
//
|
|
// Get CPU's bcache CSRs
|
|
//
|
|
|
|
CERBase = HalpCPUSlot[PhysicalSlot];
|
|
CSR1.all = READ_CPU_REGISTER((PVOID)(CERBase | 0x1));
|
|
CSR4.all = READ_CPU_REGISTER((PVOID)(CERBase | 0x4));
|
|
|
|
//
|
|
// Set the bits, one by one
|
|
//
|
|
|
|
CorrPtr->Flags.AddressSpace = 1; // memory space
|
|
CorrPtr->Flags.PhysicalAddressValid = 0;
|
|
CorrPtr->Flags.ErrorBitMasksValid = 0;
|
|
CorrPtr->Flags.ExtendedErrorValid = 1;
|
|
CorrPtr->Flags.ProcessorInformationValid = 1;
|
|
CorrPtr->Flags.SystemInformationValid = 0;
|
|
CorrPtr->Flags.ServerManagementInformationValid = 0;
|
|
CorrPtr->Flags.MemoryErrorSource = 2; // processor cache
|
|
|
|
CorrPtr->Flags.ScrubError = 0; // ??
|
|
CorrPtr->Flags.LostCorrectable = CSR4.MissedCorrectable0 |
|
|
CSR4.MissedCorrectable1;
|
|
|
|
|
|
CorrPtr->Flags.LostAddressSpace = 0;
|
|
CorrPtr->Flags.LostMemoryErrorSource = 0;
|
|
|
|
CorrPtr->PhysicalAddress = 0;
|
|
CorrPtr->DataBitErrorMask = 0;
|
|
CorrPtr->CheckBitErrorMask = 0;
|
|
|
|
CorrPtr->ErrorInformation.CacheError.Flags.CacheLevelValid = 0;
|
|
CorrPtr->ErrorInformation.CacheError.Flags.CacheBoardValid = 0;
|
|
CorrPtr->ErrorInformation.CacheError.Flags.CacheSimmValid = 0;
|
|
|
|
CorrPtr->ErrorInformation.CacheError.ProcessorInfo.ProcessorType = 21064;
|
|
CorrPtr->ErrorInformation.CacheError.ProcessorInfo.ProcessorRevision = 0;
|
|
CorrPtr->ErrorInformation.CacheError.ProcessorInfo.PhysicalProcessorNumber =
|
|
SlotToPhysicalCPU[PhysicalSlot];
|
|
CorrPtr->ErrorInformation.CacheError.ProcessorInfo.LogicalProcessorNumber = 0;
|
|
CorrPtr->ErrorInformation.CacheError.CacheLevel = 0;
|
|
CorrPtr->ErrorInformation.CacheError.CacheSimm = 0;
|
|
CorrPtr->ErrorInformation.CacheError.TransferType = 0;
|
|
|
|
CorrPtr->RawProcessorInformationLength = 0;
|
|
|
|
//
|
|
// wkc fix -- get info from the CER
|
|
//
|
|
|
|
}
|
|
|
|
|
|
VOID
|
|
HalpMemoryCorrectableError(
|
|
IN ULONG PhysicalSlot,
|
|
IN OUT PCORRECTABLE_ERROR CorrPtr
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
We have determined that a correctable error has occurred on a memory
|
|
module. Populate the correctable error frame.
|
|
|
|
Arguments:
|
|
|
|
PhysicalSlot The physical slot of the falting board
|
|
CorrPtr A pointer to the correctable error frame
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
SGL_MEM_CSR0 CSR;
|
|
ULONG CSRBase;
|
|
|
|
//
|
|
// Get MEM modules base addr
|
|
//
|
|
|
|
CSRBase = HalpMemorySlot[PhysicalSlot];
|
|
|
|
CSR.all = READ_MEM_REGISTER((PVOID)CSRBase);
|
|
|
|
//
|
|
// Set the bits, one by one
|
|
//
|
|
|
|
CorrPtr->Flags.AddressSpace = 0; // ??
|
|
CorrPtr->Flags.PhysicalAddressValid = 0;
|
|
CorrPtr->Flags.ErrorBitMasksValid = 0;
|
|
CorrPtr->Flags.ExtendedErrorValid = 1;
|
|
CorrPtr->Flags.ProcessorInformationValid = 0;
|
|
CorrPtr->Flags.SystemInformationValid = 0;
|
|
CorrPtr->Flags.ServerManagementInformationValid = 0;
|
|
CorrPtr->Flags.MemoryErrorSource = 4; // processor memory
|
|
|
|
CorrPtr->PhysicalAddress = 0;
|
|
CorrPtr->DataBitErrorMask = 0;
|
|
CorrPtr->CheckBitErrorMask = 0;
|
|
|
|
CorrPtr->ErrorInformation.MemoryError.Flags.MemoryBoardValid = 0;
|
|
CorrPtr->ErrorInformation.MemoryError.Flags.MemorySimmValid = 0;
|
|
|
|
CorrPtr->ErrorInformation.MemoryError.MemoryBoard = PhysicalSlot;
|
|
CorrPtr->ErrorInformation.MemoryError.MemorySimm = 0;
|
|
CorrPtr->ErrorInformation.MemoryError.TransferType = 0;
|
|
|
|
CorrPtr->RawProcessorInformationLength = 0;
|
|
|
|
//
|
|
// wkc fix -- get info from the CSR
|
|
//
|
|
|
|
}
|
|
|
|
|
|
VOID
|
|
HalpT2CorrectableError(
|
|
IN ULONG PhysicalSlot,
|
|
IN OUT PCORRECTABLE_ERROR CorrPtr
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
We have determined that a correctable error has occurred on the CBus.
|
|
Populate the correctable error frame.
|
|
|
|
Arguments:
|
|
|
|
Physical Slot
|
|
CorrPtr A pointer to the correctable error frame
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
//
|
|
// This should never be called, because there are no correctable T2 errors.
|
|
//
|
|
|
|
}
|
|
|
|
|
|
|
|
ULONG
|
|
HalpCheckCPUForError(
|
|
IN OUT PULONG Slot
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Check the CPU module CSR for BCACHE error.
|
|
|
|
Arguments:
|
|
|
|
Slot The return value for the slot if an error is found
|
|
|
|
Return Value:
|
|
|
|
Either CorrectableError or NoError
|
|
|
|
--*/
|
|
{
|
|
|
|
ULONG i;
|
|
|
|
GAMMA_EVBCER_CSR4 CSR4;
|
|
ULONG BaseCSRQVA;
|
|
|
|
//
|
|
// Run through the CPU modules looking for a correctable
|
|
// error.
|
|
//
|
|
|
|
for (i=0; i<4; i++) {
|
|
|
|
//
|
|
// If a cpu board is present, then use the QVA stored in that
|
|
// location -- if a CPU module is not present, then the value is 0.
|
|
//
|
|
|
|
if (HalpCPUSlot[i] != 0) {
|
|
|
|
BaseCSRQVA = HalpCPUSlot[i];
|
|
|
|
//
|
|
// Read the backup cache correctable error register (CSR1)
|
|
//
|
|
|
|
CSR4.all = READ_CPU_REGISTER((PVOID)(BaseCSRQVA | 0x4));
|
|
|
|
//
|
|
// Check the two correctable error bits -- if one at least one
|
|
// is set, then go off and build the frame and jump directly
|
|
// to the correctable error flow.
|
|
//
|
|
|
|
if (CSR4.MissedCorrectable0 ||
|
|
CSR4.MissedCorrectable1 ||
|
|
CSR4.CorrectableError0 ||
|
|
CSR4.CorrectableError1) {
|
|
|
|
*Slot = i;
|
|
return CorrectableError;
|
|
}
|
|
}
|
|
}
|
|
|
|
return NoError;
|
|
}
|
|
|
|
|
|
ULONG
|
|
HalpCheckMEMForError(
|
|
PULONG Slot
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Check the Memory module CSR for errors.
|
|
|
|
Arguments:
|
|
|
|
Slot The return value for the slot if an error is found
|
|
|
|
Return Value:
|
|
|
|
Either CorrectableError or NoError or UncorrectableError
|
|
|
|
--*/
|
|
{
|
|
|
|
SGL_MEM_CSR0 CSR;
|
|
ULONG i;
|
|
ULONG BaseCSRQVA;
|
|
|
|
//
|
|
// If we have fallen through the CPU correctable errors,
|
|
// check the Memory boards
|
|
//
|
|
|
|
for (i=0; i<4; i++) {
|
|
|
|
//
|
|
// If a memory board is present, then the value is the QVA of CSR0
|
|
// on that memory board. If not present, the value is 0.
|
|
//
|
|
|
|
if (HalpMemorySlot[i] != 0) {
|
|
|
|
BaseCSRQVA = HalpMemorySlot[i];
|
|
|
|
CSR.all = READ_MEM_REGISTER((PVOID)BaseCSRQVA);
|
|
|
|
//
|
|
// Sync Errors are NOT part of the summary registers (bogus
|
|
// if you ask me....), but check them first.
|
|
//
|
|
|
|
if (CSR.SyncError1 || CSR.SyncError2) {
|
|
*Slot = i;
|
|
return CorrectableError;
|
|
}
|
|
|
|
//
|
|
// The error summary bit indicates if ANY error bits are
|
|
// lit. If no error on this module, then skip to the next one.
|
|
//
|
|
|
|
if (CSR.ErrorSummary1 == 0 && CSR.ErrorSummary2 == 0) {
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// Because one of the summary registers are set, then this memory
|
|
// module has indicated an error. Check the correctable bits. If
|
|
// any are set, then build a correctable error frame, otherwise,
|
|
// drop back 20 and punt.
|
|
//
|
|
|
|
*Slot = i;
|
|
|
|
if (CSR.EDCCorrectable1 || CSR.EDCCorrectable2 ||
|
|
CSR.EDCMissdedCorrectable1 || CSR.EDCMissdedCorrectable2) {
|
|
|
|
return CorrectableError;
|
|
} else {
|
|
return UncorrectableError;
|
|
}
|
|
}
|
|
}
|
|
|
|
return NoError;
|
|
|
|
}
|
|
|
|
|
|
ULONG
|
|
HalpCheckT2ForError(
|
|
PULONG Slot
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Check the System Host Chips for Errors.
|
|
|
|
Arguments:
|
|
|
|
Slot The return value for the QVA of the T2 of an error is returned.
|
|
|
|
Return Value:
|
|
|
|
Either CorrectableError or NoError or UncorrectableError
|
|
|
|
--*/
|
|
{
|
|
T2_CERR1 Cerr1;
|
|
|
|
*Slot = 0;
|
|
|
|
//
|
|
// Run through the T2 chips (OK, they may be T2, or T3 or T4...)
|
|
// and check for correctable errors
|
|
//
|
|
|
|
Cerr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Cerr1 );
|
|
|
|
if( (Cerr1.UncorrectableReadError == 1) ||
|
|
(Cerr1.NoAcknowledgeError == 1) ||
|
|
(Cerr1.CommandAddressParityError == 1) ||
|
|
(Cerr1.MissedCommandAddressParity == 1) ||
|
|
(Cerr1.ResponderWriteDataParityError == 1) ||
|
|
(Cerr1.MissedRspWriteDataParityError == 1) ||
|
|
(Cerr1.ReadDataParityError == 1) ||
|
|
(Cerr1.MissedReadDataParityError == 1) ||
|
|
(Cerr1.CmdrWriteDataParityError == 1) ||
|
|
(Cerr1.BusSynchronizationError == 1) ||
|
|
(Cerr1.InvalidPfnError == 1) ){
|
|
|
|
return UncorrectableError;
|
|
}
|
|
|
|
//
|
|
// There are no uncorrectable CBus errors
|
|
//
|
|
|
|
return NoError;
|
|
}
|
|
|
|
|
|
VOID
|
|
HalpSableErrorInterrupt(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This routine is entered as a result of an error interrupt from the
|
|
T2 on a Sable system. This function determines if the error is
|
|
fatal or recoverable and if recoverable performs the recovery and
|
|
error logging.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
|
|
static ERROR_FRAME Frame;
|
|
|
|
ULONG DetectedError;
|
|
|
|
ULONG Slot = 0;
|
|
PULONG DispatchCode;
|
|
PKINTERRUPT InterruptObject;
|
|
PKSPIN_LOCK ErrorlogSpinLock;
|
|
PCORRECTABLE_ERROR CorrPtr;
|
|
PBOOLEAN ErrorlogBusy;
|
|
ERROR_FRAME TempFrame;
|
|
|
|
//
|
|
// Get the interrupt information
|
|
//
|
|
|
|
DispatchCode = (PULONG)(PCR->InterruptRoutine[CORRECTABLE_VECTOR]);
|
|
InterruptObject = CONTAINING_RECORD(DispatchCode,
|
|
KINTERRUPT,
|
|
DispatchCode);
|
|
|
|
//
|
|
// Set various pointers so we can use them later.
|
|
//
|
|
|
|
CorrPtr = &TempFrame.CorrectableFrame;
|
|
ErrorlogBusy = (PBOOLEAN)((PUCHAR)InterruptObject->ServiceContext +
|
|
sizeof(PERROR_FRAME));
|
|
ErrorlogSpinLock = (PKSPIN_LOCK)((PUCHAR)ErrorlogBusy + sizeof(PBOOLEAN));
|
|
|
|
//
|
|
// Clear the data structures that we will use.
|
|
//
|
|
|
|
RtlZeroMemory(&TempFrame, sizeof(ERROR_FRAME));
|
|
|
|
//
|
|
// Find out if a CPU module had any errors
|
|
//
|
|
|
|
DetectedError = HalpCheckCPUForError(&Slot);
|
|
|
|
if (DetectedError == UncorrectableError) {
|
|
goto UCError;
|
|
} else if (DetectedError == CorrectableError) {
|
|
HalpCPUCorrectableError(Slot, CorrPtr);
|
|
goto CError;
|
|
}
|
|
|
|
//
|
|
// Find out if Memory module had any errors
|
|
//
|
|
|
|
DetectedError = HalpCheckMEMForError(&Slot);
|
|
|
|
if (DetectedError == UncorrectableError) {
|
|
goto UCError;
|
|
} else if (DetectedError == CorrectableError) {
|
|
HalpMemoryCorrectableError(Slot, CorrPtr);
|
|
goto CError;
|
|
}
|
|
|
|
|
|
//
|
|
// Find out if the T2's had any errors
|
|
//
|
|
|
|
DetectedError = HalpCheckT2ForError(&Slot);
|
|
|
|
if (DetectedError == UncorrectableError) {
|
|
goto UCError;
|
|
} else if (DetectedError == CorrectableError) {
|
|
HalpT2CorrectableError(Slot, CorrPtr);
|
|
goto CError;
|
|
} else {
|
|
return; // no error?
|
|
}
|
|
|
|
CError:
|
|
|
|
//
|
|
// Build the rest of the error frame
|
|
//
|
|
|
|
SGLCorrectedErrors += 1;
|
|
|
|
TempFrame.FrameType = CorrectableFrame;
|
|
TempFrame.VersionNumber = ERROR_FRAME_VERSION;
|
|
TempFrame.SequenceNumber = SGLCorrectedErrors;
|
|
TempFrame.PerformanceCounterValue =
|
|
KeQueryPerformanceCounter(NULL).QuadPart;
|
|
|
|
//
|
|
// Acquire the spinlock.
|
|
//
|
|
|
|
KiAcquireSpinLock(ErrorlogSpinLock);
|
|
|
|
//
|
|
// Check to see if an errorlog operation is in progress already.
|
|
// Then add our platform info...
|
|
//
|
|
|
|
if (!*ErrorlogBusy) {
|
|
|
|
// wkc fix....
|
|
|
|
} else {
|
|
|
|
//
|
|
// An errorlog operation is in progress already. We will
|
|
// set various lost bits and then get out without doing
|
|
// an actual errorloging call.
|
|
//
|
|
|
|
Frame.CorrectableFrame.Flags.LostCorrectable = TRUE;
|
|
Frame.CorrectableFrame.Flags.LostAddressSpace =
|
|
TempFrame.CorrectableFrame.Flags.AddressSpace;
|
|
Frame.CorrectableFrame.Flags.LostMemoryErrorSource =
|
|
TempFrame.CorrectableFrame.Flags.MemoryErrorSource;
|
|
}
|
|
|
|
//
|
|
// Release the spinlock.
|
|
//
|
|
|
|
KiReleaseSpinLock(ErrorlogSpinLock);
|
|
|
|
//
|
|
// Dispatch to the secondary correctable interrupt service routine.
|
|
// The assumption here is that if this interrupt ever happens, then
|
|
// some driver enabled it, and the driver should have the ISR connected.
|
|
//
|
|
|
|
((PSECOND_LEVEL_DISPATCH)InterruptObject->DispatchAddress)(
|
|
InterruptObject,
|
|
InterruptObject->ServiceContext
|
|
);
|
|
|
|
//
|
|
// Clear the error and return (wkcfix -- clear now? or in routines).
|
|
//
|
|
|
|
return;
|
|
|
|
|
|
UCError: // wkcfix
|
|
|
|
//
|
|
// The interrupt indicates a fatal system error.
|
|
// Display information about the error and shutdown the machine.
|
|
//
|
|
|
|
HalpSableReportFatalError();
|
|
|
|
KeBugCheckEx( DATA_BUS_ERROR,
|
|
0xfacefeed, //jnfix - quick error interrupt id
|
|
0,
|
|
0,
|
|
0 );
|
|
}
|
|
|
|
|
|
VOID
|
|
HalpSableReportFatalError(
|
|
VOID
|
|
)
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
This function reports and interprets a fatal hardware error on
|
|
a Sable system. Currently, only the T2 error registers - CERR1 and PERR1
|
|
are used to interpret the error.
|
|
|
|
Arguments:
|
|
|
|
None.
|
|
|
|
Return Value:
|
|
|
|
None.
|
|
|
|
--*/
|
|
{
|
|
T2_CERR1 Cerr1;
|
|
ULONGLONG Cerr2;
|
|
ULONGLONG Cerr3;
|
|
UCHAR OutBuffer[MAX_ERROR_STRING];
|
|
T2_PERR1 Perr1;
|
|
T2_PERR2 Perr2;
|
|
RATTLER_ESREG_CSR Esreg;
|
|
PCHAR parityErrString = NULL;
|
|
PEXTENDED_ERROR exterr;
|
|
|
|
//
|
|
// Begin the error output by acquiring ownership of the display
|
|
// and printing the dreaded banner.
|
|
//
|
|
|
|
if(PUncorrectableError) {
|
|
exterr = &PUncorrectableError->UncorrectableFrame.ErrorInformation;
|
|
parityErrString = PUncorrectableError->UncorrectableFrame.ErrorString;
|
|
}
|
|
|
|
HalAcquireDisplayOwnership(NULL);
|
|
|
|
HalDisplayString( "\nFatal system hardware error.\n\n" );
|
|
|
|
//
|
|
// Read both of the error registers. It is possible that more
|
|
// than one error was reported simulataneously.
|
|
//
|
|
|
|
Cerr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Cerr1 );
|
|
Perr1.all = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Perr1 );
|
|
|
|
//
|
|
// Read all of the relevant error address registers.
|
|
//
|
|
|
|
Cerr2 = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Cerr2 );
|
|
Cerr3 = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Cerr3 );
|
|
|
|
Perr2.all = READ_T2_REGISTER( &((PT2_CSRS)(T2_CSRS_QVA))->Perr2 );
|
|
|
|
//
|
|
// Interpret any errors from CERR1.
|
|
//
|
|
|
|
sprintf( OutBuffer, "T2 CERR1 = 0x%Lx\n", Cerr1.all );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
if( Cerr1.UncorrectableReadError == 1 ){
|
|
|
|
sprintf( OutBuffer,
|
|
"Uncorrectable read error, CBUS Address = 0x%Lx%16Lx\n",
|
|
Cerr3,
|
|
Cerr2 );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
if( Cerr1.NoAcknowledgeError == 1 ){
|
|
|
|
sprintf( OutBuffer,
|
|
"No Acknowledgement Error, CBUS Address = 0x%Lx%16Lx\n",
|
|
Cerr3,
|
|
Cerr2 );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
if( Cerr1.CommandAddressParityError == 1 ){
|
|
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid = 1;
|
|
sprintf( OutBuffer,
|
|
"Command Address Parity Error, CBUS Address = 0x%Lx%16Lx\n",
|
|
Cerr3,
|
|
Cerr2 );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
if( Cerr1.CaParityErrorLw3 == 1 ){
|
|
sprintf( parityErrString,
|
|
"C/A Parity Error on longword 3\n");
|
|
HalDisplayString( "C/A Parity Error on longword 3\n" );
|
|
}
|
|
|
|
if( Cerr1.CaParityErrorLw2 == 1 ){
|
|
sprintf( parityErrString,
|
|
"C/A Parity Error on longword 2\n" );
|
|
HalDisplayString( "C/A Parity Error on longword 2\n" );
|
|
}
|
|
|
|
if( Cerr1.CaParityErrorLw1 == 1 ){
|
|
sprintf( parityErrString,
|
|
"C/A Parity Error on longword 1\n");
|
|
HalDisplayString( "C/A Parity Error on longword 1\n" );
|
|
}
|
|
|
|
if( Cerr1.CaParityErrorLw0 == 1 ){
|
|
sprintf( parityErrString,
|
|
"C/A Parity Error on longword 0\n" );
|
|
HalDisplayString( "C/A Parity Error on longword 0\n" );
|
|
}
|
|
|
|
}
|
|
|
|
if( Cerr1.MissedCommandAddressParity == 1 ){
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid = 1;
|
|
sprintf( parityErrString,
|
|
"Missed C/A Parity Error\n" );
|
|
HalDisplayString( "Missed C/A Parity Error\n" );
|
|
}
|
|
|
|
if( (Cerr1.ResponderWriteDataParityError == 1) ||
|
|
(Cerr1.ReadDataParityError == 1) ){
|
|
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid = 1;
|
|
|
|
sprintf( OutBuffer,
|
|
"T2 detected Data Parity error, CBUS Address = 0x%Lx16Lx\n",
|
|
Cerr3,
|
|
Cerr2 );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
sprintf( OutBuffer,
|
|
"T2 was %s on error transaction\n",
|
|
Cerr1.ResponderWriteDataParityError == 1 ? "responder" :
|
|
"commander" );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
if( Cerr1.DataParityErrorLw0 == 1 ){
|
|
sprintf( parityErrString,
|
|
"Data Parity on longword 0\n" );
|
|
HalDisplayString( "Data Parity on longword 0\n" );
|
|
}
|
|
|
|
if( Cerr1.DataParityErrorLw1 == 1 ){
|
|
sprintf( parityErrString,
|
|
"Data Parity on longword 1\n" );
|
|
HalDisplayString( "Data Parity on longword 1\n" );
|
|
}
|
|
|
|
if( Cerr1.DataParityErrorLw2 == 1 ){
|
|
sprintf( parityErrString,
|
|
"Data Parity on longword 2\n");
|
|
HalDisplayString( "Data Parity on longword 2\n" );
|
|
}
|
|
|
|
if( Cerr1.DataParityErrorLw3 == 1 ){
|
|
sprintf( parityErrString,
|
|
"Data Parity on longword 3\n" );
|
|
HalDisplayString( "Data Parity on longword 3\n" );
|
|
}
|
|
|
|
if( Cerr1.DataParityErrorLw4 == 1 ){
|
|
sprintf( parityErrString,
|
|
"Data Parity on longword 4\n" );
|
|
HalDisplayString( "Data Parity on longword 4\n" );
|
|
}
|
|
|
|
if( Cerr1.DataParityErrorLw5 == 1 ){
|
|
sprintf( parityErrString,
|
|
"Data Parity on longword 5\n" );
|
|
HalDisplayString( "Data Parity on longword 5\n" );
|
|
}
|
|
|
|
if( Cerr1.DataParityErrorLw6 == 1 ){
|
|
sprintf( parityErrString,
|
|
"Data Parity on longword 6\n" );
|
|
HalDisplayString( "Data Parity on longword 6\n" );
|
|
}
|
|
|
|
if( Cerr1.DataParityErrorLw7 == 1 ){
|
|
sprintf( parityErrString,
|
|
"Data Parity on longword 7\n" );
|
|
HalDisplayString( "Data Parity on longword 7\n" );
|
|
}
|
|
|
|
} //(Cerr1.ResponderWriteDataParityError == 1) || ...
|
|
|
|
|
|
if( Cerr1.MissedRspWriteDataParityError == 1 ){
|
|
HalDisplayString( "Missed data parity error as responder\n" );
|
|
}
|
|
|
|
if( Cerr1.MissedReadDataParityError == 1 ){
|
|
HalDisplayString( "Missed data parity error as commander\n" );
|
|
}
|
|
|
|
|
|
if( Cerr1.CmdrWriteDataParityError == 1 ){
|
|
|
|
sprintf( OutBuffer,
|
|
"Commander Write Parity Error, CBUS Address = 0x%Lx%16Lx\n",
|
|
Cerr3,
|
|
Cerr2 );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
if( Cerr1.BusSynchronizationError == 1 ){
|
|
|
|
sprintf( OutBuffer,
|
|
"Bus Synchronization Error, CBUS Address = 0x%Lx%16Lx\n",
|
|
Cerr3,
|
|
Cerr2 );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
if( Cerr1.InvalidPfnError == 1 ){
|
|
|
|
sprintf( OutBuffer,
|
|
"Invalid PFN for scatter/gather, CBUS Address = 0x%Lx%16Lx\n",
|
|
Cerr3,
|
|
Cerr2 );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
//
|
|
// Interpret any errors from T2 PERR1.
|
|
//
|
|
|
|
sprintf( OutBuffer, "PERR1 = 0x%Lx\n", Perr1.all );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
if( Perr1.WriteDataParityError == 1 ){
|
|
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid = 1;
|
|
|
|
sprintf( parityErrString,
|
|
"T2 (slave) detected write parity error\n");
|
|
PUncorrectableError->UncorrectableFrame.ErrorInformation.
|
|
IoError.BusAddress.LowPart = Perr2.ErrorAddress;
|
|
sprintf( OutBuffer,
|
|
"T2 (slave) detected write parity error, PCI Cmd: %x, PCI Address: %lx\n",
|
|
Perr2.PciCommand,
|
|
Perr2.ErrorAddress );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
if( Perr1.AddressParityError == 1 ){
|
|
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid = 1;
|
|
|
|
sprintf( parityErrString,
|
|
"T2 (slave) detected address parity error\n");
|
|
|
|
PUncorrectableError->UncorrectableFrame.ErrorInformation.
|
|
IoError.BusAddress.LowPart = Perr2.ErrorAddress;
|
|
sprintf( OutBuffer,
|
|
"T2 (slave) detected address parity error, PCI Cmd: %x, PCI Address: %lx\n",
|
|
Perr2.PciCommand,
|
|
Perr2.ErrorAddress );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
if( Perr1.ReadDataParityError == 1 ){
|
|
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid = 1;
|
|
|
|
sprintf( parityErrString,
|
|
"T2 (master) detected read parity error\n");
|
|
|
|
PUncorrectableError->UncorrectableFrame.ErrorInformation.
|
|
IoError.BusAddress.LowPart = Perr2.ErrorAddress;
|
|
|
|
sprintf( OutBuffer,
|
|
"T2 (master) detected read parity error, PCI Cmd: %x, PCI Address: %lx\n",
|
|
Perr2.PciCommand,
|
|
Perr2.ErrorAddress );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
if( Perr1.ParityError == 1 ){
|
|
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid = 1;
|
|
|
|
sprintf( parityErrString,
|
|
"Participant asserted PERR#, parity error\n");
|
|
|
|
PUncorrectableError->UncorrectableFrame.ErrorInformation.
|
|
IoError.BusAddress.LowPart = Perr2.ErrorAddress;
|
|
|
|
sprintf( OutBuffer,
|
|
"Participant asserted PERR#, parity error, PCI Cmd: %x, PCI Address: %lx\n",
|
|
Perr2.PciCommand,
|
|
Perr2.ErrorAddress );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
if( Perr1.ParityError == 1 ){
|
|
|
|
PUncorrectableError->UncorrectableFrame.Flags.ErrorStringValid = 1;
|
|
|
|
sprintf( parityErrString,
|
|
"Slave asserted SERR#, parity error\n");
|
|
|
|
PUncorrectableError->UncorrectableFrame.ErrorInformation.
|
|
IoError.BusAddress.LowPart = Perr2.ErrorAddress;
|
|
|
|
sprintf( OutBuffer,
|
|
"Slave asserted SERR#, PCI Cmd: %x, PCI Address: %lx\n",
|
|
Perr2.PciCommand,
|
|
Perr2.ErrorAddress );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
if( Perr1.DeviceTimeoutError == 1 ){
|
|
|
|
sprintf( OutBuffer,
|
|
"Device timeout error, PCI Cmd: %x, PCI Address: %lx\n",
|
|
Perr2.PciCommand,
|
|
Perr2.ErrorAddress );
|
|
HalDisplayString( OutBuffer );
|
|
|
|
}
|
|
|
|
if( Perr1.DeviceTimeoutError == 1 ){
|
|
|
|
HalDisplayString( "PCI NMI asserted.\n" );
|
|
|
|
}
|
|
|
|
//
|
|
// Interpret RATTLER errors: (GAMMA Specific)
|
|
//
|
|
|
|
Esreg.all =
|
|
READ_CPU_REGISTER(&((PRATTLER_CPU_CSRS)HAL_PCR->CpuCsrsQva)->Esreg);
|
|
|
|
sprintf(OutBuffer, "ESREG = 0x%Lx\n", Esreg.all);
|
|
HalDisplayString( OutBuffer );
|
|
|
|
return;
|
|
|
|
}
|