//++
//
// Copyright (c) 1993, 94, 95, 96  IBM Corporation
//
// Module Name:
//
//    pxcache.s
//
// Abstract:
//
//    This module implements the routines to flush cache on the PowerPC.
//
// Author:
//
//    Peter L. Johnston (plj@vnet.ibm.com) September 1993
//
// Environment:
//
//    Kernel mode only.
//
// Revision History:
//    27-Dec-93  plj  Added 603 support.
//    13-Mar-94  plj  Fixed problem introduced during switch to pas,
//                    added 604 support.
//    18-Jan-95  plj  Add 603+, 604+ and 620 support.
//    15-Nov-95  plj  Switch to MP safe version (slightly faster too)
//                    Also, support 603++ and 604++.
//
//--

#include "kxppc.h"

//
// Override the ENABLE/DISABLE INTERRUPTS macros from kxppc.h because
// unless this machine has a 603e/ev in it, we don't need the workaround
// for errata 15.
//
// DISABLE_INTERRUPTS(p0, s0)
//      p0 <- current MSR (kept so the caller can restore it later)
//      s0 <- p0 with the bits selected by MASK_SPR(MSR_EE,1) cleared;
//            s0 is then written to the MSR.
//
// ENABLE_INTERRUPTS(p0)
//      Writes p0 (the value saved by DISABLE_INTERRUPTS) back to the MSR.
//
// The macro bodies are joined onto one line by the preprocessor, so the
// statements are separated with ';' and must not carry comments.
//

#undef  DISABLE_INTERRUPTS
#undef  ENABLE_INTERRUPTS

#define DISABLE_INTERRUPTS(p0, s0)                                   ; \
        mfmsr   p0                                                   ; \
        rlwinm  s0,p0,0,~MASK_SPR(MSR_EE,1)                          ; \
        mtmsr   s0

#define ENABLE_INTERRUPTS(p0)   mtmsr   p0

        .set    HID0, 1008              // H/W Implementation Dependent reg 0

//
// Define various known processor types.  These are compared against
// the upper halfword of the Processor Version Register (see the
// "PVR Processor Version" column of the cache layout table below).
//

        .set    PV601,  1               // 601
        .set    PV603,  3               // 603
        .set    PV603E, 6               // 603 plus
        .set    PV603EV,7               // 603 plus plus
        .set    PV603ART,8              // Arthur (603xx desig unknown)
        .set    PV604,  4               // 604
        .set    PV604E, 9               // 604 plus
                                        // 604 plus plus same as 604 plus
        .set    PV620,  20              // 620

//
// Note, in the following, the 603's "I-Cache Flash Invalidate"
// and the 604's "I-Cache Invalidate All" basically perform the
// same function although the usage is slightly different.
// In the 603 case, ICFI must be cleared under program control
// after it is set.  In the 604 the bit clears automatically.
// The 620's ICEFI behaves in the same way as the 604's ICIA.
//

        .set    H0_603_ICFI, 0x0800     // I-Cache Flash Invalidate
        .set    H0_604_ICIA, 0x0800     // I-Cache Invalidate All
        .set    H0_620_ICEFI,0x0800     // I-Cache Edge Flash Invalidate
        .set    H0_603_DCFA, 0x0040     // D-Cache Flush Assist (Arthur)

//
// Cache layout
//
// Processor |   Size (bytes)    | Line | Block | Sets  | PVR Processor
//           | I-Cache | D-Cache | Size | Size  |       | Version
// ----------------------------------------------------------------------
//   601     |   32KB Unified    | 64B  |  32B  |       | 0x0001xxxx
//   603     |   8KB   |   8KB   | 32   |  32   |       | 0x0003xxxx
//   603+    |  16KB   |  16KB   | 32   |  32   | 4x128 | 0x0006xxxx
//   603++   |  16KB   |  16KB   | 32   |  32   | 4x128 | 0x0007xxxx
//   Arthur  |  32KB   |  32KB   | 32   |  32   | 8x128 | 0x0008xxxx
//   604     |  16KB   |  16KB   | 32   |  32   | 4x128 | 0x0004xxxx
//   604+    |  32KB   |  32KB   | 32   |  32   | 4x128 | 0x0009xxxx
//   604++   |  32KB   |  32KB   | 32   |  32   |       | 0x0009xxxx
//   620     |  32KB   |  32KB   | 64   |  64   |       | 0x0014xxxx
//
// Naming used below:  DCBSZxxx  = D-cache block size in bytes,
//                     DCBSZL2xxx = log2 of that block size,
//                     DCBxxx    = number of blocks in the D-cache.
//

        .set    DCLSZ601,   64          // 601 cache line size
        .set    DCBSZ601,   32          // 601 cache block size
        .set    DCL601,     32 * 1024 / DCLSZ601  // 601 num cache lines
        .set    DCBSZL2601, 5           // 601 log2(block size)

        .set    DCBSZ603,   32          // 603 cache block size
        .set    DCB603,     8 * 1024 / DCBSZ603   // 603 num cache blocks
        .set    DCBSZL2603, 5           // 603 log2(block size)

        .set    DCB603E,    16 * 1024 / DCBSZ603  // 603+ num cache blocks
        .set    DCB603ART,  32 * 1024 / DCBSZ603  // Arthur num cache blocks

        .set    DCBSZ604,   32          // 604 cache block size
        .set    DCB604,     16 * 1024 / DCBSZ604  // 604 num cache blocks
        .set    DCBSZL2604, 5           // 604 log2(block size)

        .set    DCB604E,    32 * 1024 / DCBSZ604  // 604+ num cache blocks

        .set    DCBSZ620,   64          // 620 cache block size
        .set    DCB620,     32 * 1024 / DCBSZ620  // 620 num cache blocks
        .set    DCBSZL2620, 6           // 620 log2(block size)

//
// The following variables are declared locally so their addresses
// will appear in the TOC.  During initialization, we overwrite
// the TOC entries with the entry points for the cache flushing
// routines appropriate for the processor we are running on.
//
// It is done this way rather than filling in a table to reduce the
// number of accesses required to get the address at runtime.
// (This is known as "Data in TOC" which is not very much used in
// NT at this time).
//

        .data
        .globl  HalpSweepDcache
HalpSweepDcache:        .long   0
        .globl  HalpSweepIcache
HalpSweepIcache:        .long   0
        .globl  HalpSweepDcacheRange
HalpSweepDcacheRange:   .long   0
        .globl  HalpSweepIcacheRange
HalpSweepIcacheRange:   .long   0

//++
//
//  Routine Description:
//
//    HalpCacheSweepSetup
//
//    This routine is called during HAL initialization.  Its function
//    is to set the branch tables for cache flushing routines according
//    to the processor type.
//
//    The processor version is taken from the upper halfword of the PVR.
//    A version below PV603 is handled as a 601, a version strictly
//    between PV603 and PV603E is handled as a 604; versions of PV603E
//    and above must match one of the known types exactly.
//
//  Arguments:
//
//    None.
//
//  Return Value:
//
//    r.3 = 0   the TOC entries and function descriptors were updated.
//    r.3 = 1   processor version >= PV603E but not a known type; the
//              routine returns before any TOC entry or descriptor is
//              written.
//
//--

        LEAF_ENTRY(HalpCacheSweepSetup)

        mfpvr   r.3                     // get processor type
        rlwinm  r.3, r.3, 16, 0xffff    // keep version (upper half of PVR),
                                        // remove revision
        cmpwi   r.3, PV603E             // binary search for the right code
        lwz     r.4, [toc].data(r.toc)  // get address of local data section
                                        // NOTE(review): r.4 is reloaded below
                                        // before this value is read.
        bge     hcss.high               // jif 603+ or greater
        cmpwi   r.3, PV603
        beq     hcss.603                // jif 603
        bgt     hcss.604                // > 603, < 603+ must be 604

//
// processor is a 601
//

        lwz     r.5, [toc]HalpSweepDcache601(r.toc)
        mr      r.7, r.5                // D-cache range sweep: do nothing
        mr      r.8, r.5                // I-cache range sweep: do nothing
        mr      r.6, r.5                // 601 icache use dcache routine

//
// On a 601, the routine HalFlushIoBuffers is not required because the
// 601 has a unified cache and coherency with other system components
// is maintained.  HalpSweepDcache601 is a no-op, ie it does nothing
// but return.  HalFlushIoBuffers is only called via the TOC (ie it is
// not used within the HAL) so change the TOC entry for it to point to
// HalpSweepDcache601.
//

        .extern HalFlushIoBuffers
        stw     r.5, [toc]HalFlushIoBuffers(r.toc)
        b       hcss.done

//
// processor is a 603
//

hcss.603:
        lwz     r.5, [toc]HalpSweepDcache603(r.toc)
        lwz     r.6, [toc]HalpSweepIcache603(r.toc)
        lwz     r.7, [toc]HalpSweepDcacheRange603(r.toc)
        lwz     r.8, [toc]HalpSweepIcacheRange603(r.toc)
        b       hcss.done

//
// processor is a 604
//

hcss.604:
        lwz     r.5, [toc]HalpSweepDcache604(r.toc)
        lwz     r.6, [toc]HalpSweepIcache604(r.toc)
        lwz     r.7, [toc]HalpSweepDcacheRange604(r.toc)
        lwz     r.8, [toc]HalpSweepIcacheRange604(r.toc)
        b       hcss.done

//
// Processor type >= 603+, continue isolation of processor type.
// cr.0 still holds the result of the compare against PV603E here.
//

hcss.high:
        beq     hcss.603p               // jif 603 plus
        cmpwi   cr.7, r.3, PV603EV
        cmpwi   cr.0, r.3, PV604E
        cmpwi   cr.1, r.3, PV620
        cmpwi   cr.6, r.3, PV603ART
        beq     cr.7, hcss.603p         // treat 603++ same as 603+
        beq     cr.0, hcss.604p         // jif 604 plus
        beq     cr.1, hcss.620          // jif 620
        beq     cr.6, hcss.603art       // jif Arthur

//
// If we got here we are running on a processor whose cache characteristics
// are not known.  Return non-zero for error.
//

        li      r.3, 1
        blr

//
// processor is a 603 plus
//

hcss.603p:
        lwz     r.5, [toc]HalpSweepDcache603p(r.toc)
        lwz     r.6, [toc]HalpSweepIcache603p(r.toc)
        lwz     r.7, [toc]HalpSweepDcacheRange603p(r.toc)
        lwz     r.8, [toc]HalpSweepIcacheRange603p(r.toc)
        b       hcss.done

//
// Processor is an Arthur.  Note: Arthur uses the 603 routines
// for everything except SweepDcache.
//

hcss.603art:
        lwz     r.5, [toc]HalpSweepDcache603art(r.toc)
        lwz     r.6, [toc]HalpSweepIcache603(r.toc)
        lwz     r.7, [toc]HalpSweepDcacheRange603(r.toc)
        lwz     r.8, [toc]HalpSweepIcacheRange603(r.toc)
        b       hcss.done

//
// processor is a 604 plus
//

hcss.604p:
        lwz     r.5, [toc]HalpSweepDcache604p(r.toc)
        lwz     r.6, [toc]HalpSweepIcache604p(r.toc)
        lwz     r.7, [toc]HalpSweepDcacheRange604p(r.toc)
        lwz     r.8, [toc]HalpSweepIcacheRange604p(r.toc)
        b       hcss.done

//
// processor is a 620
//

hcss.620:
        lwz     r.5, [toc]HalpSweepDcache620(r.toc)
        lwz     r.6, [toc]HalpSweepIcache620(r.toc)
        lwz     r.7, [toc]HalpSweepDcacheRange620(r.toc)
        lwz     r.8, [toc]HalpSweepIcacheRange620(r.toc)
        b       hcss.done


hcss.done:

//
// r.5 thru r.8 contain the address of the function descriptors
// for the routines we really want to use.  Dereference them to
// get at the entry point addresses.
//
//      r.5  D-cache sweep          r.7  D-cache range sweep
//      r.6  I-cache sweep          r.8  I-cache range sweep
//

        lwz     r.5, 0(r.5)
        lwz     r.6, 0(r.6)
        lwz     r.7, 0(r.7)
        lwz     r.8, 0(r.8)

//
// Store the entry point addresses directly into the TOC.
// This is so direct linkage from within the HAL to the
// generic cache flushing routines can get to the desired
// routines for this processor.
//

        stw     r.5, [toc]HalpSweepDcache(r.toc)
        stw     r.6, [toc]HalpSweepIcache(r.toc)
        stw     r.7, [toc]HalpSweepDcacheRange(r.toc)
        stw     r.8, [toc]HalpSweepIcacheRange(r.toc)

//
// Modify the Function Descriptors for the generic routines to
// point directly at the target routines so that linkage from
// other executables (eg the kernel) will be direct rather
// than via the generic routines.
//

        lwz     r.3, [toc]HalSweepDcache(r.toc)
        lwz     r.4, [toc]HalSweepIcache(r.toc)
        stw     r.5, 0(r.3)
        stw     r.6, 0(r.4)
        lwz     r.3, [toc]HalSweepDcacheRange(r.toc)
        lwz     r.4, [toc]HalSweepIcacheRange(r.toc)
        stw     r.7, 0(r.3)
        stw     r.8, 0(r.4)

        li      r.3, 0                  // return code = success

        LEAF_EXIT(HalpCacheSweepSetup)

//++
//
//  Routines    HalSweepDcache
//              HalSweepIcache
//              HalSweepDcacheRange
//              HalSweepIcacheRange
//
//  are simply dispatch points for the appropriate routine for
//  the processor being used.
//
//  Each one loads the processor specific entry point that
//  HalpCacheSweepSetup stored in the TOC and branches to it via
//  the Count Register.  The Link Register is untouched, so the
//  target returns directly to the original caller.  r.12 is used
//  as scratch.
//
//--

        LEAF_ENTRY(HalSweepDcache)

        lwz     r.12, [toc]HalpSweepDcache(r.toc)
        mtctr   r.12
        bctr

        DUMMY_EXIT(HalSweepDcache)



        LEAF_ENTRY(HalSweepIcache)

        lwz     r.12, [toc]HalpSweepIcache(r.toc)
        mtctr   r.12
        bctr

        DUMMY_EXIT(HalSweepIcache)



        LEAF_ENTRY(HalSweepDcacheRange)

        lwz     r.12, [toc]HalpSweepDcacheRange(r.toc)
        mtctr   r.12
        bctr

        DUMMY_EXIT(HalSweepDcacheRange)



        LEAF_ENTRY(HalSweepIcacheRange)

        lwz     r.12, [toc]HalpSweepIcacheRange(r.toc)
        mtctr   r.12
        bctr

        DUMMY_EXIT(HalSweepIcacheRange)

//++
//
//  601 Cache Flushing Routines
//
//    The 601 has a unified instruction/data cache and coherency is
//    maintained.  For this reason these routines don't need to do
//    anything.
//
//  HalpSweepDcache601
//
//  Arguments:
//
//    None.
//
//  Return Value:
//
//    None.
//
//--

        LEAF_ENTRY(HalpSweepDcache601)

        LEAF_EXIT(HalpSweepDcache601)

//++
//
//  HalpSweepDcacheRange603
//
//  HalpSweepDcacheRange603p
//
//  HalpSweepDcacheRange604
//
//  HalpSweepDcacheRange604p
//
//    Force data in a given address range to memory.
//
//    Because this routine works on a range of blocks and block size
//    is the same on 601, 603, 603+, 604 and 604+ we can use the same
//    code for each of them.
//
//    The number of blocks touched is
//
//      (Length + (Start & (block size - 1)) + block size - 1) / block size
//
//    If that count is zero (zero Length at a block aligned Start) the
//    routine returns without touching the cache.  Loading a zero count
//    into CTR would otherwise make the bdnz loop wrap and run 2^32
//    times.
//
//  Arguments:
//
//    r.3   Start address
//    r.4   Length (in bytes)
//
//  Return Value:
//
//    None.  r.3, r.4, r.5, cr.0 and CTR are modified.
//
//--

        LEAF_ENTRY(HalpSweepDcacheRange604)

        ALTERNATE_ENTRY(HalpSweepDcacheRange603)

        ALTERNATE_ENTRY(HalpSweepDcacheRange603p)

        ALTERNATE_ENTRY(HalpSweepDcacheRange604p)

        rlwinm  r.5, r.3, 0, DCBSZ601-1 // isolate offset in start block
        addi    r.4, r.4, DCBSZ601-1    // bump range by block sz - 1
        add     r.4, r.4, r.5           // add start block offset
        srwi    r.4, r.4, DCBSZL2601    // number of blocks
        cmpwi   r.4, 0                  // empty range?
        beq     hsdr601.done            // yes, nothing to flush (CTR = 0
                                        // would loop 2^32 times)
        mtctr   r.4
        sync                            // previous stores must be complete
hsdr601.fl:
        dcbst   0, r.3                  // flush block
        addi    r.3, r.3, DCBSZ601      // bump address
        bdnz    hsdr601.fl
hsdr601.done:

        LEAF_EXIT(HalpSweepDcacheRange604)

//++
//
//  HalpSweepIcacheRange601
//
//    Due to the unified cache, this routine is meaningless on a 601.
//    The reason for flushing a range of instruction addresses is
//    either code modification (eg breakpoints), in which case the
//    nature of the unified cache is that the *right* code is in the
//    cache, or a transfer of a code page, in which case the unified
//    snooping cache will have done the right thing.
//
//    Therefore this routine simply returns.
//
//  Arguments:
//
//    r.3   Start address
//    r.4   Length (in bytes)
//
//  Return Value:
//
//    None.
//
//--

        LEAF_ENTRY(HalpSweepIcacheRange601)

        // return

        LEAF_EXIT(HalpSweepIcacheRange601)

//++
//
//  603, 603+ Cache Flushing Routines
//
//    The 603 has separate instruction and data caches of 8 KB each.
//    The 603+ has separate instruction and data caches of 16 KB each.
//    Line size = Block size = 32 bytes.
//
//    The mechanics of cache manipulation are the same for the 603 and
//    603+.
//
//
//
//  HalpSweepDcache603  HalpSweepDcache603p
//
//    Sweep the entire data cache.  This is accomplished by loading
//    the cache with data corresponding to a known address range and
//    then ensuring that each block in the cache is not dirty.
//
//    The 603 does not have a hashed page table so we can't use the
//    hashed page table as the data range.  Instead use the start of
//    KSEG0.
//
//  Arguments:
//
//    None.
//
//  Return Value:
//
//    None.
//
//    Register use:  r.3  base address of the fill range (0x80000000)
//                   r.4  number of cache blocks (loop count)
//                   r.5  running address for the load loop
//                   r.8  scratch, receives the loaded bytes
//                   r.10 MSR on entry, r.11 MSR with interrupts off
//
//--

        LEAF_ENTRY(HalpSweepDcache603p)

        li      r.4, DCB603E            // size of 603+ cache in blocks
        b       hsd603                  // join common code

        DUMMY_EXIT(HalpSweepDcache603p)



        LEAF_ENTRY(HalpSweepDcache603)

        li      r.4, DCB603             // size of 603 cache in blocks
hsd603:
        mtctr   r.4
        DISABLE_INTERRUPTS(r.10,r.11)
        cror    0,0,0                   // 603e/603ev errata 15
        sync                            // ensure ALL previous stores completed
        LWI(r.3,0x80000000)             // known usable virtual address
        subi    r.5, r.3, DCBSZ603      // dec addr prior to inc
hsd603.ld:
        lbzu    r.8, DCBSZ603(r.5)      // touch one byte in each block
        bdnz    hsd603.ld
        ENABLE_INTERRUPTS(r.10)
        cror    0,0,0                   // 603e/603ev errata 15
        mtctr   r.4                     // same block count for second pass
hsd603.fl:
        dcbst   0, r.3                  // ensure block is in memory
        addi    r.3, r.3, DCBSZ603      // bump address
        bdnz    hsd603.fl

        LEAF_EXIT(HalpSweepDcache603)

//++
//
//  HalpSweepIcache603  HalpSweepIcache603p
//
//    Sweep the entire instruction cache.  The instruction cache (by
//    definition) can never contain modified code (hence there are no
//    icbf or icbst instructions).  Therefore what we really need to do
//    here is simply invalidate every block in the cache.  This can be
//    done by toggling the Instruction Cache Flash Invalidate (ICFI) bit
//    in the 603's HID0 register.
//
//  Arguments:
//
//    None.
//
//  Return Value:
//
//    None.  r.3 and r.4 are modified.
//
//--

        LEAF_ENTRY(HalpSweepIcache603)

        ALTERNATE_ENTRY(HalpSweepIcache603p)

        mfspr   r.3, HID0               // 603, use Instruction
        ori     r.4, r.3, H0_603_ICFI   // Cache Flash Invalidate
        isync
        mtspr   HID0, r.4               // invalidate I-Cache
        mtspr   HID0, r.3               // re-enable (restore HID0 as read,
                                        // the 603 needs ICFI cleared by
                                        // software)

        LEAF_EXIT(HalpSweepIcache603)

//++
//
//  HalpSweepIcacheRange603  HalpSweepIcacheRange603p
//
//    Remove a range of instructions from the instruction cache.
//
//    Note that if this is going to take a long time we flash
//    invalidate the I cache instead.  Currently I define a
//    "long time" as greater than 4096 bytes which amounts to
//    128 trips thru this loop (which should run in 256 clocks).
//    This number was selected without bias or forethought from
//    thin air - plj.
//    I chose this number because gut feel tells
//    me that it will cost me more than 256 clocks in cache misses
//    trying to get back to the function that requested the cache
//    flush in the first place.
//
//    The length is compared UNSIGNED, so a length with the top bit
//    set also takes the flash invalidate path rather than being
//    mistaken for a small (negative) value.
//
//    If the computed block count is zero (zero Length at a block
//    aligned Start) the routine returns without doing anything; a
//    zero count in CTR would make the bdnz loop run 2^32 times.
//
//  Arguments:
//
//    r.3   Start address
//    r.4   Length (in bytes)
//
//  Return Value:
//
//    None.  r.3, r.4, r.5, cr.0 and CTR are modified.
//
//--

        LEAF_ENTRY(HalpSweepIcacheRange603)

        ALTERNATE_ENTRY(HalpSweepIcacheRange603p)

        cmplwi  r.4, 4096               // if range > 4096 bytes, flush
        bgt-    ..HalpSweepIcache603    // entire I cache

        rlwinm  r.5, r.3, 0, DCBSZ603-1 // isolate offset in start block
        addi    r.4, r.4, DCBSZ603-1    // bump range by block sz - 1
        add     r.4, r.4, r.5           // add start block offset
        srwi    r.4, r.4, DCBSZL2603    // number of blocks
        cmpwi   r.4, 0                  // empty range?
        beq     hsir603.done            // yes, nothing to invalidate
        mtctr   r.4
hsir603.fl:
        icbi    0, r.3                  // invalidate block in I cache
        addi    r.3, r.3, DCBSZ603      // bump address
        bdnz    hsir603.fl
hsir603.done:

        LEAF_EXIT(HalpSweepIcacheRange603)

//++
//
//  603 "Arthur" Cache Flushing Routines
//
//    Arthur is similar to the 603 in most ways.  Differences are:-
//
//    Size:             Arthur's caches are 32KB each.
//    Associativity:    Arthur has 128 sets of 8 blocks.
//    Flash Invalidate: Arthur will automatically clear the flash
//                      invalidate bit in HID0 where the 603 required
//                      that you clear it manually.  However, it is
//                      ok to do it the old way.
//    Block Replacement Algorithm:
//                      Arthur uses a Pseudo Least Recently Used algorithm
//                      where other 603s use a strictly LRU mechanism.  In
//                      order to flush the entire D-Cache we must either
//                      touch 12 individual blocks with address bits 20:26
//                      that hit the same set, OR, we must set a bit in HID0
//                      that causes it to use a strict LRU mechanism while
//                      doing the flush and turn that bit off again at the
//                      end.
//    Hashed Page Table:
//                      Arthur DOES use a hashed page table.
//
//    In light of the above, we use the standard 603 routines for all
//    Arthur's cache flushing routines except the Sweep D-Cache routine.
//
//
//  HalpSweepDcache603art
//
//    Sweep the entire data cache.
//    This is accomplished by loading
//    the cache with data corresponding to a known address range and
//    then ensuring that each block in the cache is not dirty.
//
//    Note: Because Arthur has a Hashed Page Table, we use it in the
//    same way as the 604 for cache flushing and therefore do not need
//    the store loop that follows the load loop in the 603 routine.
//
//    Register use:  r.3  running address (derived from SDR1)
//                   r.4  number of cache blocks (loop count)
//                   r.7  HID0 with Data Cache Flush Assist set
//                   r.8  scratch, receives the loaded bytes
//                   r.9  HID0 on entry
//                   r.10 MSR on entry, r.11 MSR with bit 0x8000 clear
//
//  Arguments:
//
//    None.
//
//  Return Value:
//
//    None.
//
//--

        LEAF_ENTRY(HalpSweepDcache603art)

        li      r.4, DCB603ART          // size of Arthur cache in blocks
        mfmsr   r.10                    // prep to disable ints (only the
                                        // interrupt enable bit is changed
                                        // below, translation is left on)
        mfspr   r.9, HID0               // need to set Data Cache Flush Assist
        mfsdr1  r.3                     // fill the D cache from memory
                                        // allocated to the hashed page
                                        // table (it's useful and we don't
                                        // have to flush it).
        mtctr   r.4
        rlwinm  r.11, r.10, 0, 0xffff7fff // clear INT Enable bit
        sync                            // ensure ALL previous stores complete
        mtmsr   r.11                    // disable interrupts
        ori     r.7, r.9, H0_603_DCFA
        oris    r.3, r.3, 0x8000        // make addr virtual
        mtspr   HID0,r.7                // set Data Cache Flush Assist
        subi    r.3, r.3, DCBSZ603      // dec addr prior to inc
        isync
hsd603art.ld:
        lbzu    r.8, DCBSZ603(r.3)      // touch one byte in each block
        bdnz    hsd603art.ld
        sync
        mtspr   HID0,r.9                // clear Data Cache Flush Assist
                                        // (HID0 restored to entry value)
        mtmsr   r.10                    // reset previous interrupt enable
                                        // state.

        LEAF_EXIT(HalpSweepDcache603art)

//++
//
//  604 Cache Flushing Routines
//
//    The 604 has separate instruction and data caches of 16 KB each.
//    The 604+ has separate instruction and data caches of 32 KB each.
//    Line size = Block size = 32 bytes.
//
//
//
//  HalpSweepDcache604  HalpSweepDcache604p
//
//    Sweep the entire data cache.  This is accomplished by loading
//    the cache with data corresponding to a known address range and
//    then ensuring that each block in the cache is not dirty.
//
//    As in the 601 case, we use the Hashed Page Table for the data
//    in an effort to minimize performance lost by force feeding the
//    cache.
//
//  Arguments:
//
//    None.
//
//  Return Value:
//
//    None.
//
//    Register use:  r.3  address derived from SDR1
//                   r.4  number of cache blocks (loop count)
//                   r.5  running address for the load loop
//                   r.8  scratch, receives the loaded bytes
//                   r.10 MSR on entry, r.11 MSR with interrupts off
//
//--

        LEAF_ENTRY(HalpSweepDcache604p)

        li      r.4, DCB604E            // size of 604+ cache in blocks
        b       hsd604                  // join common code

        DUMMY_EXIT(HalpSweepDcache604p)



        LEAF_ENTRY(HalpSweepDcache604)

        li      r.4, DCB604             // size of cache in cache blocks
hsd604:
        mfsdr1  r.3                     // fill the D cache from memory
                                        // allocated to the hashed page
                                        // table (it's something useful).
        mtctr   r.4
        DISABLE_INTERRUPTS(r.10,r.11)
        sync                            // ensure ALL previous stores completed
        oris    r.3, r.3, 0x8000        // get VA of hashed page table
        subi    r.5, r.3, DCBSZ604      // dec addr prior to inc
hsd604.ld:
        lbzu    r.8, DCBSZ604(r.5)      // touch one byte in each block
        bdnz    hsd604.ld
        ENABLE_INTERRUPTS(r.10)
        sync

        LEAF_EXIT(HalpSweepDcache604)

//++
//
//  HalpSweepIcache604  HalpSweepIcache604p
//
//    Sweep the entire instruction cache.  This routine is functionally
//    similar to the 603 version except that on the 604 the bit in HID0
//    (coincidentally the *same* bit) is called Instruction Cache
//    Invalidate All (ICIA) and it clears automatically after being set.
//
//  Arguments:
//
//    None.
//
//  Return Value:
//
//    None.  r.3 is modified.
//
//--

        LEAF_ENTRY(HalpSweepIcache604)

        ALTERNATE_ENTRY(HalpSweepIcache604p)

        mfspr   r.3, HID0               // 604, use Instruction
        ori     r.3, r.3, H0_604_ICIA   // Cache Invalidate All
        isync
        mtspr   HID0, r.3               // invalidate I-Cache

        LEAF_EXIT(HalpSweepIcache604)

//++
//
//  HalpSweepIcacheRange604  HalpSweepIcacheRange604p
//
//    Remove a range of instructions from the instruction cache.
//
//  Arguments:
//
//    r.3   Start address
//    r.4   Length (in bytes)
//
//  Return Value:
//
//    None.
//
//    The number of blocks invalidated is
//
//      (Length + (Start & (block size - 1)) + block size - 1) / block size
//
//    If that count is zero (zero Length at a block aligned Start) the
//    routine returns without doing anything; a zero count in CTR would
//    make the bdnz loop run 2^32 times.
//
//    r.3, r.4, r.5, cr.0 and CTR are modified.
//
//--

        LEAF_ENTRY(HalpSweepIcacheRange604)

        ALTERNATE_ENTRY(HalpSweepIcacheRange604p)

        rlwinm  r.5, r.3, 0, DCBSZ604-1 // isolate offset in start block
        addi    r.4, r.4, DCBSZ604-1    // bump range by block sz - 1
        add     r.4, r.4, r.5           // add start block offset
        srwi    r.4, r.4, DCBSZL2604    // number of blocks
        cmpwi   r.4, 0                  // empty range?
        beq     hsir604.done            // yes, nothing to invalidate
        mtctr   r.4
hsir604.fl:
        icbi    0, r.3                  // invalidate block in I cache
        addi    r.3, r.3, DCBSZ604      // bump address
        bdnz    hsir604.fl
hsir604.done:

        LEAF_EXIT(HalpSweepIcacheRange604)

//++
//
//  620 Cache Flushing Routines
//
//    The 620 has separate instruction and data caches of 32 KB each.
//    Line size = Block size = 64 bytes.
//
//
//
//  HalpSweepDcache620
//
//    Sweep the entire data cache.  This is accomplished by loading
//    the cache with data corresponding to a known address range and
//    then ensuring that each block in the cache is not dirty.
//
//    As in the 601 case, we use the Hashed Page Table for the data
//    in an effort to minimize performance lost by force feeding the
//    cache.
//
//  Arguments:
//
//    None.
//
//  Return Value:
//
//    None.
//
//--

        LEAF_ENTRY(HalpSweepDcache620)

        li      r.4, DCB620             // size of cache in cache blocks
hsd620:
        mfsdr1  r.3                     // fill the D cache from memory
                                        // allocated to the hashed page
                                        // table (it's something useful).
        mtctr   r.4
        DISABLE_INTERRUPTS(r.10,r.11)
        sync
        oris    r.3, r.3, 0x8000        // get VA of hashed page table
        subi    r.5, r.3, DCBSZ620      // dec addr prior to inc
hsd620.ld:
        lbzu    r.8, DCBSZ620(r.5)      // touch one byte in each block
        bdnz    hsd620.ld
        ENABLE_INTERRUPTS(r.10)
        sync

        LEAF_EXIT(HalpSweepDcache620)

//++
//
//  HalpSweepIcache620
//
//    Sweep the entire instruction cache.  This routine is functionally
//    identical to the 604 version except that on the 620 the bit in HID0
//    (coincidentally the *same* bit) is called Instruction Cache Edge
//    Flash Invalidate (ICEFI).
//
//  Arguments:
//
//    None.
//
//  Return Value:
//
//    None.
//
//    r.3 is modified.
//
//--

        LEAF_ENTRY(HalpSweepIcache620)

        mfspr   r.3, HID0               // 620, use Instruction
        ori     r.3, r.3, H0_620_ICEFI  // Cache Edge Flash Invalidate
        isync
        mtspr   HID0, r.3               // invalidate I-Cache

        LEAF_EXIT(HalpSweepIcache620)

//++
//
//  HalpSweepIcacheRange620
//
//    Remove a range of instructions from the instruction cache.
//
//    If the computed block count is zero (zero Length at a block
//    aligned Start) the routine returns without doing anything; a
//    zero count in CTR would make the bdnz loop run 2^32 times.
//
//  Arguments:
//
//    r.3   Start address
//    r.4   Length (in bytes)
//
//  Return Value:
//
//    None.  r.3, r.4, r.5, cr.0 and CTR are modified.
//
//--

        LEAF_ENTRY(HalpSweepIcacheRange620)

        rlwinm  r.5, r.3, 0, DCBSZ620-1 // isolate offset in start block
        addi    r.4, r.4, DCBSZ620-1    // bump range by block sz - 1
        add     r.4, r.4, r.5           // add start block offset
        srwi    r.4, r.4, DCBSZL2620    // number of blocks
        cmpwi   r.4, 0                  // empty range?
        beq     hsir620.done            // yes, nothing to invalidate
        mtctr   r.4
hsir620.fl:
        icbi    0, r.3                  // invalidate block in I cache
        addi    r.3, r.3, DCBSZ620      // bump address
        bdnz    hsir620.fl
hsir620.done:

        LEAF_EXIT(HalpSweepIcacheRange620)

//++
//
//  HalpSweepDcacheRange620
//
//    Force data in a given address range to memory.
//
//    If the computed block count is zero (zero Length at a block
//    aligned Start) the routine returns without doing anything; a
//    zero count in CTR would make the bdnz loop run 2^32 times.
//
//  Arguments:
//
//    r.3   Start address
//    r.4   Length (in bytes)
//
//  Return Value:
//
//    None.  r.3, r.4, r.5, cr.0 and CTR are modified.
//
//--

        LEAF_ENTRY(HalpSweepDcacheRange620)

        rlwinm  r.5, r.3, 0, DCBSZ620-1 // isolate offset in start block
        addi    r.4, r.4, DCBSZ620-1    // bump range by block sz - 1
        add     r.4, r.4, r.5           // add start block offset
        srwi    r.4, r.4, DCBSZL2620    // number of blocks
        cmpwi   r.4, 0                  // empty range?
        beq     hsdr620.done            // yes, nothing to flush
        mtctr   r.4
        sync                            // previous stores must be complete
hsdr620.fl:
        dcbst   0, r.3                  // flush block
        addi    r.3, r.3, DCBSZ620      // bump address
        bdnz    hsdr620.fl
hsdr620.done:

        LEAF_EXIT(HalpSweepDcacheRange620)

//++
//
//  HalpSweepPhysicalRangeInBothCaches
//
//    Force data in a given PHYSICAL address range to memory and
//    invalidate the same blocks in the instruction cache.
//
//    This implementation assumes a block size of 32 bytes.  It
//    will still work on the 620.
//
//  Arguments:
//
//    r.3   Start physical PAGE number.
//    r.4   Starting offset within page.  Cache block ALIGNED.
//    r.5   Length (in bytes)
//
//  Return Value:
//
//    None.
//
//    Register use:  r.0  caller's return address (from LR)
//                   r.4  running physical address
//                   r.5  number of 32 byte blocks (loop count)
//                   r.6  real address of hspribc.real
//                   r.11 MSR with interrupts (then relocation) off
//                   r.12 MSR on entry
//                   LR, CTR, SRR0 and SRR1 are also modified.
//
//--

        .set    PAGE_SHIFT, 12


        LEAF_ENTRY(HalpSweepPhysicalRangeInBothCaches)

//
// Starting physical address = (PageNumber << PAGE_SHIFT) | Offset
//

        rlwimi  r.4, r.3, PAGE_SHIFT, 0xfffff000

        addi    r.5, r.5, 31            // bump length by block size - 1
        srwi    r.5, r.5, 5             // get number of blocks

//
// A zero length request yields a zero block count.  Loading that into
// CTR would make the bdnz loop below run 2^32 times with interrupts
// and translation disabled, so return now, before any machine state
// has been changed (LR still holds the caller's return address).
//

        cmpwi   r.5, 0                  // anything to do?
        beq     hspribc.none            // no, plain return

        mflr    r.0                     // save return address
        mtctr   r.5                     // set loop count

//
// Interrupts MUST be disabled for the duration of this function as
// we use srr0 and srr1 which will be destroyed by any exception or
// interrupt.
//

        DISABLE_INTERRUPTS(r.12,r.11)   // r.11 <- disabled MSR
                                        // r.12 <- previous MSR
        cror    0,0,0                   // N.B. 603e/ev errata 15

//
// Find ourselves in memory.  This is needed as we must disable
// both instruction and data translation.  We do this while
// interrupts are disabled only to try to avoid changing the
// Link Register when an unwind might/could occur.
//
// The HAL is known to be in KSEG0 so its physical address is
// its effective address with the top bit stripped off.
//

        bl      hspribc
hspribc:

        mflr    r.6                     // r.6 <- &hspribc
        rlwinm  r.6, r.6, 0, 0x7fffffff // r.6 &= 0x7fffffff
        addi    r.6, r.6, hspribc.real - hspribc
                                        // r.6 = real &hspribc.real

        sync                            // ensure all previous loads and
                                        // stores are complete.

        mtsrr0  r.6                     // address in real space

        rlwinm  r.11, r.11, 0, ~0x30    // turn off Data and Instr relocation
        mtsrr1  r.11
        rfi                             // leap to next instruction

hspribc.real:
        mtsrr0  r.0                     // set return address
        mtsrr1  r.12                    // set old MSR value

hspribc.loop:
        dcbst   0, r.4                  // flush data block to memory
        icbi    0, r.4                  // invalidate i-cache
        addi    r.4, r.4, 32            // point to next block
        bdnz    hspribc.loop            // jif more to do

        sync                            // ensure all translations complete
        isync                           // don't even *think* about getting
                                        // ahead.
        rfi                             // return to caller and translated
                                        // mode

hspribc.none:
        blr                             // zero length: nothing was changed

        DUMMY_EXIT(HalpSweepPhysicalRangeInBothCaches)

//++
//
//  HalpSweepPhysicalIcacheRange
//
//    Invalidate a given PHYSICAL address range from the instruction
//    cache.
//
//    This implementation assumes a block size of 32 bytes.  It
//    will still work on the 620.
//
//  Arguments:
//
//    r.3   Start physical PAGE number.
//    r.4   Starting offset within page.  Cache block ALIGNED.
//    r.5   Length (in bytes)
//
//  Return Value:
//
//    None.  A zero Length returns immediately without changing any
//    machine state.
//
//    Register use:  r.0  caller's return address (from LR)
//                   r.4  running physical address
//                   r.5  number of 32 byte blocks (loop count)
//                   r.6  real address of hspir.real
//                   r.11 MSR with interrupts (then relocation) off
//                   r.12 MSR on entry
//                   LR, CTR, SRR0 and SRR1 are also modified.
//
//--

        LEAF_ENTRY(HalpSweepPhysicalIcacheRange)

//
// Starting physical address = (PageNumber << PAGE_SHIFT) | Offset
//

        rlwimi  r.4, r.3, PAGE_SHIFT, 0xfffff000

        addi    r.5, r.5, 31            // bump length by block size - 1
        srwi    r.5, r.5, 5             // get number of blocks

//
// A zero length request yields a zero block count.  Loading that into
// CTR would make the bdnz loop below run 2^32 times with interrupts
// and translation disabled, so return now, before any machine state
// has been changed (LR still holds the caller's return address).
//

        cmpwi   r.5, 0                  // anything to do?
        beq     hspir.none              // no, plain return

        mflr    r.0                     // save return address
        mtctr   r.5                     // set loop count

//
// Interrupts MUST be disabled for the duration of this function as
// we use srr0 and srr1 which will be destroyed by any exception or
// interrupt.
//

        DISABLE_INTERRUPTS(r.12,r.11)   // r.11 <- disabled MSR
                                        // r.12 <- previous MSR
        cror    0,0,0                   // N.B. 603e/ev errata 15

//
// Find ourselves in memory.  This is needed as we must disable
// both instruction and data translation.  We do this while
// interrupts are disabled only to try to avoid changing the
// Link Register when an unwind might/could occur.
//
// The HAL is known to be in KSEG0 so its physical address is
// its effective address with the top bit stripped off.
//

        bl      hspir
hspir:

        mflr    r.6                     // r.6 <- &hspir
        rlwinm  r.6, r.6, 0, 0x7fffffff // r.6 &= 0x7fffffff
        addi    r.6, r.6, hspir.real - hspir
                                        // r.6 = real &hspir.real

        sync                            // ensure all previous loads and
                                        // stores are complete.

        mtsrr0  r.6                     // address in real space

//
// N.B. It may not be required that Data Relocation be disabled here.
//      I can't tell from my Arch spec if ICBI works on a Data or
//      Instruction address.  I believe it is probably a Data
//      address even though it would be sensible for it to be an
//      instruction address,....
//

        rlwinm  r.11, r.11, 0, ~0x30    // turn off Data and Instr relocation
        mtsrr1  r.11
        rfi                             // leap to next instruction

hspir.real:
        mtsrr0  r.0                     // set return address
        mtsrr1  r.12                    // set old MSR value

hspir.loop:
        icbi    0, r.4                  // invalidate i-cache
        addi    r.4, r.4, 32            // point to next block
        bdnz    hspir.loop              // jif more to do

        sync                            // ensure all translations complete
        isync                           // don't even *think* about getting
                                        // ahead.
        rfi                             // return to caller and translated
                                        // mode

hspir.none:
        blr                             // zero length: nothing was changed

        DUMMY_EXIT(HalpSweepPhysicalIcacheRange)