diff -u --recursive --new-file v2.3.7/linux/Makefile linux/Makefile --- v2.3.7/linux/Makefile Mon Jun 21 11:17:54 1999 +++ linux/Makefile Tue Jun 22 10:45:40 1999 @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 3 -SUBLEVEL = 7 +SUBLEVEL = 8 EXTRAVERSION = ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) diff -u --recursive --new-file v2.3.7/linux/arch/alpha/config.in linux/arch/alpha/config.in --- v2.3.7/linux/arch/alpha/config.in Mon May 10 09:55:21 1999 +++ linux/arch/alpha/config.in Tue Jun 22 10:46:52 1999 @@ -142,6 +142,7 @@ if [ "$CONFIG_ALPHA_CABRIOLET" = "y" -o "$CONFIG_ALPHA_AVANTI" = "y" \ -o "$CONFIG_ALPHA_EB64P" = "y" -o "$CONFIG_ALPHA_JENSEN" = "y" \ + -o "$CONFIG_ALPHA_TAKARA" = "y" -o "$CONFIG_ALPHA_EB164" = "y" \ -o "$CONFIG_ALPHA_MIKASA" = "y" -o "$CONFIG_ALPHA_ALCOR" = "y" \ -o "$CONFIG_ALPHA_SABLE" = "y" -o "$CONFIG_ALPHA_MIATA" = "y" \ -o "$CONFIG_ALPHA_NORITAKE" = "y" -o "$CONFIG_ALPHA_PC164" = "y" \ @@ -166,7 +167,11 @@ define_bool CONFIG_ALPHA_AVANTI y fi -bool 'Symmetric multi-processing support' CONFIG_SMP +if [ "$CONFIG_ALPHA_SABLE" = "y" -o "$CONFIG_ALPHA_RAWHIDE" = "y" \ + -o "$CONFIG_ALPHA_DP264" = "y" -o "$CONFIG_ALPHA_GENERIC" = "y" ] +then + bool 'Symmetric multi-processing support' CONFIG_SMP +fi if [ "$CONFIG_PCI" = "y" ]; then bool 'PCI quirks' CONFIG_PCI_QUIRKS diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/alpha_ksyms.c linux/arch/alpha/kernel/alpha_ksyms.c --- v2.3.7/linux/arch/alpha/kernel/alpha_ksyms.c Fri May 14 12:41:22 1999 +++ linux/arch/alpha/kernel/alpha_ksyms.c Tue Jun 22 10:46:52 1999 @@ -171,8 +171,8 @@ EXPORT_SYMBOL(__global_restore_flags); #if DEBUG_SPINLOCK EXPORT_SYMBOL(spin_unlock); -EXPORT_SYMBOL(spin_lock); -EXPORT_SYMBOL(spin_trylock); +EXPORT_SYMBOL(debug_spin_lock); +EXPORT_SYMBOL(debug_spin_trylock); #endif #if DEBUG_RWLOCK EXPORT_SYMBOL(write_lock); diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/core_cia.c linux/arch/alpha/kernel/core_cia.c --- v2.3.7/linux/arch/alpha/kernel/core_cia.c Sun Sep 6 10:34:33 1998 +++ linux/arch/alpha/kernel/core_cia.c Tue Jun 22 10:46:52 1999 @@ -598,7 +598,7 @@ { CIA_jd = *(vuip)CIA_IOC_CIA_ERR; DBGM(("CIA_pci_clr_err: CIA ERR after read 0x%x\n", CIA_jd)); - *(vuip)CIA_IOC_CIA_ERR = 0x0180; + *(vuip)CIA_IOC_CIA_ERR = CIA_jd; mb(); return 0; } @@ -698,6 +698,10 @@ reason = buf; break; } + mb(); + mb(); /* magic */ + draina(); + cia_pci_clr_err(); wrmces(rdmces()); /* reset machine check pending flag */ mb(); diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/core_mcpcia.c linux/arch/alpha/kernel/core_mcpcia.c --- v2.3.7/linux/arch/alpha/kernel/core_mcpcia.c Sun Sep 6 10:34:33 1998 +++ linux/arch/alpha/kernel/core_mcpcia.c Tue Jun 22 10:54:54 1999 @@ -18,7 +18,6 @@ #include #include #include -#include #define __EXTERN_INLINE inline #include diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/head.S linux/arch/alpha/kernel/head.S --- v2.3.7/linux/arch/alpha/kernel/head.S Fri May 14 12:41:22 1999 +++ linux/arch/alpha/kernel/head.S Tue Jun 22 10:46:52 1999 @@ -54,87 +54,6 @@ .end __smp_callin #endif /* __SMP__ */ - .align 3 - .globl wrent - .ent wrent -wrent: - .prologue 0 - call_pal PAL_wrent - ret ($26) - .end wrent - - .align 3 - .globl wrkgp - .ent wrkgp -wrkgp: - .prologue 0 - call_pal PAL_wrkgp - ret ($26) - .end wrkgp - - .align 3 - .globl wrusp - .ent wrusp -wrusp: - .prologue 0 - call_pal PAL_wrusp - ret ($26) - .end wrusp - - .align 3 - .globl rdusp - .ent rdusp -rdusp: - .prologue 0 - call_pal 
PAL_rdusp - ret ($26) - .end rdusp - - .align 3 - .globl rdmces - .ent rdmces -rdmces: - .prologue 0 - call_pal PAL_rdmces - ret ($26) - .end rdmces - - .align 3 - .globl wrmces - .ent wrmces -wrmces: - .prologue 0 - call_pal PAL_wrmces - ret ($26) - .end wrmces - - .align 3 - .globl whami - .ent whami -whami: - .prologue 0 - call_pal PAL_whami - ret ($26) - .end whami - - .align 3 - .globl wripir - .ent wripir -wripir: - .prologue 0 - call_pal PAL_wripir - ret ($26) - .end wripir - - .align 3 - .globl wrvptptr - .ent wrvptptr -wrvptptr: - .prologue 0 - call_pal PAL_wrvptptr - ret ($26) - .end wrvptptr - # # The following two functions are needed for supporting SRM PALcode # on the PC164 (at least), since that PALcode manages the interrupt diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/machvec.h linux/arch/alpha/kernel/machvec.h --- v2.3.7/linux/arch/alpha/kernel/machvec.h Sun Jan 10 09:59:54 1999 +++ linux/arch/alpha/kernel/machvec.h Tue Jun 22 10:46:52 1999 @@ -36,7 +36,6 @@ #define DO_EV4_MMU \ max_asn: EV4_MAX_ASN, \ - mmu_context_mask: ~0UL, \ mv_get_mmu_context: ev4_get_mmu_context, \ mv_flush_tlb_current: ev4_flush_tlb_current, \ mv_flush_tlb_other: ev4_flush_tlb_other, \ @@ -44,7 +43,6 @@ #define DO_EV5_MMU \ max_asn: EV5_MAX_ASN, \ - mmu_context_mask: ~0UL, \ mv_get_mmu_context: ev5_get_mmu_context, \ mv_flush_tlb_current: ev5_flush_tlb_current, \ mv_flush_tlb_other: ev5_flush_tlb_other, \ @@ -52,7 +50,6 @@ #define DO_EV6_MMU \ max_asn: EV6_MAX_ASN, \ - mmu_context_mask: 0xfffffffffful, \ mv_get_mmu_context: ev5_get_mmu_context, \ mv_flush_tlb_current: ev5_flush_tlb_current, \ mv_flush_tlb_other: ev5_flush_tlb_other, \ diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/process.c linux/arch/alpha/kernel/process.c --- v2.3.7/linux/arch/alpha/kernel/process.c Sat May 22 13:46:08 1999 +++ linux/arch/alpha/kernel/process.c Tue Jun 22 10:46:52 1999 @@ -329,7 +329,7 @@ p->tss.ksp = (unsigned long) childstack; p->tss.pal_flags = 1; /* set FEN, clear everything else */ p->tss.flags = current->tss.flags; - p->mm->context = 0; + p->tss.mm_context = p->tss.asn = 0; return 0; } diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/proto.h linux/arch/alpha/kernel/proto.h --- v2.3.7/linux/arch/alpha/kernel/proto.h Fri May 14 12:41:23 1999 +++ linux/arch/alpha/kernel/proto.h Tue Jun 22 10:46:52 1999 @@ -180,7 +180,7 @@ extern void wrmces(unsigned long mces); extern void cserve_ena(unsigned long); extern void cserve_dis(unsigned long); -extern void __smp_callin(void); +extern void __smp_callin(unsigned long); /* entry.S */ extern void entArith(void); diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/setup.c linux/arch/alpha/kernel/setup.c --- v2.3.7/linux/arch/alpha/kernel/setup.c Sat Apr 24 17:54:08 1999 +++ linux/arch/alpha/kernel/setup.c Tue Jun 22 10:46:52 1999 @@ -106,6 +106,7 @@ WEAK(alphabook1_mv); WEAK(avanti_mv); WEAK(cabriolet_mv); +WEAK(clipper_mv); WEAK(dp264_mv); WEAK(eb164_mv); WEAK(eb64p_mv); @@ -330,6 +331,10 @@ /* Round it up to an even number of pages. */ high = (high + PAGE_SIZE) & (PAGE_MASK*2); + + /* Enforce maximum of 2GB even if there is more. Blah. 
*/ + if (high > 0x80000000UL) + high = 0x80000000UL; return PAGE_OFFSET + high; } @@ -448,11 +453,11 @@ static struct alpha_machine_vector *tsunami_vecs[] __initlocaldata = { NULL, - &dp264_mv, /* dp164 */ + &dp264_mv, /* dp264 */ &dp264_mv, /* warhol */ &dp264_mv, /* windjammer */ &monet_mv, /* monet */ - &dp264_mv, /* clipper */ + &clipper_mv, /* clipper */ &dp264_mv, /* goldrush */ &webbrick_mv, /* webbrick */ &dp264_mv, /* catamaran */ @@ -537,6 +542,7 @@ &alphabook1_mv, &avanti_mv, &cabriolet_mv, + &clipper_mv, &dp264_mv, &eb164_mv, &eb64p_mv, diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/smp.c linux/arch/alpha/kernel/smp.c --- v2.3.7/linux/arch/alpha/kernel/smp.c Fri May 14 12:41:23 1999 +++ linux/arch/alpha/kernel/smp.c Tue Jun 22 10:46:52 1999 @@ -95,6 +95,8 @@ smp_store_cpu_info(int cpuid) { cpu_data[cpuid].loops_per_sec = loops_per_sec; + cpu_data[cpuid].last_asn + = (cpuid << WIDTH_HARDWARE_ASN) + ASN_FIRST_VERSION; } /* @@ -151,8 +153,8 @@ while (!smp_threads_ready) barrier(); - printk(KERN_INFO "SMP: commencing CPU %d current %p\n", - cpuid, current); + DBGS(("smp_callin: commencing CPU %d current %p\n", + cpuid, current)); /* Do nothing. */ cpu_idle(NULL); @@ -293,9 +295,9 @@ + hwrpb->processor_offset + i * hwrpb->processor_size); - printk(KERN_INFO "recv_secondary_console_msg: on %d from %d" - " HALT_REASON 0x%lx FLAGS 0x%lx\n", - mycpu, i, cpu->halt_reason, cpu->flags); + DBGS(("recv_secondary_console_msg: on %d from %d" + " HALT_REASON 0x%lx FLAGS 0x%lx\n", + mycpu, i, cpu->halt_reason, cpu->flags)); cnt = cpu->ipc_buffer[0] >> 32; if (cnt <= 0 || cnt >= 80) @@ -790,6 +792,11 @@ void smp_send_reschedule(int cpu) { +#if DEBUG_IPI_MSG + if (cpu == hard_smp_processor_id()) + printk(KERN_WARNING + "smp_send_reschedule: Sending IPI to self.\n"); +#endif send_ipi_message(1L << cpu, IPI_RESCHEDULE); } @@ -797,6 +804,10 @@ smp_send_stop(void) { unsigned long to_whom = cpu_present_mask ^ (1L << smp_processor_id()); +#if DEBUG_IPI_MSG + if (hard_smp_processor_id() != boot_cpu_id) + printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n"); +#endif send_ipi_message(to_whom, IPI_CPU_STOP); } @@ -862,13 +873,13 @@ void flush_tlb_all(void) { - tbia(); - /* Although we don't have any data to pass, we do want to synchronize with the other processors. */ if (smp_call_function(ipi_flush_tlb_all, NULL, 1, 1)) { printk(KERN_CRIT "flush_tlb_all: timed out\n"); } + + tbia(); } static void @@ -948,43 +959,21 @@ #if DEBUG_SPINLOCK - -#ifdef MANAGE_SPINLOCK_IPL - -static inline long -spinlock_raise_ipl(spinlock_t * lock) -{ - long min_ipl = lock->target_ipl; - long last_ipl = swpipl(7); - if (last_ipl < 7 && min_ipl < 7) - setipl(min_ipl < last_ipl ? 
last_ipl : min_ipl); - return last_ipl; -} - -static inline void -spinlock_restore_ipl(long prev) -{ - setipl(prev); -} - -#else - -#define spinlock_raise_ipl(LOCK) ((void)(LOCK), 0) -#define spinlock_restore_ipl(PREV) ((void)(PREV)) - -#endif /* MANAGE_SPINLOCK_IPL */ - void spin_unlock(spinlock_t * lock) { - long old_ipl = lock->saved_ipl; mb(); lock->lock = 0; - spinlock_restore_ipl(old_ipl); + + lock->on_cpu = -1; + lock->previous = NULL; + lock->task = NULL; + lock->base_file = "none"; + lock->line_no = 0; } void -spin_lock(spinlock_t * lock) +debug_spin_lock(spinlock_t * lock, const char *base_file, int line_no) { long tmp; long stuck; @@ -992,7 +981,6 @@ unsigned long started = jiffies; int printed = 0; int cpu = smp_processor_id(); - long old_ipl = spinlock_raise_ipl(lock); stuck = 1L << 28; try_again: @@ -1020,39 +1008,43 @@ if (stuck < 0) { printk(KERN_WARNING - "spinlock stuck at %p(%d) owner %s at %p(%d) st %ld\n", - inline_pc, cpu, lock->task->comm, lock->previous, - lock->task->processor, lock->task->state); + "%s:%d spinlock stuck in %s at %p(%d)" + " owner %s at %p(%d) %s:%d\n", + base_file, line_no, + current->comm, inline_pc, cpu, + lock->task->comm, lock->previous, + lock->on_cpu, lock->base_file, lock->line_no); stuck = 1L << 36; printed = 1; goto try_again; } /* Exiting. Got the lock. */ - lock->saved_ipl = old_ipl; lock->on_cpu = cpu; lock->previous = inline_pc; lock->task = current; + lock->base_file = base_file; + lock->line_no = line_no; if (printed) { - printk(KERN_WARNING "spinlock grabbed at %p(%d) %ld ticks\n", - inline_pc, cpu, jiffies - started); + printk(KERN_WARNING + "%s:%d spinlock grabbed in %s at %p(%d) %ld ticks\n", + base_file, line_no, current->comm, inline_pc, + cpu, jiffies - started); } } int -spin_trylock(spinlock_t * lock) +debug_spin_trylock(spinlock_t * lock, const char *base_file, int line_no) { - long old_ipl = spinlock_raise_ipl(lock); int ret; if ((ret = !test_and_set_bit(0, lock))) { - mb(); - lock->saved_ipl = old_ipl; lock->on_cpu = smp_processor_id(); lock->previous = __builtin_return_address(0); lock->task = current; } else { - spinlock_restore_ipl(old_ipl); + lock->base_file = base_file; + lock->line_no = line_no; } return ret; } diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/sys_dp264.c linux/arch/alpha/kernel/sys_dp264.c --- v2.3.7/linux/arch/alpha/kernel/sys_dp264.c Sun Feb 21 19:06:36 1999 +++ linux/arch/alpha/kernel/sys_dp264.c Tue Jun 22 10:46:52 1999 @@ -2,8 +2,8 @@ * linux/arch/alpha/kernel/sys_dp264.c * * Copyright (C) 1995 David A Rusling - * Copyright (C) 1996 Jay A Estabrook - * Copyright (C) 1998 Richard Henderson + * Copyright (C) 1996, 1999 Jay A Estabrook + * Copyright (C) 1998, 1999 Richard Henderson * * Code supporting the DP264 (EV6+TSUNAMI). */ @@ -35,7 +35,7 @@ #define dev2hose(d) (bus2hose[(d)->bus->number]->pci_hose_index) /* - * HACK ALERT! only CPU#0 is used currently + * HACK ALERT! only the boot cpu is used for interrupts. 
*/ static void @@ -66,34 +66,60 @@ } static void +clipper_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p) +{ + if (irq >= 16) { + volatile unsigned long *csr; + + if (TSUNAMI_bootcpu < 2) + if (!TSUNAMI_bootcpu) + csr = &TSUNAMI_cchip->dim0.csr; + else + csr = &TSUNAMI_cchip->dim1.csr; + else + if (TSUNAMI_bootcpu == 2) + csr = &TSUNAMI_cchip->dim2.csr; + else + csr = &TSUNAMI_cchip->dim3.csr; + + *csr = (~mask >> 16) | (1UL << 55); /* master ISA enable */ + mb(); + *csr; + } + else if (irq >= 8) + outb(mask >> 8, 0xA1); /* ISA PIC2 */ + else + outb(mask, 0x21); /* ISA PIC1 */ +} + +static void dp264_device_interrupt(unsigned long vector, struct pt_regs * regs) { #if 1 printk("dp264_device_interrupt: NOT IMPLEMENTED YET!! \n"); #else - unsigned long pld; - unsigned int i; + unsigned long pld; + unsigned int i; - /* Read the interrupt summary register of TSUNAMI */ - pld = TSUNAMI_cchip->dir0.csr; + /* Read the interrupt summary register of TSUNAMI */ + pld = TSUNAMI_cchip->dir0.csr; - /* - * Now for every possible bit set, work through them and call - * the appropriate interrupt handler. - */ - while (pld) { - i = ffz(~pld); - pld &= pld - 1; /* clear least bit set */ - if (i == 55) { - isa_device_interrupt(vector, regs); - } else { /* if not timer int */ - handle_irq(16 + i, 16 + i, regs); - } + /* + * Now for every possible bit set, work through them and call + * the appropriate interrupt handler. + */ + while (pld) { + i = ffz(~pld); + pld &= pld - 1; /* clear least bit set */ + if (i == 55) + isa_device_interrupt(vector, regs); + else + handle_irq(16 + i, 16 + i, regs); #if 0 TSUNAMI_cchip->dir0.csr = 1UL << i; mb(); tmp = TSUNAMI_cchip->dir0.csr; #endif - } + } #endif } @@ -104,24 +130,48 @@ ack = irq = (vector - 0x800) >> 4; - /* - * The DP264 SRM console reports PCI interrupts with a vector - * 0x100 *higher* than one might expect, as PCI IRQ 0 (ie bit 0) - * shows up as IRQ 16, etc, etc. We adjust it down by 16 to have - * it line up with the actual bit numbers from the DIM registers, - * which is how we manage the interrupts/mask. Sigh... - */ - if (irq >= 32) - ack = irq = irq - 16; + /* + * The SRM console reports PCI interrupts with a vector calculated by: + * + * 0x900 + (0x10 * DRIR-bit) + * + * So bit 16 shows up as IRQ 32, etc. + * + * On DP264/BRICK/MONET, we adjust it down by 16 because at least + * that many of the low order bits of the DRIR are not used, and + * so we don't count them. + */ + if (irq >= 32) + ack = irq = irq - 16; + + handle_irq(irq, ack, regs); +} + +static void +clipper_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +{ + int irq, ack; + + ack = irq = (vector - 0x800) >> 4; + /* + * The SRM console reports PCI interrupts with a vector calculated by: + * + * 0x900 + (0x10 * DRIR-bit) + * + * So bit 16 shows up as IRQ 32, etc. + * + * CLIPPER uses bits 8-47 for PCI interrupts, so we do not need + * to scale down the vector reported, we just use it. 
+ * + * Eg IRQ 24 is DRIR bit 8, etc, etc + */ handle_irq(irq, ack, regs); } static void __init dp264_init_irq(void) { - volatile unsigned long *csr; - outb(0, DMA1_RESET_REG); outb(0, DMA2_RESET_REG); outb(DMA_MODE_CASCADE, DMA2_MODE_REG); @@ -130,23 +180,26 @@ if (alpha_using_srm) alpha_mv.device_interrupt = dp264_srm_device_interrupt; - if (TSUNAMI_bootcpu < 2) - if (!TSUNAMI_bootcpu) - csr = &TSUNAMI_cchip->dim0.csr; - else - csr = &TSUNAMI_cchip->dim1.csr; - else - if (TSUNAMI_bootcpu == 2) - csr = &TSUNAMI_cchip->dim2.csr; - else - csr = &TSUNAMI_cchip->dim3.csr; - - /* Note invert on MASK bits. */ - *csr = ~(alpha_irq_mask); - mb(); - *csr; + dp264_update_irq_hw(16, alpha_irq_mask, 0); - enable_irq(55); /* Enable CYPRESS interrupt controller (ISA). */ + enable_irq(55); /* Enable ISA interrupt controller. */ + enable_irq(2); +} + +static void __init +clipper_init_irq(void) +{ + outb(0, DMA1_RESET_REG); + outb(0, DMA2_RESET_REG); + outb(DMA_MODE_CASCADE, DMA2_MODE_REG); + outb(0, DMA2_MASK_REG); + + if (alpha_using_srm) + alpha_mv.device_interrupt = clipper_srm_device_interrupt; + + clipper_update_irq_hw(16, alpha_irq_mask, 0); + + enable_irq(55); /* Enable ISA interrupt controller. */ enable_irq(2); } @@ -221,7 +274,7 @@ const long min_idsel = 5, max_idsel = 10, irqs_per_slot = 5; int irq = COMMON_TABLE_LOOKUP; - if (irq >= 0) + if (irq > 0) irq += 16 * dev2hose(dev); return irq; @@ -250,42 +303,38 @@ { 32, 32, 33, 34, 35}, /* IdSel 13 slot 3 PCI0*/ { 28, 28, 29, 30, 31}, /* IdSel 14 slot 4 PCI2*/ { 24, 24, 25, 26, 27} /* IdSel 15 slot 5 PCI2*/ -}; + }; const long min_idsel = 3, max_idsel = 15, irqs_per_slot = 5; - int irq = COMMON_TABLE_LOOKUP; - - return irq; + return COMMON_TABLE_LOOKUP; } static int __init monet_swizzle(struct pci_dev *dev, int *pinp) { - int slot, pin = *pinp; + int slot, pin = *pinp; - /* Check first for the built-in bridge on hose 1. */ - if (dev2hose(dev) == 1 && PCI_SLOT(dev->bus->self->devfn) == 8) { - slot = PCI_SLOT(dev->devfn); - } - else - { - /* Must be a card-based bridge. */ - do { + /* Check first for the built-in bridge on hose 1. */ + if (dev2hose(dev) == 1 && PCI_SLOT(dev->bus->self->devfn) == 8) { + slot = PCI_SLOT(dev->devfn); + } else { + /* Must be a card-based bridge. */ + do { /* Check for built-in bridge on hose 1. */ - if (dev2hose(dev) == 1 && + if (dev2hose(dev) == 1 && PCI_SLOT(dev->bus->self->devfn) == 8) { slot = PCI_SLOT(dev->devfn); break; - } - pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn)) ; + } + pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn)) ; - /* Move up the chain of bridges. */ - dev = dev->bus->self; - /* Slot of the next bridge. */ - slot = PCI_SLOT(dev->devfn); - } while (dev->bus->self); - } - *pinp = pin; - return slot; + /* Move up the chain of bridges. */ + dev = dev->bus->self; + /* Slot of the next bridge. 
*/ + slot = PCI_SLOT(dev->devfn); + } while (dev->bus->self); + } + *pinp = pin; + return slot; } static int __init @@ -300,14 +349,34 @@ { 30, 30, 30, 30, 30}, /* IdSel 11 21143 #2 */ { -1, -1, -1, -1, -1}, /* IdSel 12 unused */ { -1, -1, -1, -1, -1}, /* IdSel 13 unused */ - { 47, 47, 46, 45, 44}, /* IdSel 14 slot 0 */ + { 35, 35, 34, 33, 32}, /* IdSel 14 slot 0 */ { 39, 39, 38, 37, 36}, /* IdSel 15 slot 1 */ { 43, 43, 42, 41, 40}, /* IdSel 16 slot 2 */ - { 35, 35, 34, 33, 32}, /* IdSel 17 slot 3 */ -}; + { 47, 47, 46, 45, 44}, /* IdSel 17 slot 3 */ + }; const long min_idsel = 7, max_idsel = 17, irqs_per_slot = 5; + return COMMON_TABLE_LOOKUP; +} + +static int __init +clipper_map_irq(struct pci_dev *dev, int slot, int pin) +{ + static char irq_tab[7][5] __initlocaldata = { + /*INT INTA INTB INTC INTD */ + { 16+ 8, 16+ 8, 16+ 9, 16+10, 16+11}, /* IdSel 1 slot 1 */ + { 16+12, 16+12, 16+13, 16+14, 16+15}, /* IdSel 2 slot 2 */ + { 16+16, 16+16, 16+17, 16+18, 16+19}, /* IdSel 3 slot 3 */ + { 16+20, 16+20, 16+21, 16+22, 16+23}, /* IdSel 4 slot 4 */ + { 16+24, 16+24, 16+25, 16+26, 16+27}, /* IdSel 5 slot 5 */ + { 16+28, 16+28, 16+29, 16+30, 16+31}, /* IdSel 6 slot 6 */ + { -1, -1, -1, -1, -1} /* IdSel 7 ISA Bridge */ + }; + const long min_idsel = 1, max_idsel = 7, irqs_per_slot = 5; int irq = COMMON_TABLE_LOOKUP; + if (irq > 0) + irq += 16 * dev2hose(dev); + return irq; } @@ -336,6 +405,13 @@ SMC669_Init(0); } +static void __init +clipper_pci_fixup(void) +{ + layout_all_busses(DEFAULT_IO_BASE, DEFAULT_MEM_BASE); + common_pci_fixup(clipper_map_irq, common_swizzle); +} + /* * The System Vectors @@ -407,5 +483,28 @@ pci_fixup: webbrick_pci_fixup, kill_arch: generic_kill_arch, }; -/* No alpha_mv alias for webbrick, since we compile it in unconditionally - with DP264; setup_arch knows how to cope. */ + +struct alpha_machine_vector clipper_mv __initmv = { + vector_name: "Clipper", + DO_EV6_MMU, + DO_DEFAULT_RTC, + DO_TSUNAMI_IO, + DO_TSUNAMI_BUS, + machine_check: tsunami_machine_check, + max_dma_address: ALPHA_MAX_DMA_ADDRESS, + + nr_irqs: 64, + irq_probe_mask: _PROBE_MASK(64), + update_irq_hw: clipper_update_irq_hw, + ack_irq: generic_ack_irq, + device_interrupt: dp264_device_interrupt, + + init_arch: tsunami_init_arch, + init_irq: clipper_init_irq, + init_pit: generic_init_pit, + pci_fixup: clipper_pci_fixup, + kill_arch: generic_kill_arch, +}; + +/* No alpha_mv alias for webbrick/monet/clipper, since we compile them + in unconditionally with DP264; setup_arch knows how to cope. */ diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/sys_takara.c linux/arch/alpha/kernel/sys_takara.c --- v2.3.7/linux/arch/alpha/kernel/sys_takara.c Sun Sep 6 10:34:33 1998 +++ linux/arch/alpha/kernel/sys_takara.c Tue Jun 22 10:46:52 1999 @@ -3,7 +3,7 @@ * * Copyright (C) 1995 David A Rusling * Copyright (C) 1996 Jay A Estabrook - * Copyright (C) 1998 Richard Henderson + * Copyright (C) 1998, 1999 Richard Henderson * * Code supporting the TAKARA. */ @@ -30,11 +30,21 @@ #include "machvec.h" -/* - * WARNING WARNING WARNING - * - * This port is missing an update_irq_hw implementation. 
- */ +static void +takara_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p) +{ + unsigned int regaddr; + + if (irq <= 15) { + if (irq <= 7) + outb(mask, 0x21); /* ISA PIC1 */ + else + outb(mask >> 8, 0xA1); /* ISA PIC2 */ + } else if (irq <= 31) { + regaddr = 0x510 + ((irq - 16) & 0x0c); + outl((mask >> ((irq - 16) & 0x0c)) & 0xf0000Ul, regaddr); + } +} static void takara_device_interrupt(unsigned long vector, struct pt_regs *regs) @@ -68,28 +78,45 @@ if (intstatus & 4) handle_irq(16+2, 16+2, regs); if (intstatus & 2) handle_irq(16+1, 16+1, regs); if (intstatus & 1) handle_irq(16+0, 16+0, regs); - } else + } else { isa_device_interrupt (vector, regs); + } +} + +static void +takara_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) +{ + int irq = (vector - 0x800) >> 4; + + if (irq > 15) + irq = ((vector - 0x800) >> 6) + 12; + + handle_irq(irq, irq, regs); } static void __init takara_init_irq(void) { - unsigned int ctlreg; - STANDARD_INIT_IRQ_PROLOG; - ctlreg = inl(0x500); - ctlreg &= ~0x8000; /* return to non-accelerated mode */ - outw(ctlreg >> 16, 0x502); - outw(ctlreg & 0xFFFF, 0x500); - ctlreg = 0x05107c00; /* enable the PCI interrupt register */ - outw(ctlreg >> 16, 0x502); - outw(ctlreg & 0xFFFF, 0x500); + if (alpha_using_srm) + alpha_mv.device_interrupt = takara_srm_device_interrupt; + + if (!alpha_using_srm) { + unsigned int ctlreg = inl(0x500); + + /* Return to non-accelerated mode. */ + ctlreg &= ~0x8000; + outl(ctlreg, 0x500); + + /* Enable the PCI interrupt register. */ + ctlreg = 0x05107c00; + outl(ctlreg, 0x500); + } + enable_irq(2); } - /* * The Takara has PCI devices 1, 2, and 3 configured to slots 20, * 19, and 18 respectively, in the default configuration. They can @@ -123,12 +150,35 @@ return COMMON_TABLE_LOOKUP; } +static int __init +takara_swizzle(struct pci_dev *dev, int *pinp) +{ + int slot = PCI_SLOT(dev->devfn); + int pin = *pinp; + unsigned int ctlreg = inl(0x500); + unsigned int busslot = PCI_SLOT(dev->bus->self->devfn); + + /* Check first for built-in bridges. */ + if (busslot > 16 && ((1<<(36-busslot)) & ctlreg)) { + if (pin == 1) + pin += (20 - busslot); + else { + /* Must be a card-based bridge. */ + printk(KERN_WARNING "takara_swizzle: cannot handle " + "card-bridge behind builtin bridge yet.\n"); + } + } + + *pinp = pin; + return slot; +} + static void __init takara_pci_fixup(void) { layout_all_busses(DEFAULT_IO_BASE, DEFAULT_MEM_BASE); - common_pci_fixup(takara_map_irq, common_swizzle); - enable_ide(0x26e); + common_pci_fixup(takara_map_irq, takara_swizzle); + /* enable_ide(0x26e); */ } @@ -147,7 +197,7 @@ nr_irqs: 20, irq_probe_mask: _PROBE_MASK(20), - update_irq_hw: NULL, + update_irq_hw: takara_update_irq_hw, ack_irq: generic_ack_irq, device_interrupt: takara_device_interrupt, diff -u --recursive --new-file v2.3.7/linux/arch/alpha/kernel/time.c linux/arch/alpha/kernel/time.c --- v2.3.7/linux/arch/alpha/kernel/time.c Fri May 14 12:41:23 1999 +++ linux/arch/alpha/kernel/time.c Tue Jun 22 10:46:52 1999 @@ -1,7 +1,7 @@ /* * linux/arch/alpha/kernel/time.c * - * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * Copyright (C) 1991, 1992, 1995, 1999 Linus Torvalds * * This file contains the PC-specific time handling details: * reading the RTC at bootup, etc.. 
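The SRM interrupt decoding that the sys_takara.c hunks above rely on follows a fixed convention: the console delivers device interrupt n as vector 0x800 + 0x10 * n, so handlers recover the IRQ number by shifting. A sketch of the decoding performed by takara_srm_device_interrupt, using an illustrative function name that is not part of the patch:

/* Sketch of the SRM vector decoding in takara_srm_device_interrupt
   above; srm_vector_to_irq is an illustrative name, not part of the
   patch. SRM delivers device interrupt n as vector 0x800 + 0x10 * n;
   Takara packs its PCI interrupts at a coarser stride, hence the
   second mapping. */
static int srm_vector_to_irq(unsigned long vector)
{
        int irq = (vector - 0x800) >> 4;

        if (irq > 15)                   /* PCI rather than ISA */
                irq = ((vector - 0x800) >> 6) + 12;
        return irq;
}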
@@ -43,7 +43,7 @@ #include "irq.h" extern rwlock_t xtime_lock; -extern volatile unsigned long lost_ticks; /*kernel/sched.c*/ +extern volatile unsigned long lost_ticks; /* kernel/sched.c */ static int set_rtc_mmss(unsigned long); diff -u --recursive --new-file v2.3.7/linux/arch/alpha/mm/fault.c linux/arch/alpha/mm/fault.c --- v2.3.7/linux/arch/alpha/mm/fault.c Wed Sep 9 08:56:58 1998 +++ linux/arch/alpha/mm/fault.c Tue Jun 22 10:46:52 1999 @@ -7,6 +7,7 @@ #include #include #include +#include #define __EXTERN_INLINE inline #include @@ -28,65 +29,22 @@ extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); -#ifdef __SMP__ -unsigned long last_asn[NR_CPUS] = { /* gag */ - ASN_FIRST_VERSION + (0 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (1 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (2 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (3 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (4 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (5 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (6 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (7 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (8 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (9 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (10 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (11 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (12 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (13 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (14 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (15 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (16 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (17 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (18 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (19 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (20 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (21 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (22 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (23 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (24 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (25 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (26 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (27 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (28 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (29 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (30 << WIDTH_HARDWARE_ASN), - ASN_FIRST_VERSION + (31 << WIDTH_HARDWARE_ASN) -}; -#else -unsigned long asn_cache = ASN_FIRST_VERSION; -#endif /* __SMP__ */ - /* - * Select a new ASN for a task. + * Force a new ASN for a task. */ +#ifndef __SMP__ +unsigned long last_asn = ASN_FIRST_VERSION; +#endif + void get_new_mmu_context(struct task_struct *p, struct mm_struct *mm) { - unsigned long asn = asn_cache; - - if ((asn & HARDWARE_ASN_MASK) < MAX_ASN) - ++asn; - else { - tbiap(); - imb(); - asn = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION; - } - asn_cache = asn; - mm->context = asn; /* full version + asn */ - p->tss.asn = asn & HARDWARE_ASN_MASK; /* just asn */ + unsigned long new = __get_new_mmu_context(p, mm); + p->tss.mm_context = new; + p->tss.asn = new & HARDWARE_ASN_MASK; } + /* * This routine handles page faults. 
It determines the address, diff -u --recursive --new-file v2.3.7/linux/arch/alpha/mm/init.c linux/arch/alpha/mm/init.c --- v2.3.7/linux/arch/alpha/mm/init.c Fri May 14 12:41:23 1999 +++ linux/arch/alpha/mm/init.c Tue Jun 22 10:46:52 1999 @@ -174,7 +174,7 @@ extern unsigned long free_area_init(unsigned long, unsigned long); -static struct thread_struct * +static inline struct thread_struct * load_PCB(struct thread_struct * pcb) { register unsigned long sp __asm__("$30"); diff -u --recursive --new-file v2.3.7/linux/fs/block_dev.c linux/fs/block_dev.c --- v2.3.7/linux/fs/block_dev.c Mon Jun 21 11:17:58 1999 +++ linux/fs/block_dev.c Tue Jun 22 10:45:40 1999 @@ -125,7 +125,7 @@ buffercount=0; } balance_dirty(dev); - if(write_error) + if (write_error) break; } if ( buffercount ){ diff -u --recursive --new-file v2.3.7/linux/fs/buffer.c linux/fs/buffer.c --- v2.3.7/linux/fs/buffer.c Mon Jun 21 11:17:58 1999 +++ linux/fs/buffer.c Tue Jun 22 14:21:33 1999 @@ -103,10 +103,8 @@ int nref_dirt; /* Dirty buffer threshold for activating bdflush when trying to refill buffers. */ int dummy1; /* unused */ - int age_buffer; /* Time for normal buffer to age before - we flush it */ - int age_super; /* Time for superblock to age before we - flush it */ + int age_buffer; /* Time for normal buffer to age before we flush it */ + int age_super; /* Time for superblock to age before we flush it */ int dummy2; /* unused */ int dummy3; /* unused */ } b_un; @@ -746,21 +744,6 @@ return bh; } -void set_writetime(struct buffer_head * buf, int flag) -{ - int newtime; - - if (buffer_dirty(buf)) { - /* Move buffer to dirty list if jiffies is clear. */ - newtime = jiffies + (flag ? bdf_prm.b_un.age_super : - bdf_prm.b_un.age_buffer); - if(!buf->b_flushtime || buf->b_flushtime > newtime) - buf->b_flushtime = newtime; - } else { - buf->b_flushtime = 0; - } -} - /* * Put a buffer into the appropriate list, without side-effects. */ @@ -778,27 +761,29 @@ * pressures on different devices - thus the (currently unused) * 'dev' parameter. */ +int too_many_dirty_buffers; + void balance_dirty(kdev_t dev) { int dirty = nr_buffers_type[BUF_DIRTY]; int ndirty = bdf_prm.b_un.ndirty; if (dirty > ndirty) { - int wait = 0; - if (dirty > 2*ndirty) - wait = 1; - wakeup_bdflush(wait); + if (dirty > 2*ndirty) { + too_many_dirty_buffers = 1; + wakeup_bdflush(1); + return; + } + wakeup_bdflush(0); } + too_many_dirty_buffers = 0; + return; } -atomic_t too_many_dirty_buffers; - static inline void __mark_dirty(struct buffer_head *bh, int flag) { - set_writetime(bh, flag); + bh->b_flushtime = jiffies + (flag ? bdf_prm.b_un.age_super : bdf_prm.b_un.age_buffer); refile_buffer(bh); - if (atomic_read(&too_many_dirty_buffers)) - balance_dirty(bh->b_dev); } void __mark_buffer_dirty(struct buffer_head *bh, int flag) @@ -841,9 +826,6 @@ */ void __brelse(struct buffer_head * buf) { - /* If dirty, mark the time this buffer should be written back. */ - set_writetime(buf, 0); - refile_buffer(buf); touch_buffer(buf); if (buf->b_count) { @@ -1401,9 +1383,7 @@ if (!bh->b_blocknr) { err = -EIO; - down(&inode->i_sem); phys = fs_get_block (inode, block, 1, &err, &created); - up(&inode->i_sem); if (!phys) goto out; @@ -1491,9 +1471,7 @@ } if (!bh->b_blocknr) { err = -EIO; - down(&inode->i_sem); phys = fs_get_block (inode, block, 1, &err, &created); - up(&inode->i_sem); if (!phys) goto out; @@ -1505,16 +1483,19 @@ * We also rely on the fact that filesystem holes * cannot be written. 
*/ - if (!created && (start_offset || - (end_bytes && (i == end_block)))) { - bh->b_state = 0; - ll_rw_block(READ, 1, &bh); - lock_kernel(); - wait_on_buffer(bh); - unlock_kernel(); - err = -EIO; - if (!buffer_uptodate(bh)) - goto out; + if (start_offset || (end_bytes && (i == end_block))) { + if (created) { + memset(bh->b_data, 0, bh->b_size); + } else { + bh->b_state = 0; + ll_rw_block(READ, 1, &bh); + lock_kernel(); + wait_on_buffer(bh); + unlock_kernel(); + err = -EIO; + if (!buffer_uptodate(bh)) + goto out; + } } bh->b_state = (1<b_end_io = end_buffer_io_sync; set_bit(BH_Uptodate, &bh->b_state); + created = 0; } err = -EFAULT; + len = blocksize; if (start_offset) { len = start_bytes; start_offset = 0; - } else - if (end_bytes && (i == end_block)) { + } else if (end_bytes && (i == end_block)) { len = end_bytes; end_bytes = 0; - } else { - /* - * Overwritten block. - */ - len = blocksize; } if (copy_from_user(target_buf, buf, len)) goto out; @@ -1549,8 +1526,24 @@ * we dirty buffers only after copying the data into * the page - this way we can dirty the buffer even if * the bh is still doing IO. + * + * NOTE! This also does a direct dirty balace check, + * rather than relying on bdflush just waking up every + * once in a while. This is to catch (and slow down) + * the processes that write tons of buffer.. + * + * Note how we do NOT want to do this in the full block + * case: full pages are flushed not by the people who + * dirtied them, but by people who need memory. And we + * should not penalize them for somebody else writing + * lots of dirty pages. */ - atomic_mark_buffer_dirty(bh,0); + if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { + __atomic_mark_buffer_dirty(bh, bdf_prm.b_un.age_buffer); + if (too_many_dirty_buffers) + balance_dirty(bh->b_dev); + } + skip: i++; block++; @@ -1827,6 +1820,9 @@ tmp = tmp->b_this_page; if (!buffer_busy(p)) continue; + + too_many_dirty_buffers = 1; + wakeup_bdflush(0); return 0; } while (tmp != bh); @@ -2033,8 +2029,6 @@ if (buffer_locked(bh) || !buffer_dirty(bh)) continue; ndirty++; - if(time_before(jiffies, bh->b_flushtime)) - continue; nwritten++; next->b_count++; bh->b_count++; @@ -2102,28 +2096,13 @@ return error; } -/* This is the actual bdflush daemon itself. It used to be started from +/* + * This is the actual bdflush daemon itself. It used to be started from * the syscall above, but now we launch it ourselves internally with - * kernel_thread(...) directly after the first thread in init/main.c */ - -/* To prevent deadlocks for a loop device: - * 1) Do non-blocking writes to loop (avoids deadlock with running - * out of request blocks). - * 2) But do a blocking write if the only dirty buffers are loop buffers - * (otherwise we go into an infinite busy-loop). - * 3) Quit writing loop blocks if a freelist went low (avoids deadlock - * with running out of free buffers for loop's "real" device). -*/ + * kernel_thread(...) 
directly after the first thread in init/main.c + */ int bdflush(void * unused) { - int i; - int ndirty; - int nlist; - int ncount; - struct buffer_head * bh, *next; - int major; - int wrta_cmd = WRITEA; /* non-blocking write for LOOP */ - /* * We have a bare-bones task_struct, and really should fill * in a few more things so "top" and /proc/2/{exe,root,cwd} @@ -2143,99 +2122,91 @@ lock_kernel(); for (;;) { -#ifdef DEBUG - printk("bdflush() activated..."); -#endif + int nlist; CHECK_EMERGENCY_SYNC - ncount = 0; -#ifdef DEBUG - for(nlist = 0; nlist < NR_LIST; nlist++) -#else for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++) -#endif - { - ndirty = 0; - repeat: - - bh = lru_list[nlist]; - if(bh) - for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; - bh = next) { - /* We may have stalled while waiting for I/O to complete. */ - if(bh->b_list != nlist) goto repeat; - next = bh->b_next_free; - if(!lru_list[nlist]) { - printk("Dirty list empty %d\n", i); - break; - } - - /* Clean buffer on dirty list? Refile it */ - if (nlist == BUF_DIRTY && !buffer_dirty(bh)) { - refile_buffer(bh); - continue; - } - - /* Unlocked buffer on locked list? Refile it */ - if (nlist == BUF_LOCKED && !buffer_locked(bh)) { - refile_buffer(bh); - continue; - } - - if (buffer_locked(bh) || !buffer_dirty(bh)) - continue; - major = MAJOR(bh->b_dev); - /* Should we write back buffers that are shared or not?? - currently dirty buffers are not shared, so it does not matter */ - next->b_count++; - bh->b_count++; - ndirty++; - bh->b_flushtime = 0; - if (major == LOOP_MAJOR) { - ll_rw_block(wrta_cmd,1, &bh); - wrta_cmd = WRITEA; - if (buffer_dirty(bh)) - --ndirty; - } - else - ll_rw_block(WRITE, 1, &bh); -#ifdef DEBUG - if(nlist != BUF_DIRTY) ncount++; -#endif - bh->b_count--; - next->b_count--; - wake_up(&buffer_wait); - } - } -#ifdef DEBUG - if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount); - printk("sleeping again.\n"); -#endif - /* If we didn't write anything, but there are still - * dirty buffers, then make the next write to a - * loop device to be a blocking write. - * This lets us block--which we _must_ do! */ - if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0 && wrta_cmd != WRITE) { - wrta_cmd = WRITE; - continue; + { + int nr; + int written = 0; + struct buffer_head *next; + int major; + + repeat: + next = lru_list[nlist]; + nr = nr_buffers_type[nlist]; + + while (nr-- > 0) { + struct buffer_head *bh = next; + /* We may have stalled while waiting for I/O to complete. */ + if (next->b_list != nlist) + goto repeat; + next = next->b_next_free; + + /* Clean buffer on dirty list? Refile it */ + if (nlist == BUF_DIRTY && !buffer_dirty(bh)) { + refile_buffer(bh); + continue; + } + + /* Unlocked buffer on locked list? Refile it */ + if (nlist == BUF_LOCKED && !buffer_locked(bh)) { + refile_buffer(bh); + continue; + } + + /* + * If we aren't in panic mode, don't write out too much + * at a time. Also, don't write out buffers we don't really + * have to write out yet.. + */ + if (!too_many_dirty_buffers) { + if (written > bdf_prm.b_un.ndirty) + break; + if (time_before(jiffies, bh->b_flushtime)) + continue; + } + + if (buffer_locked(bh) || !buffer_dirty(bh)) + continue; + + major = MAJOR(bh->b_dev); + if (next) + next->b_count++; + bh->b_count++; + written++; + bh->b_flushtime = 0; + + /* + * For the loop major we can try to do asynchronous writes, + * but we have to guarantee that we're making some progress.. 
+ */ + if (major == LOOP_MAJOR && written > 1) { + ll_rw_block(WRITEA, 1, &bh); + if (buffer_dirty(bh)) + --written; + } else + ll_rw_block(WRITE, 1, &bh); + + bh->b_count--; + if (next) + next->b_count--; + wake_up(&buffer_wait); + } } run_task_queue(&tq_disk); wake_up(&bdflush_done); /* * If there are still a lot of dirty buffers around, - * skip the sleep and flush some more + * skip the sleep and flush some more. Otherwise, we + * sleep for a while and mark us as not being in panic + * mode.. */ - if ((ndirty == 0) || (nr_buffers_type[BUF_DIRTY] <= - nr_buffers * bdf_prm.b_un.nfract/100)) { - - atomic_set(&too_many_dirty_buffers, 0); - spin_lock_irq(¤t->sigmask_lock); - flush_signals(current); - spin_unlock_irq(¤t->sigmask_lock); - - interruptible_sleep_on(&bdflush_wait); + if (!too_many_dirty_buffers || nr_buffers_type[BUF_DIRTY] < bdf_prm.b_un.ndirty) { + too_many_dirty_buffers = 0; + sleep_on_timeout(&bdflush_wait, 5*HZ); } } } diff -u --recursive --new-file v2.3.7/linux/fs/ext2/truncate.c linux/fs/ext2/truncate.c --- v2.3.7/linux/fs/ext2/truncate.c Mon Jun 21 11:17:58 1999 +++ linux/fs/ext2/truncate.c Tue Jun 22 13:56:06 1999 @@ -158,7 +158,7 @@ } #define DATA_BUFFER_USED(bh) \ - ((bh->b_count > 1) || buffer_locked(bh)) + (bh->b_count || buffer_locked(bh)) static int trunc_direct (struct inode * inode) { @@ -177,12 +177,11 @@ bh = find_buffer(inode->i_dev, tmp, inode->i_sb->s_blocksize); if (bh) { - bh->b_count++; if (DATA_BUFFER_USED(bh)) { - brelse(bh); retry = 1; continue; } + bh->b_count++; } *p = 0; @@ -254,12 +253,11 @@ */ bh = find_buffer(inode->i_dev, tmp, inode->i_sb->s_blocksize); if (bh) { - bh->b_count++; if (DATA_BUFFER_USED(bh)) { - brelse(bh); retry = 1; continue; } + bh->b_count++; } *ind = 0; diff -u --recursive --new-file v2.3.7/linux/fs/locks.c linux/fs/locks.c --- v2.3.7/linux/fs/locks.c Tue May 11 14:37:40 1999 +++ linux/fs/locks.c Tue Jun 22 10:45:40 1999 @@ -512,7 +512,9 @@ while ((fl = *before) != NULL) { if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) { int (*lock)(struct file *, int, struct file_lock *); - lock = filp->f_op->lock; + lock = NULL; + if (filp->f_op) + lock = filp->f_op->lock; if (lock) { file_lock = *fl; file_lock.fl_type = F_UNLCK; diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/atomic.h linux/include/asm-alpha/atomic.h --- v2.3.7/linux/include/asm-alpha/atomic.h Sun Dec 27 15:21:50 1998 +++ linux/include/asm-alpha/atomic.h Tue Jun 22 10:46:52 1999 @@ -75,6 +75,7 @@ " mov %0,%2\n" " stl_c %0,%1\n" " beq %0,2f\n" + " mb\n" ".section .text2,\"ax\"\n" "2: br 1b\n" ".previous" @@ -92,6 +93,7 @@ " mov %0,%2\n" " stl_c %0,%1\n" " beq %0,2f\n" + " mb\n" ".section .text2,\"ax\"\n" "2: br 1b\n" ".previous" diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/bitops.h linux/include/asm-alpha/bitops.h --- v2.3.7/linux/include/asm-alpha/bitops.h Fri May 14 12:41:23 1999 +++ linux/include/asm-alpha/bitops.h Tue Jun 22 10:46:52 1999 @@ -90,6 +90,7 @@ " xor %0,%3,%0\n" " stl_c %0,%1\n" " beq %0,3f\n" + " mb\n" "2:\n" ".section .text2,\"ax\"\n" "3: br 1b\n" @@ -114,6 +115,7 @@ " xor %0,%3,%0\n" " stl_c %0,%1\n" " beq %0,3f\n" + " mb\n" "2:\n" ".section .text2,\"ax\"\n" "3: br 1b\n" @@ -137,6 +139,7 @@ " xor %0,%3,%0\n" " stl_c %0,%1\n" " beq %0,3f\n" + " mb\n" ".section .text2,\"ax\"\n" "3: br 1b\n" ".previous" diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/init.h linux/include/asm-alpha/init.h --- v2.3.7/linux/include/asm-alpha/init.h Fri May 14 12:41:23 1999 +++ linux/include/asm-alpha/init.h Tue Jun 22 10:46:52 
1999 @@ -1,6 +1,7 @@ #ifndef _ALPHA_INIT_H #define _ALPHA_INIT_H +#ifndef MODULE #define __init __attribute__ ((__section__ (".text.init"))) #define __initdata __attribute__ ((__section__ (".data.init"))) #define __initfunc(__arginit) \ @@ -11,6 +12,7 @@ #define __INIT .section .text.init,"ax" #define __FINIT .previous #define __INITDATA .section .data.init,"a" +#endif #define __cacheline_aligned __attribute__((__aligned__(32))) diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/io.h linux/include/asm-alpha/io.h --- v2.3.7/linux/include/asm-alpha/io.h Thu May 13 11:00:08 1999 +++ linux/include/asm-alpha/io.h Tue Jun 22 10:46:52 1999 @@ -29,15 +29,16 @@ */ static inline void __set_hae(unsigned long new_hae) { - unsigned long ipl = swpipl(7); + unsigned long flags; + __save_and_cli(flags); alpha_mv.hae_cache = new_hae; *alpha_mv.hae_register = new_hae; mb(); - /* Re-read to make sure it was written. */ new_hae = *alpha_mv.hae_register; - setipl(ipl); + + __restore_flags(flags); } static inline void set_hae(unsigned long new_hae) diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/machvec.h linux/include/asm-alpha/machvec.h --- v2.3.7/linux/include/asm-alpha/machvec.h Sun Dec 27 10:52:10 1998 +++ linux/include/asm-alpha/machvec.h Tue Jun 22 10:46:52 1999 @@ -32,7 +32,6 @@ int rtc_port; int max_asn; unsigned long max_dma_address; - unsigned long mmu_context_mask; unsigned long irq_probe_mask; unsigned long iack_sc; diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/mmu_context.h linux/include/asm-alpha/mmu_context.h --- v2.3.7/linux/include/asm-alpha/mmu_context.h Wed Mar 24 07:41:55 1999 +++ linux/include/asm-alpha/mmu_context.h Tue Jun 22 10:46:52 1999 @@ -49,31 +49,24 @@ # endif #endif -#ifdef __SMP__ -#define WIDTH_THIS_PROCESSOR 5 /* - * last_asn[processor]: + * cpu_last_asn(processor): * 63 0 * +-------------+----------------+--------------+ * | asn version | this processor | hardware asn | * +-------------+----------------+--------------+ */ -extern unsigned long last_asn[]; -#define asn_cache last_asn[p->processor] +#ifdef __SMP__ +#include +#define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) #else -#define WIDTH_THIS_PROCESSOR 0 -/* - * asn_cache: - * 63 0 - * +------------------------------+--------------+ - * | asn version | hardware asn | - * +------------------------------+--------------+ - */ -extern unsigned long asn_cache; +extern unsigned long last_asn; +#define cpu_last_asn(cpuid) last_asn #endif /* __SMP__ */ #define WIDTH_HARDWARE_ASN 8 +#define WIDTH_THIS_PROCESSOR 5 #define ASN_FIRST_VERSION (1UL << (WIDTH_THIS_PROCESSOR + WIDTH_HARDWARE_ASN)) #define HARDWARE_ASN_MASK ((1UL << WIDTH_HARDWARE_ASN) - 1) @@ -96,20 +89,46 @@ extern void get_new_mmu_context(struct task_struct *p, struct mm_struct *mm); -__EXTERN_INLINE void ev4_get_mmu_context(struct task_struct *p) +static inline unsigned long +__get_new_mmu_context(struct task_struct *p, struct mm_struct *mm) { - /* As described, ASN's are broken. */ + unsigned long asn = cpu_last_asn(smp_processor_id()); + unsigned long next = asn + 1; + + if ((next ^ asn) & ~MAX_ASN) { + tbiap(); + next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION; + } + cpu_last_asn(smp_processor_id()) = next; + mm->context = next; /* full version + asn */ + return next; } -__EXTERN_INLINE void ev5_get_mmu_context(struct task_struct *p) +__EXTERN_INLINE void +ev4_get_mmu_context(struct task_struct *p) +{ + /* As described, ASN's are broken. 
But we can optimize for + switching between threads -- if the mm is unchanged from + current we needn't flush. */ + if (current->mm != p->mm) + tbiap(); +} + +__EXTERN_INLINE void +ev5_get_mmu_context(struct task_struct *p) { - struct mm_struct * mm = p->mm; + /* Check if our ASN is of an older version, or on a different CPU, + and thus invalid. */ - if (mm) { - unsigned long asn = asn_cache; - /* Check if our ASN is of an older version and thus invalid */ - if ((mm->context ^ asn) & ~HARDWARE_ASN_MASK) - get_new_mmu_context(p, mm); + long asn = cpu_last_asn(smp_processor_id()); + struct mm_struct *mm = p->mm; + long mmc = mm->context; + + if ((p->tss.mm_context ^ asn) & ~HARDWARE_ASN_MASK) { + if ((mmc ^ asn) & ~HARDWARE_ASN_MASK) + mmc = __get_new_mmu_context(p, mm); + p->tss.mm_context = mmc; + p->tss.asn = mmc & HARDWARE_ASN_MASK; } } @@ -123,40 +142,40 @@ # endif #endif -extern inline void init_new_context(struct mm_struct *mm) +extern inline void +init_new_context(struct mm_struct *mm) { mm->context = 0; } -extern inline void destroy_context(struct mm_struct *mm) +extern inline void +destroy_context(struct mm_struct *mm) { /* Nothing to do. */ } +#ifdef __MMU_EXTERN_INLINE +#undef __EXTERN_INLINE +#undef __MMU_EXTERN_INLINE +#endif /* * Force a context reload. This is needed when we change the page * table pointer or when we update the ASN of the current process. */ -#if defined(CONFIG_ALPHA_GENERIC) -#define MASK_CONTEXT(tss) \ - ((struct thread_struct *)((unsigned long)(tss) & alpha_mv.mmu_context_mask)) -#elif defined(CONFIG_ALPHA_DP264) -#define MASK_CONTEXT(tss) \ - ((struct thread_struct *)((unsigned long)(tss) & 0xfffffffffful)) -#else -#define MASK_CONTEXT(tss) (tss) +/* Don't get into trouble with dueling __EXTERN_INLINEs. */ +#ifndef __EXTERN_INLINE +#include #endif -__EXTERN_INLINE struct thread_struct * +extern inline unsigned long __reload_tss(struct thread_struct *tss) { - register struct thread_struct *a0 __asm__("$16"); - register struct thread_struct *v0 __asm__("$0"); - - a0 = MASK_CONTEXT(tss); + register unsigned long a0 __asm__("$16"); + register unsigned long v0 __asm__("$0"); + a0 = virt_to_phys(tss); __asm__ __volatile__( "call_pal %2 #__reload_tss" : "=r"(v0), "=r"(a0) @@ -166,27 +185,22 @@ return v0; } -__EXTERN_INLINE void +extern inline void reload_context(struct task_struct *task) { __reload_tss(&task->tss); } /* - * After we have set current->mm to a new value, this activates the - * context for the new mm so we see the new mappings. + * After setting current->mm to a new value, activate the context for the + * new mm so we see the new mappings. 
*/ -__EXTERN_INLINE void +extern inline void activate_context(struct task_struct *task) { - get_mmu_context(task); + get_new_mmu_context(task, task->mm); reload_context(task); } - -#ifdef __MMU_EXTERN_INLINE -#undef __EXTERN_INLINE -#undef __MMU_EXTERN_INLINE -#endif #endif /* __ALPHA_MMU_CONTEXT_H */ diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/page.h linux/include/asm-alpha/page.h --- v2.3.7/linux/include/asm-alpha/page.h Sun Sep 6 10:34:33 1998 +++ linux/include/asm-alpha/page.h Tue Jun 22 10:46:52 1999 @@ -105,6 +105,15 @@ #define __pgprot(x) (x) #endif /* STRICT_MM_TYPECHECKS */ + +#define BUG() \ +do { \ + printk("Kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + __asm__ __volatile__("call_pal 129 # bugchk"); \ +} while (1) + +#define PAGE_BUG(page) BUG() + #endif /* !ASSEMBLY */ /* to align the pointer to the (next) page boundary */ diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/processor.h linux/include/asm-alpha/processor.h --- v2.3.7/linux/include/asm-alpha/processor.h Tue May 25 14:55:05 1999 +++ linux/include/asm-alpha/processor.h Tue Jun 22 10:46:52 1999 @@ -8,10 +8,10 @@ #define __ASM_ALPHA_PROCESSOR_H /* - * Default implementation of macro that returns current - * instruction pointer ("program counter"). + * Returns current instruction pointer ("program counter"). */ -#define current_text_addr() ({ __label__ _l; _l: &&_l;}) +#define current_text_addr() \ + ({ void *__pc; __asm__ ("br %0,.+4" : "=r"(__pc)); __pc; }) /* * We have a 42-bit user address space: 4TB user VM... @@ -61,6 +61,15 @@ */ unsigned long flags; + /* The full version of the ASN including serial number. + + Two threads running on two different processors must of necessity + have different serial numbers. Having this duplicated from + mm->context allows them to be slightly out of sync preventing + the asn from incrementing each and every time the two threads + are scheduled. */ + unsigned long mm_context; + /* Perform syscall argument validation (get/set_fs). */ mm_segment_t fs; @@ -77,7 +86,7 @@ 0, 0, 0, \ 0, 0, 0, \ 0, 0, 0, \ - 0, \ + 0, 0, \ KERNEL_DS \ } diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/smp.h linux/include/asm-alpha/smp.h --- v2.3.7/linux/include/asm-alpha/smp.h Fri May 14 12:41:23 1999 +++ linux/include/asm-alpha/smp.h Tue Jun 22 10:46:52 1999 @@ -9,6 +9,7 @@ struct cpuinfo_alpha { unsigned long loops_per_sec; + unsigned long last_asn; unsigned long *pgd_cache; unsigned long *pte_cache; unsigned long pgtable_cache_sz; diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/softirq.h linux/include/asm-alpha/softirq.h --- v2.3.7/linux/include/asm-alpha/softirq.h Thu Jun 3 14:32:26 1999 +++ linux/include/asm-alpha/softirq.h Tue Jun 22 10:46:52 1999 @@ -5,18 +5,33 @@ #include #include -/* - * This works but is wrong - on SMP it should disable only on the - * current CPU and shouldn't synchronize like the heavy global - * disable does. Oh, well. - * - * See the x86 version for an example. - */ -#define local_bh_enable() start_bh_atomic() -#define local_bh_disable() end_bh_atomic() - extern unsigned int local_bh_count[NR_CPUS]; +extern inline void cpu_bh_disable(int cpu) +{ + local_bh_count[cpu]++; + mb(); +} + +extern inline void cpu_bh_enable(int cpu) +{ + mb(); + local_bh_count[cpu]--; +} + +extern inline int cpu_bh_trylock(int cpu) +{ + return local_bh_count[cpu] ? 
0 : (local_bh_count[cpu] = 1); +} + +extern inline void cpu_bh_endlock(int cpu) +{ + local_bh_count[cpu] = 0; +} + +#define local_bh_enable() cpu_bh_enable(smp_processor_id()) +#define local_bh_disable() cpu_bh_disable(smp_processor_id()) + #define get_active_bhs() (bh_mask & bh_active) static inline void clear_active_bhs(unsigned long x) @@ -43,8 +58,9 @@ extern inline void remove_bh(int nr) { - bh_base[nr] = NULL; bh_mask &= ~(1 << nr); + wmb(); + bh_base[nr] = NULL; } extern inline void mark_bh(int nr) @@ -78,44 +94,39 @@ /* These are for the irq's testing the lock */ static inline int softirq_trylock(int cpu) { - if (!test_and_set_bit(0,&global_bh_count)) { - if (atomic_read(&global_bh_lock) == 0) { - ++local_bh_count[cpu]; - return 1; + if (cpu_bh_trylock(cpu)) { + if (!test_and_set_bit(0, &global_bh_count)) { + if (atomic_read(&global_bh_lock) == 0) + return 1; + clear_bit(0, &global_bh_count); } - clear_bit(0,&global_bh_count); + cpu_bh_endlock(cpu); } return 0; } static inline void softirq_endlock(int cpu) { - local_bh_count[cpu]--; - clear_bit(0,&global_bh_count); + cpu_bh_enable(cpu); + clear_bit(0, &global_bh_count); } #else extern inline void start_bh_atomic(void) { - local_bh_count[smp_processor_id()]++; - barrier(); + local_bh_disable(); } extern inline void end_bh_atomic(void) { - barrier(); - local_bh_count[smp_processor_id()]--; + local_bh_enable(); } /* These are for the irq's testing the lock */ -#define softirq_trylock(cpu) \ - (local_bh_count[cpu] ? 0 : (local_bh_count[cpu] = 1)) - -#define softirq_endlock(cpu) \ - (local_bh_count[cpu] = 0) - -#define synchronize_bh() do { } while (0) +#define softirq_trylock(cpu) cpu_bh_trylock(cpu) +#define softirq_endlock(cpu) cpu_bh_endlock(cpu) +#define synchronize_bh() barrier() #endif /* SMP */ diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/spinlock.h linux/include/asm-alpha/spinlock.h --- v2.3.7/linux/include/asm-alpha/spinlock.h Thu Jun 3 14:25:57 1999 +++ linux/include/asm-alpha/spinlock.h Tue Jun 22 10:46:52 1999 @@ -8,29 +8,47 @@ * and read-write locks.. We should actually do a * with all of this. Oh, well. 
*/ -#define spin_lock_irqsave(lock, flags) do { local_irq_save(flags); spin_lock(lock); } while (0) -#define spin_lock_irq(lock) do { local_irq_disable(); spin_lock(lock); } while (0) -#define spin_lock_bh(lock) do { local_bh_disable(); spin_lock(lock); } while (0) - -#define read_lock_irqsave(lock, flags) do { local_irq_save(flags); read_lock(lock); } while (0) -#define read_lock_irq(lock) do { local_irq_disable(); read_lock(lock); } while (0) -#define read_lock_bh(lock) do { local_bh_disable(); read_lock(lock); } while (0) - -#define write_lock_irqsave(lock, flags) do { local_irq_save(flags); write_lock(lock); } while (0) -#define write_lock_irq(lock) do { local_irq_disable(); write_lock(lock); } while (0) -#define write_lock_bh(lock) do { local_bh_disable(); write_lock(lock); } while (0) - -#define spin_unlock_irqrestore(lock, flags) do { spin_unlock(lock); local_irq_restore(flags); } while (0) -#define spin_unlock_irq(lock) do { spin_unlock(lock); local_irq_enable(); } while (0) -#define spin_unlock_bh(lock) do { spin_unlock(lock); local_bh_enable(); } while (0) - -#define read_unlock_irqrestore(lock, flags) do { read_unlock(lock); local_irq_restore(flags); } while (0) -#define read_unlock_irq(lock) do { read_unlock(lock); local_irq_enable(); } while (0) -#define read_unlock_bh(lock) do { read_unlock(lock); local_bh_enable(); } while (0) - -#define write_unlock_irqrestore(lock, flags) do { write_unlock(lock); local_irq_restore(flags); } while (0) -#define write_unlock_irq(lock) do { write_unlock(lock); local_irq_enable(); } while (0) -#define write_unlock_bh(lock) do { write_unlock(lock); local_bh_enable(); } while (0) +#define spin_lock_irqsave(lock, flags) \ + do { local_irq_save(flags); spin_lock(lock); } while (0) +#define spin_lock_irq(lock) \ + do { local_irq_disable(); spin_lock(lock); } while (0) +#define spin_lock_bh(lock) \ + do { local_bh_disable(); spin_lock(lock); } while (0) + +#define read_lock_irqsave(lock, flags) \ + do { local_irq_save(flags); read_lock(lock); } while (0) +#define read_lock_irq(lock) \ + do { local_irq_disable(); read_lock(lock); } while (0) +#define read_lock_bh(lock) \ + do { local_bh_disable(); read_lock(lock); } while (0) + +#define write_lock_irqsave(lock, flags) \ + do { local_irq_save(flags); write_lock(lock); } while (0) +#define write_lock_irq(lock) \ + do { local_irq_disable(); write_lock(lock); } while (0) +#define write_lock_bh(lock) \ + do { local_bh_disable(); write_lock(lock); } while (0) + +#define spin_unlock_irqrestore(lock, flags) \ + do { spin_unlock(lock); local_irq_restore(flags); } while (0) +#define spin_unlock_irq(lock) \ + do { spin_unlock(lock); local_irq_enable(); } while (0) +#define spin_unlock_bh(lock) \ + do { spin_unlock(lock); local_bh_enable(); } while (0) + +#define read_unlock_irqrestore(lock, flags) \ + do { read_unlock(lock); local_irq_restore(flags); } while (0) +#define read_unlock_irq(lock) \ + do { read_unlock(lock); local_irq_enable(); } while (0) +#define read_unlock_bh(lock) \ + do { read_unlock(lock); local_bh_enable(); } while (0) + +#define write_unlock_irqrestore(lock, flags) \ + do { write_unlock(lock); local_irq_restore(flags); } while (0) +#define write_unlock_irq(lock) \ + do { write_unlock(lock); local_irq_enable(); } while (0) +#define write_unlock_bh(lock) \ + do { write_unlock(lock); local_bh_enable(); } while (0) #ifndef __SMP__ @@ -49,7 +67,7 @@ #define spin_lock_init(lock) ((void) 0) #define spin_lock(lock) ((void) 0) -#define spin_trylock(lock) ((void) 0) +#define spin_trylock(lock) (1) 
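On uniprocessor builds the spin_trylock stub changes from ((void) 0) to (1) because callers use it as a predicate: with no other CPUs the lock is by definition uncontended, so the "acquired" branch must always be taken, and the old valueless expression could not even be used in a conditional. A sketch of the caller pattern this serves, with hypothetical names:

/* Illustrative caller, not from this patch. On UP builds
   spin_trylock() must evaluate to 1 ("acquired"), or code shaped
   like this would silently never do its work. */
static void poll_and_work(spinlock_t *lock)
{
        if (!spin_trylock(lock))
                return;                 /* contention: SMP only */
        do_protected_work();            /* hypothetical */
        spin_unlock(lock);
}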
 #define spin_unlock_wait(lock)  ((void) 0)
 #define spin_unlock(lock)       ((void) 0)
@@ -94,19 +112,20 @@
  */
 typedef struct {
-        volatile unsigned int lock;
+        volatile unsigned int lock /*__attribute__((aligned(32))) */;
 #if DEBUG_SPINLOCK
-        char debug_state, target_ipl, saved_ipl, on_cpu;
+        int on_cpu;
+        int line_no;
         void *previous;
         struct task_struct * task;
+        const char *base_file;
 #endif
 } spinlock_t;
 
 #if DEBUG_SPINLOCK
-#define SPIN_LOCK_UNLOCKED (spinlock_t) {0, 1, 0, 0, 0, 0}
+#define SPIN_LOCK_UNLOCKED (spinlock_t) {0, -1, 0, 0, 0, 0}
 #define spin_lock_init(x) \
-        ((x)->lock = 0, (x)->target_ipl = 0, (x)->debug_state = 1, \
-         (x)->previous = 0, (x)->task = 0)
+        ((x)->lock = 0, (x)->on_cpu = -1, (x)->previous = 0, (x)->task = 0)
 #else
 #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
 #define spin_lock_init(x)       ((x)->lock = 0)
@@ -120,8 +139,11 @@
 
 #if DEBUG_SPINLOCK
 extern void spin_unlock(spinlock_t * lock);
-extern void spin_lock(spinlock_t * lock);
-extern int spin_trylock(spinlock_t * lock);
+extern void debug_spin_lock(spinlock_t * lock, const char *, int);
+extern int debug_spin_trylock(spinlock_t * lock, const char *, int);
+
+#define spin_lock(LOCK) debug_spin_lock(LOCK, __BASE_FILE__, __LINE__)
+#define spin_trylock(LOCK) debug_spin_trylock(LOCK, __BASE_FILE__, __LINE__)
 
 #define spin_lock_own(LOCK, LOCATION) \
 do { \
@@ -167,7 +189,9 @@
 
 /***********************************************************/
 
-typedef struct { volatile int write_lock:1, read_counter:31; } rwlock_t;
+typedef struct {
+        volatile int write_lock:1, read_counter:31;
+} /*__attribute__((aligned(32)))*/ rwlock_t;
 
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
 
diff -u --recursive --new-file v2.3.7/linux/include/asm-alpha/system.h linux/include/asm-alpha/system.h
--- v2.3.7/linux/include/asm-alpha/system.h     Thu Jun  3 14:28:25 1999
+++ linux/include/asm-alpha/system.h    Tue Jun 22 10:46:52 1999
@@ -86,16 +86,6 @@
         unsigned long ld_lock;     /* Contents of EV5 LD_LOCK register*/
 };
 
-
-extern void wrent(void *, unsigned long);
-extern void wrkgp(unsigned long);
-extern void wrusp(unsigned long);
-extern unsigned long rdusp(void);
-extern unsigned long rdmces (void);
-extern void wrmces (unsigned long);
-extern unsigned long whami(void);
-extern void wripir(unsigned long);
-
 extern void halt(void) __attribute__((noreturn));
 
 #define switch_to(prev,next,last) \
@@ -159,73 +149,86 @@
         __asm__ ("amask %1,%0" : "=r"(__amask) : "rI"(__input)); \
         __amask; })
 
-static inline unsigned long
-wrperfmon(unsigned long perf_fun, unsigned long arg)
-{
-        register unsigned long __r0 __asm__("$0");
-        register unsigned long __r16 __asm__("$16");
-        register unsigned long __r17 __asm__("$17");
-        __r16 = perf_fun;
-        __r17 = arg;
-        __asm__ __volatile__(
-                "call_pal %1"
-                : "=r"(__r0)
-                : "i"(PAL_wrperfmon), "r"(__r16), "r"(__r17)
-                : "$1", "$22", "$23", "$24", "$25", "$26");
-        return __r0;
+#define __CALL_PAL_R0(NAME, TYPE)                               \
+static inline TYPE NAME(void)                                   \
+{                                                               \
+        register TYPE __r0 __asm__("$0");                       \
+        __asm__ __volatile__(                                   \
+                "call_pal %1 # " #NAME                          \
+                :"=r" (__r0)                                    \
+                :"i" (PAL_ ## NAME)                             \
+                :"$1", "$16", "$22", "$23", "$24", "$25");      \
+        return __r0;                                            \
 }
 
+#define __CALL_PAL_W1(NAME, TYPE0)                              \
+static inline void NAME(TYPE0 arg0)                             \
+{                                                               \
+        register TYPE0 __r16 __asm__("$16") = arg0;             \
+        __asm__ __volatile__(                                   \
+                "call_pal %1 # "#NAME                           \
+                : "=r"(__r16)                                   \
+                : "i"(PAL_ ## NAME), "0"(__r16)                 \
+                : "$1", "$22", "$23", "$24", "$25");            \
+}
 
-#define call_pal1(palno,arg)                                    \
-({                                                              \
-        register unsigned long __r0 __asm__("$0");              \
-        register unsigned long __r16 __asm__("$16"); __r16 = arg; \
-        __asm__ __volatile__(                                   \
-                "call_pal %3 #call_pal1"                        \
-                :"=r" (__r0),"=r" (__r16)                       \
-                :"1" (__r16),"i" (palno)                        \
-                :"$1", "$22", "$23", "$24", "$25", "memory");   \
-        __r0;                                                   \
-})
-
-#define getipl()                                                \
-({                                                              \
-        register unsigned long r0 __asm__("$0");                \
-        __asm__ __volatile__(                                   \
-                "call_pal %1 #getipl"                           \
-                :"=r" (r0)                                      \
-                :"i" (PAL_rdps)                                 \
-                :"$1", "$16", "$22", "$23", "$24", "$25", "memory"); \
-        r0;                                                     \
-})
+#define __CALL_PAL_W2(NAME, TYPE0, TYPE1)                       \
+static inline void NAME(TYPE0 arg0, TYPE1 arg1)                 \
+{                                                               \
+        register TYPE0 __r16 __asm__("$16") = arg0;             \
+        register TYPE1 __r17 __asm__("$17") = arg1;             \
+        __asm__ __volatile__(                                   \
+                "call_pal %2 # "#NAME                           \
+                : "=r"(__r16), "=r"(__r17)                      \
+                : "i"(PAL_ ## NAME), "0"(__r16), "1"(__r17)     \
+                : "$1", "$22", "$23", "$24", "$25");            \
+}
 
-#define setipl(ipl)                                             \
-({                                                              \
-        register unsigned long __r16 __asm__("$16"); __r16 = (ipl); \
-        __asm__ __volatile__(                                   \
-                "call_pal %2 #setipl"                           \
-                :"=r" (__r16)                                   \
-                :"0" (__r16),"i" (PAL_swpipl)                   \
-                :"$0", "$1", "$22", "$23", "$24", "$25", "memory"); \
-})
+#define __CALL_PAL_RW1(NAME, RTYPE, TYPE0)                      \
+static inline RTYPE NAME(TYPE0 arg0)                            \
+{                                                               \
+        register RTYPE __r0 __asm__("$0");                      \
+        register TYPE0 __r16 __asm__("$16") = arg0;             \
+        __asm__ __volatile__(                                   \
+                "call_pal %2 # "#NAME                           \
+                : "=r"(__r16), "=r"(__r0)                       \
+                : "i"(PAL_ ## NAME), "0"(__r16)                 \
+                : "$1", "$22", "$23", "$24", "$25");            \
+        return __r0;                                            \
+}
 
-#define swpipl(ipl)                                             \
-({                                                              \
-        register unsigned long __r0 __asm__("$0");              \
-        register unsigned long __r16 __asm__("$16") = (ipl);    \
+#define __CALL_PAL_RW2(NAME, RTYPE, TYPE0, TYPE1)               \
+static inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1)                \
+{                                                               \
+        register RTYPE __r0 __asm__("$0");                      \
+        register TYPE0 __r16 __asm__("$16") = arg0;             \
+        register TYPE1 __r17 __asm__("$17") = arg1;             \
         __asm__ __volatile__(                                   \
-                "call_pal %3 #swpipl"                           \
-                :"=r" (__r0),"=r" (__r16)                       \
-                :"1" (__r16),"i" (PAL_swpipl)                   \
-                :"$1", "$22", "$23", "$24", "$25", "memory");   \
-        __r0;                                                   \
-})
+                "call_pal %3 # "#NAME                           \
+                : "=r"(__r16), "=r"(__r17), "=r"(__r0)          \
+                : "i"(PAL_ ## NAME), "0"(__r16), "1"(__r17)     \
+                : "$1", "$22", "$23", "$24", "$25");            \
+        return __r0;                                            \
+}
 
-#define __cli()                 setipl(7)
-#define __sti()                 setipl(0)
-#define __save_flags(flags)     ((flags) = getipl())
+__CALL_PAL_R0(rdmces, unsigned long);
+__CALL_PAL_R0(rdps, unsigned long);
+__CALL_PAL_R0(rdusp, unsigned long);
+__CALL_PAL_RW1(swpipl, unsigned long, unsigned long);
+__CALL_PAL_R0(whami, unsigned long);
+__CALL_PAL_W2(wrent, void*, unsigned long);
+__CALL_PAL_W1(wripir, unsigned long);
+__CALL_PAL_W1(wrkgp, unsigned long);
+__CALL_PAL_W1(wrmces, unsigned long);
+__CALL_PAL_RW2(wrperfmon, unsigned long, unsigned long, unsigned long);
+__CALL_PAL_W1(wrusp, unsigned long);
+__CALL_PAL_W1(wrvptptr, unsigned long);
+
+#define __cli()                 ((void) swpipl(7))
+#define __sti()                 ((void) swpipl(0))
+#define __save_flags(flags)     ((flags) = rdps())
 #define __save_and_cli(flags)   ((flags) = swpipl(7))
-#define __restore_flags(flags)  setipl(flags)
+#define __restore_flags(flags)  ((void) swpipl(flags))
 
 #define local_irq_save(flags)   __save_and_cli(flags)
 #define local_irq_restore(flags)        __restore_flags(flags)
@@ -294,6 +297,7 @@
         "       bis $31,%3,%1\n"
         "       stl_c %1,%2\n"
         "       beq %1,2f\n"
+        "       mb\n"
         ".section .text2,\"ax\"\n"
         "2:     br 1b\n"
         ".previous"
@@ -312,6 +316,7 @@
         "       bis $31,%3,%1\n"
         "       stq_c %1,%2\n"
         "       beq %1,2f\n"
+        "       mb\n"
         ".section .text2,\"ax\"\n"
         "2:     br 1b\n"
         ".previous"
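The __CALL_PAL_* generators above replace both the wrent/wrkgp/rdusp/whami
assembly stubs deleted from head.S earlier in this patch and the ad-hoc
call_pal1/getipl/setipl/swpipl macros, producing one typed inline wrapper
per PALcode service and keeping the register conventions ($0 result, $16/$17
arguments, $22-$25 clobbered) in a single place.  Below is a minimal
user-space sketch of the same token-pasting pattern; it is not part of the
patch, and fake_pal_trap() plus these PAL_* numbers are invented stand-ins,
since a real call_pal traps into PALcode and cannot run in user mode:

#include <stdio.h>

/* Invented trap numbers for the demo; the real PAL_* constants
   live in the Alpha PAL headers. */
#define PAL_rdps        0x36
#define PAL_whami       0x3c

/* Hypothetical stub standing in for the privileged call_pal
   instruction, so the pattern can compile and run anywhere. */
static unsigned long fake_pal_trap(int palno)
{
        return 0x100 + palno;   /* pretend PALcode returned something */
}

/* One invocation generates one typed inline wrapper, pasting the
   wrapper's name onto PAL_ to find its trap number, the same shape
   as __CALL_PAL_R0 in the hunk above. */
#define CALL_PAL_R0(NAME, TYPE)                         \
static inline TYPE NAME(void)                           \
{                                                       \
        return (TYPE) fake_pal_trap(PAL_ ## NAME);      \
}

CALL_PAL_R0(rdps, unsigned long)
CALL_PAL_R0(whami, unsigned long)

int main(void)
{
        printf("rdps()  -> %#lx\n", rdps());
        printf("whami() -> %#lx\n", whami());
        return 0;
}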
diff -u --recursive --new-file v2.3.7/linux/include/asm-i386/spinlock.h linux/include/asm-i386/spinlock.h
--- v2.3.7/linux/include/asm-i386/spinlock.h    Tue Jun  8 23:03:38 1999
+++ linux/include/asm-i386/spinlock.h   Tue Jun 22 14:41:38 1999
@@ -50,7 +50,7 @@
 #endif
 
 #define spin_lock_init(lock)    do { } while(0)
-#define spin_lock(lock)         do { } while(0)
+#define spin_lock(lock)         (void)(lock) /* Not "unused variable". */
 #define spin_trylock(lock)      (1)
 #define spin_unlock_wait(lock)  do { } while(0)
 #define spin_unlock(lock)       do { } while(0)
@@ -109,9 +109,9 @@
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
 #endif
 
-#define read_lock(lock)         do { } while(0)
+#define read_lock(lock)         (void)(lock) /* Not "unused variable". */
 #define read_unlock(lock)       do { } while(0)
-#define write_lock(lock)        do { } while(0)
+#define write_lock(lock)        (void)(lock) /* Not "unused variable". */
 #define write_unlock(lock)      do { } while(0)
 
 #else   /* __SMP__ */
diff -u --recursive --new-file v2.3.7/linux/include/linux/fs.h linux/include/linux/fs.h
--- v2.3.7/linux/include/linux/fs.h     Mon Jun 21 11:18:00 1999
+++ linux/include/linux/fs.h    Tue Jun 22 14:41:43 1999
@@ -738,7 +738,6 @@
 
 extern struct file *inuse_filps;
 
-extern void set_writetime(struct buffer_head *, int);
 extern int try_to_free_buffers(struct page *);
 extern void refile_buffer(struct buffer_head * buf);
 
diff -u --recursive --new-file v2.3.7/linux/include/linux/pagemap.h linux/include/linux/pagemap.h
--- v2.3.7/linux/include/linux/pagemap.h        Mon Jun 21 11:18:00 1999
+++ linux/include/linux/pagemap.h       Tue Jun 22 14:41:52 1999
@@ -76,6 +76,7 @@
 
 extern void __add_page_to_hash_queue(struct page * page, struct page **p);
 
+extern void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset);
 extern int add_to_page_cache_unique(struct page * page, struct inode * inode, unsigned long offset, struct page **hash);
 
 static inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset)
diff -u --recursive --new-file v2.3.7/linux/include/linux/swap.h linux/include/linux/swap.h
--- v2.3.7/linux/include/linux/swap.h   Mon Jun 21 11:18:00 1999
+++ linux/include/linux/swap.h  Tue Jun 22 14:41:41 1999
@@ -52,7 +52,6 @@
         kdev_t swap_device;
         struct dentry * swap_file;
         unsigned short * swap_map;
-        unsigned char * swap_lockmap;
         unsigned int lowest_bit;
         unsigned int highest_bit;
         unsigned int cluster_next;
@@ -85,7 +84,7 @@
 extern int try_to_free_pages(unsigned int gfp_mask);
 
 /* linux/mm/page_io.c */
-extern void rw_swap_page(int, unsigned long, char *, int);
+extern void rw_swap_page(int, struct page *, int);
 extern void rw_swap_page_nocache(int, unsigned long, char *);
 extern void rw_swap_page_nolock(int, unsigned long, char *, int);
 extern void swap_after_unlock_page (unsigned long entry);
@@ -97,7 +96,7 @@
 
 /* linux/mm/swap_state.c */
 extern void show_swap_cache_info(void);
-extern int add_to_swap_cache(struct page *, unsigned long);
+extern void add_to_swap_cache(struct page *, unsigned long);
 extern int swap_duplicate(unsigned long);
 extern int swap_check_entry(unsigned long);
 struct page * lookup_swap_cache(unsigned long);
@@ -145,13 +144,6 @@
 extern unsigned long swap_cache_find_total;
 extern unsigned long swap_cache_find_success;
 #endif
-
-extern inline unsigned long in_swap_cache(struct page *page)
-{
-        if (PageSwapCache(page))
-                return page->offset;
-        return 0;
-}
 
 /*
  * Work out if there are any other processes sharing this page, ignoring
diff -u --recursive --new-file v2.3.7/linux/init/main.c linux/init/main.c
--- v2.3.7/linux/init/main.c    Mon Jun 21 11:18:00 1999
+++ linux/init/main.c   Tue Jun 22 10:45:40 1999
@@ -69,7 +69,6 @@
 static int init(void *);
 extern int bdflush(void *);
 extern int kswapd(void *);
-extern int kpiod(void *);
 extern void kswapd_setup(void);
 
 extern void init_IRQ(void);
@@ -1304,7 +1303,6 @@
         kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
         /* Start the background pageout daemon. */
         kswapd_setup();
-        kernel_thread(kpiod, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
         kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
 
 #if CONFIG_AP1000
diff -u --recursive --new-file v2.3.7/linux/kernel/fork.c linux/kernel/fork.c
--- v2.3.7/linux/kernel/fork.c  Wed May 26 11:15:36 1999
+++ linux/kernel/fork.c Tue Jun 22 13:56:06 1999
@@ -613,7 +613,7 @@
 {
         int i;
         p->has_cpu = 0;
-        p->processor = NO_PROC_ID;
+        p->processor = current->processor;
         /* ?? should we just memset this ?? */
         for(i = 0; i < smp_num_cpus; i++)
                 p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
diff -u --recursive --new-file v2.3.7/linux/kernel/ksyms.c linux/kernel/ksyms.c
--- v2.3.7/linux/kernel/ksyms.c Mon Jun 21 11:18:01 1999
+++ linux/kernel/ksyms.c        Tue Jun 22 14:08:14 1999
@@ -355,7 +355,6 @@
 EXPORT_SYMBOL(si_meminfo);
 
 /* Added to make file system as module */
-EXPORT_SYMBOL(set_writetime);
 EXPORT_SYMBOL(sys_tz);
 EXPORT_SYMBOL(__wait_on_super);
 EXPORT_SYMBOL(file_fsync);
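A note on the i386 change from 'do { } while(0)' to '(void)(lock)' in the
uniprocessor stubs above: evaluating and discarding the argument generates
no code, but it counts as a use of the lock variable, which is what the
'/* Not "unused variable". */' comments refer to.  An illustrative fragment
(the spinlock_t here is a stand-in, not the kernel's):

/* Stand-in type for the demo. */
typedef struct { int dummy; } spinlock_t;

#define spin_lock_old(lock)     do { } while (0)
#define spin_lock_new(lock)     (void)(lock)    /* Not "unused variable". */

int counter;

void bump(void)
{
        spinlock_t lock;        /* exists only to satisfy the locking API */

        spin_lock_new(&lock);   /* expands to (void)(&lock): a real use */
        counter++;
        /* with spin_lock_old(&lock) instead, gcc -Wall reports:
           warning: unused variable 'lock' */
}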
diff -u --recursive --new-file v2.3.7/linux/mm/filemap.c linux/mm/filemap.c
--- v2.3.7/linux/mm/filemap.c   Mon Jun 21 11:18:01 1999
+++ linux/mm/filemap.c  Tue Jun 22 14:25:22 1999
@@ -37,29 +37,11 @@
 atomic_t page_cache_size = ATOMIC_INIT(0);
 struct page * page_hash_table[PAGE_HASH_SIZE];
 
-/*
- * Define a request structure for outstanding page write requests
- * to the background page io daemon
- */
-
-struct pio_request
-{
-        struct pio_request * next;
-        struct file * file;
-        unsigned long offset;
-        unsigned long page;
-};
-static struct pio_request *pio_first = NULL, **pio_last = &pio_first;
-static kmem_cache_t *pio_request_cache;
-static DECLARE_WAIT_QUEUE_HEAD(pio_wait);
-
 spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
 
-static inline void
-make_pio_request(struct file *, unsigned long, unsigned long);
-
-void __add_page_to_hash_queue(struct page * page, struct page **p){
+void __add_page_to_hash_queue(struct page * page, struct page **p)
+{
         atomic_inc(&page_cache_size);
         if((page->next_hash = *p) != NULL)
                 (*p)->pprev_hash = &page->next_hash;
@@ -233,7 +215,7 @@
 int shrink_mmap(int priority, int gfp_mask)
 {
         static unsigned long clock = 0;
-        unsigned long limit = num_physpages;
+        unsigned long limit = num_physpages << 1;
         struct page * page;
         int count, users;
 
@@ -264,6 +246,8 @@
                 if ((gfp_mask & __GFP_DMA) && !PageDMA(page))
                         continue;
 
+                count--;
+
                 /*
                  * Some common cases that we just short-circuit without
                  * getting the locks - we need to re-check this once we
@@ -308,23 +292,16 @@
 
                 /* Is it a buffer page? */
                 if (page->buffers) {
-                        kdev_t dev = page->buffers->b_dev;
                         spin_unlock(&pagecache_lock);
                         if (try_to_free_buffers(page))
                                 goto made_progress;
-                        if (!atomic_read(&too_many_dirty_buffers)) {
-                                atomic_set(&too_many_dirty_buffers, 1);
-                                balance_dirty(dev);
-                        }
-                        goto unlock_continue;
+                        spin_lock(&pagecache_lock);
                 }
 
                 /* We can't free pages unless there's just one user */
                 if (page_count(page) != 2)
                         goto spin_unlock_continue;
 
-                count--;
-
                 /*
                  * Is it a page swap page? If so, we want to
                  * drop it if it is no longer used, even if it
@@ -485,6 +462,13 @@
         __add_page_to_hash_queue(page, hash);
 }
 
+void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset)
+{
+        spin_lock(&pagecache_lock);
+        __add_to_page_cache(page, inode, offset, page_hash(inode, offset));
+        spin_unlock(&pagecache_lock);
+}
+
 int add_to_page_cache_unique(struct page * page, struct inode * inode,
                              unsigned long offset, struct page **hash)
@@ -646,7 +630,6 @@
 struct page * __find_lock_page (struct inode * inode,
                                 unsigned long offset, struct page **hash)
 {
-        int locked;
         struct page *page;
 
         /*
@@ -656,16 +639,12 @@
 repeat:
         spin_lock(&pagecache_lock);
         page = __find_page_nolock(inode, offset, *hash);
-        locked = 0;
-        if (page) {
+        if (page)
                 get_page(page);
-                if (TryLockPage(page))
-                        locked = 1;
-        }
         spin_unlock(&pagecache_lock);
 
         /* Found the page, sleep if locked. */
-        if (page && locked) {
+        if (page && TryLockPage(page)) {
                 struct task_struct *tsk = current;
                 DECLARE_WAITQUEUE(wait, tsk);
@@ -1460,7 +1439,6 @@
 {
         int retval;
         unsigned long size;
-        loff_t loff = offset;
         int (*writepage) (struct file *, struct page *);
         struct page * page;
 
@@ -1479,15 +1457,8 @@
         page = mem_map + MAP_NR(page_addr);
         lock_page(page);
 
-        if (writepage) {
-                retval = writepage(file, page);
-        } else {
-                mm_segment_t old_fs = get_fs();
-                set_fs(KERNEL_DS);
-                if (size == file->f_op->write(file, page_addr, size, &loff))
-                        retval = 0;
-                set_fs(old_fs);
-        }
+        retval = writepage(file, page);
+
         UnlockPage(page);
         return retval;
 }
@@ -1505,25 +1476,12 @@
         file = vma->vm_file;
         dentry = file->f_dentry;
         inode = dentry->d_inode;
-        if (!file->f_op->write)
-                return -EIO;
 
         /*
          * If a task terminates while we're swapping the page, the vma and
         * and file could be released ... increment the count to be safe.
         */
        file->f_count++;
-
-        /*
-         * If this is a swapping operation rather than msync(), then
-         * leave the actual IO, and the restoration of the file count,
-         * to the kpiod thread.  Just queue the request for now.
-         */
-        if (!wait) {
-                make_pio_request(file, offset, page);
-                return 0;
-        }
-
         result = do_write_page(inode, file, (const char *) page, offset);
         fput(file);
         return result;
@@ -1535,9 +1493,12 @@
  * trying to swap something out and swap something in
  * at the same time..
  */
+extern void wakeup_bdflush(int);
 int filemap_swapout(struct vm_area_struct * vma, struct page * page)
 {
-        return filemap_write_page(vma, page->offset, page_address(page), 0);
+        int retval = filemap_write_page(vma, page->offset, page_address(page), 0);
+        wakeup_bdflush(0);
+        return retval;
 }
 
 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
@@ -1712,8 +1673,11 @@
         struct inode *inode = file->f_dentry->d_inode;
 
         ops = &file_private_mmap;
-        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
+                if (!inode->i_op || !inode->i_op->writepage)
+                        return -EINVAL;
                 ops = &file_shared_mmap;
+        }
         if (!inode->i_sb || !S_ISREG(inode->i_mode))
                 return -EACCES;
         if (!inode->i_op || !inode->i_op->readpage)
@@ -1949,127 +1913,4 @@
                 panic("put_cached_page: page count=%d\n",
                       page_count(page));
         page_cache_release(page);
-}
-
-
-/* Add request for page IO to the queue */
-
-static inline void put_pio_request(struct pio_request *p)
-{
-        *pio_last = p;
-        p->next = NULL;
-        pio_last = &p->next;
-}
-
-/* Take the first page IO request off the queue */
-
-static inline struct pio_request * get_pio_request(void)
-{
-        struct pio_request * p = pio_first;
-        pio_first = p->next;
-        if (!pio_first)
-                pio_last = &pio_first;
-        return p;
-}
-
-/* Make a new page IO request and queue it to the kpiod thread */
-
-static inline void make_pio_request(struct file *file,
-                                    unsigned long offset,
-                                    unsigned long pageaddr)
-{
-        struct pio_request *p;
-        struct page *page;
-
-        page = page_cache_entry(pageaddr);
-        get_page(page);
-
-        /*
-         * We need to allocate without causing any recursive IO in the
-         * current thread's context.  We might currently be swapping out
-         * as a result of an allocation made while holding a critical
-         * filesystem lock.  To avoid deadlock, we *MUST* not reenter
-         * the filesystem in this thread.
-         *
-         * We can wait for kswapd to free memory, or we can try to free
-         * pages without actually performing further IO, without fear of
-         * deadlock.  --sct
-         */
-
-        while ((p = kmem_cache_alloc(pio_request_cache, GFP_BUFFER)) == NULL) {
-                if (try_to_free_pages(__GFP_WAIT))
-                        continue;
-                current->state = TASK_INTERRUPTIBLE;
-                schedule_timeout(HZ/10);
-        }
-
-        p->file   = file;
-        p->offset = offset;
-        p->page   = pageaddr;
-
-        put_pio_request(p);
-        wake_up(&pio_wait);
-}
-
-
-/*
- * This is the only thread which is allowed to write out filemap pages
- * while swapping.
- *
- * To avoid deadlock, it is important that we never reenter this thread.
- * Although recursive memory allocations within this thread may result
- * in more page swapping, that swapping will always be done by queuing
- * another IO request to the same thread: we will never actually start
- * that IO request until we have finished with the current one, and so
- * we will not deadlock.
- */
-
-int kpiod(void * unused)
-{
-        struct task_struct *tsk = current;
-        DECLARE_WAITQUEUE(wait, tsk);
-        struct inode * inode;
-        struct dentry * dentry;
-        struct pio_request * p;
-
-        tsk->session = 1;
-        tsk->pgrp = 1;
-        strcpy(tsk->comm, "kpiod");
-        sigfillset(&tsk->blocked);
-
-        /*
-         * Mark this task as a memory allocator - we don't want to get caught
-         * up in the regular mm freeing frenzy if we have to allocate memory
-         * in order to write stuff out.
-         */
-        tsk->flags |= PF_MEMALLOC;
-
-        lock_kernel();
-
-        pio_request_cache = kmem_cache_create("pio_request",
-                                              sizeof(struct pio_request),
-                                              0, SLAB_HWCACHE_ALIGN,
-                                              NULL, NULL);
-        if (!pio_request_cache)
-                panic ("Could not create pio_request slab cache");
-
-        while (1) {
-                tsk->state = TASK_INTERRUPTIBLE;
-                add_wait_queue(&pio_wait, &wait);
-                if (!pio_first)
-                        schedule();
-                remove_wait_queue(&pio_wait, &wait);
-                tsk->state = TASK_RUNNING;
-
-                while (pio_first) {
-                        p = get_pio_request();
-                        dentry = p->file->f_dentry;
-                        inode = dentry->d_inode;
-
-                        do_write_page(inode, p->file,
-                                      (const char *) p->page, p->offset);
-                        fput(p->file);
-                        page_cache_free(p->page);
-                        kmem_cache_free(pio_request_cache, p);
-                }
-        }
 }
diff -u --recursive --new-file v2.3.7/linux/mm/page_alloc.c linux/mm/page_alloc.c
--- v2.3.7/linux/mm/page_alloc.c        Mon Jun 21 11:18:01 1999
+++ linux/mm/page_alloc.c       Tue Jun 22 10:50:36 1999
@@ -124,6 +124,9 @@
         if (!PageReserved(page) && put_page_testzero(page)) {
                 if (PageSwapCache(page))
                         PAGE_BUG(page);
+                if (PageLocked(page))
+                        PAGE_BUG(page);
+
                 page->flags &= ~(1 << PG_referenced);
                 free_pages_ok(page - mem_map, 0);
                 return 1;
@@ -140,6 +143,8 @@
         if (!PageReserved(map) && put_page_testzero(map)) {
                 if (PageSwapCache(map))
                         PAGE_BUG(map);
+                if (PageLocked(map))
+                        PAGE_BUG(map);
                 map->flags &= ~(1 << PG_referenced);
                 free_pages_ok(map_nr, order);
                 return 1;
@@ -368,8 +373,6 @@
                 if (!swapdev->swap_map[offset])
                         break;
                 if (swapdev->swap_map[offset] == SWAP_MAP_BAD)
-                        break;
-                if (test_bit(offset, swapdev->swap_lockmap))
                         break;
 
                 /* Ok, do the async read-ahead now */
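The new add_to_page_cache() in the filemap.c hunk above is the standard
lock-wrapping idiom: the double-underscore worker __add_to_page_cache()
assumes pagecache_lock is already held, and the exported wrapper is the
only public way in, so no caller can insert a page with the lock forgotten.
A self-contained sketch of the pattern, using a pthread mutex as a stand-in
for the kernel spinlock and invented names throughout:

#include <pthread.h>

struct page { struct page *next_hash; unsigned long offset; };

static pthread_mutex_t pagecache_lock = PTHREAD_MUTEX_INITIALIZER;
static struct page *hash_head;

/* The double-underscore worker: the caller must hold pagecache_lock,
   exactly like __add_to_page_cache() in the hunk above. */
static void __add_to_cache(struct page *page, unsigned long offset)
{
        page->offset = offset;
        page->next_hash = hash_head;
        hash_head = page;
}

/* The exported entry point takes the lock itself: lock, delegate,
   unlock. */
void add_to_cache(struct page *page, unsigned long offset)
{
        pthread_mutex_lock(&pagecache_lock);
        __add_to_cache(page, offset);
        pthread_mutex_unlock(&pagecache_lock);
}

int main(void)
{
        static struct page p;
        add_to_cache(&p, 42);
        return 0;
}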
diff -u --recursive --new-file v2.3.7/linux/mm/page_io.c linux/mm/page_io.c
--- v2.3.7/linux/mm/page_io.c   Mon Jun 21 11:18:01 1999
+++ linux/mm/page_io.c  Tue Jun 22 12:24:29 1999
@@ -35,7 +35,7 @@
  * that shared pages stay shared while being swapped.
  */
 
-static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait)
+static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait, int dolock)
 {
         unsigned long type, offset;
         struct swap_info_struct * p;
@@ -84,26 +84,6 @@
                 return;
         }
 
-        if (PageSwapCache(page)) {
-                /* Make sure we are the only process doing I/O with this swap page. */
-                while (test_and_set_bit(offset,p->swap_lockmap)) {
-                        run_task_queue(&tq_disk);
-                        sleep_on(&lock_queue);
-                }
-
-                /*
-                 * Make sure that we have a swap cache association for this
-                 * page.  We need this to find which swap page to unlock once
-                 * the swap IO has completed to the physical page.  If the page
-                 * is not already in the cache, just overload the offset entry
-                 * as if it were: we are not allowed to manipulate the inode
-                 * hashing for locked pages.
-                 */
-                if (page->offset != entry) {
-                        printk ("swap entry mismatch");
-                        return;
-                }
-        }
         if (rw == READ) {
                 ClearPageUptodate(page);
                 kstat.pswpin++;
@@ -159,14 +139,6 @@
                 }
         } else {
                 printk(KERN_ERR "rw_swap_page: no swap file or device\n");
-                /* Do some cleaning up so if this ever happens we can hopefully
-                 * trigger controlled shutdown.
-                 */
-                if (PageSwapCache(page)) {
-                        if (!test_and_clear_bit(offset,p->swap_lockmap))
-                                printk("swap_after_unlock_page: lock already cleared\n");
-                        wake_up(&lock_queue);
-                }
                 put_page(page);
                 return;
         }
@@ -174,9 +146,10 @@
                 set_bit(PG_decr_after, &page->flags);
                 atomic_inc(&nr_async_pages);
         }
-        if (PageSwapCache(page)) {
+        if (dolock) {
                 /* only lock/unlock swap cache pages! */
                 set_bit(PG_swap_unlock_after, &page->flags);
+                p->swap_map[offset]++;
         }
         set_bit(PG_free_after, &page->flags);
@@ -203,93 +176,51 @@
 #endif
 }
 
-/* Note: We could remove this totally asynchronous function,
- * and improve swap performance, and remove the need for the swap lock map,
- * by not removing pages from the swap cache until after I/O has been
- * processed and letting remove_from_page_cache decrement the swap count
- * just before it removes the page from the page cache.
+/*
+ * This is run when asynchronous page I/O has completed.
+ * It decrements the swap bitmap counter
  */
-/* This is run when asynchronous page I/O has completed. */
-void swap_after_unlock_page (unsigned long entry)
+void swap_after_unlock_page(unsigned long entry)
 {
-        unsigned long type, offset;
-        struct swap_info_struct * p;
-
-        type = SWP_TYPE(entry);
-        if (type >= nr_swapfiles) {
-                printk("swap_after_unlock_page: bad swap-device\n");
-                return;
-        }
-        p = &swap_info[type];
-        offset = SWP_OFFSET(entry);
-        if (offset >= p->max) {
-                printk("swap_after_unlock_page: weirdness\n");
-                return;
-        }
-        if (!test_and_clear_bit(offset,p->swap_lockmap))
-                printk("swap_after_unlock_page: lock already cleared\n");
-        wake_up(&lock_queue);
+        swap_free(entry);
 }
 
-/* A simple wrapper so the base function doesn't need to enforce
- * that all swap pages go through the swap cache!
+/*
+ * A simple wrapper so the base function doesn't need to enforce
+ * that all swap pages go through the swap cache!  We verify that:
+ * - the page is locked
+ * - it's marked as being swap-cache
+ * - it's associated with the swap inode
  */
-void rw_swap_page(int rw, unsigned long entry, char *buf, int wait)
+void rw_swap_page(int rw, struct page *page, int wait)
 {
-        struct page *page = mem_map + MAP_NR(buf);
+        unsigned long entry = page->offset;
 
-        if (page->inode && page->inode != &swapper_inode)
+        if (!PageLocked(page))
                 PAGE_BUG(page);
-
-        /*
-         * Make sure that we have a swap cache association for this
-         * page.  We need this to find which swap page to unlock once
-         * the swap IO has completed to the physical page.  If the page
-         * is not already in the cache, just overload the offset entry
-         * as if it were: we are not allowed to manipulate the inode
-         * hashing for locked pages.
-         */
-        if (!PageSwapCache(page)) {
-                printk("VM: swap page is not in swap cache\n");
-                return;
-        }
-        if (page->offset != entry) {
-                printk ("swap entry mismatch");
-                return;
-        }
-        rw_swap_page_base(rw, entry, page, wait);
+        if (!PageSwapCache(page))
+                PAGE_BUG(page);
+        if (page->inode != &swapper_inode)
+                PAGE_BUG(page);
+        rw_swap_page_base(rw, entry, page, wait, 1);
 }
 
 /*
  * Setting up a new swap file needs a simple wrapper just to read the
  * swap signature.  SysV shared memory also needs a simple wrapper.
  */
-void rw_swap_page_nocache(int rw, unsigned long entry, char *buffer)
+void rw_swap_page_nocache(int rw, unsigned long entry, char *buf)
 {
-        struct page *page;
+        struct page *page = mem_map + MAP_NR(buf);
 
-        page = mem_map + MAP_NR((unsigned long) buffer);
-
         if (TryLockPage(page))
                 PAGE_BUG(page);
-        if (test_and_set_bit(PG_swap_cache, &page->flags))
+        if (PageSwapCache(page))
                 PAGE_BUG(page);
         if (page->inode)
                 PAGE_BUG(page);
-        get_page(page); /* Protect from shrink_mmap() */
-        page->inode = &swapper_inode;
         page->offset = entry;
-        rw_swap_page(rw, entry, buffer, 1);
-
-        /*
-         * and now remove it from the pagecache ...
-         */
-        if (TryLockPage(page))
-                PAGE_BUG(page);
-        PageClearSwapCache(page);
-        remove_inode_page(page);
-        page_cache_release(page);
-        UnlockPage(page);
+        rw_swap_page_base(rw, entry, page, 1, 1);
 }
 
 /*
@@ -298,17 +229,13 @@
  * Therefore we can't use it.  Later when we can remove the need for the
  * lock map and we can reduce the number of functions exported.
  */
-void rw_swap_page_nolock(int rw, unsigned long entry, char *buffer, int wait)
+void rw_swap_page_nolock(int rw, unsigned long entry, char *buf, int wait)
 {
-        struct page *page = mem_map + MAP_NR((unsigned long) buffer);
+        struct page *page = mem_map + MAP_NR(buf);
 
-        if (!PageLocked(page)) {
-                printk("VM: rw_swap_page_nolock: page not locked!\n");
-                return;
-        }
-        if (PageSwapCache(page)) {
-                printk ("VM: rw_swap_page_nolock: page in swap cache!\n");
-                return;
-        }
-        rw_swap_page_base(rw, entry, page, wait);
+        if (!PageLocked(page))
+                PAGE_BUG(page);
+        if (PageSwapCache(page))
+                PAGE_BUG(page);
+        rw_swap_page_base(rw, entry, page, wait, 0);
 }
diff -u --recursive --new-file v2.3.7/linux/mm/swap_state.c linux/mm/swap_state.c
--- v2.3.7/linux/mm/swap_state.c        Mon Jun 21 11:18:01 1999
+++ linux/mm/swap_state.c       Tue Jun 22 14:24:34 1999
@@ -66,7 +66,7 @@
 }
 #endif
 
-int add_to_swap_cache(struct page *page, unsigned long entry)
+void add_to_swap_cache(struct page *page, unsigned long entry)
 {
 #ifdef SWAP_CACHE_INFO
         swap_cache_add_total++;
@@ -79,19 +79,12 @@
                 printk(KERN_ERR "swap_cache: replacing non-empty entry %08lx "
                        "on page %08lx\n",
                        page->offset, page_address(page));
-                return 0;
         }
         if (page->inode) {
                 printk(KERN_ERR "swap_cache: replacing page-cached entry "
                        "on page %08lx\n", page_address(page));
-                return 0;
         }
-        get_page(page);
-        page->inode = &swapper_inode;
-        page->offset = entry;
-        add_page_to_hash_queue(page, &swapper_inode, entry);
-        add_page_to_inode_queue(&swapper_inode, page);
-        return 1;
+        add_to_page_cache(page, &swapper_inode, entry);
 }
 
 /*
@@ -202,21 +195,27 @@
 static inline void remove_from_swap_cache(struct page *page)
 {
-        if (!page->inode) {
+        struct inode *inode = page->inode;
+
+        if (!inode) {
                 printk ("VM: Removing swap cache page with zero inode hash "
                         "on page %08lx\n", page_address(page));
                 return;
         }
-        if (page->inode != &swapper_inode) {
+        if (inode != &swapper_inode) {
                 printk ("VM: Removing swap cache page with wrong inode hash "
                         "on page %08lx\n", page_address(page));
         }
+        if (!PageSwapCache(page))
+                PAGE_BUG(page);
 #ifdef DEBUG_SWAP
         printk("DebugVM: remove_from_swap_cache(%08lx count %d)\n",
                page_address(page), page_count(page));
 #endif
         PageClearSwapCache(page);
+        if (inode->i_op->flushpage)
+                inode->i_op->flushpage(inode, page, 0);
         remove_inode_page(page);
 }
 
@@ -266,8 +265,14 @@
         /*
          * If we are the only user, then free up the swap cache.
          */
-        if (PageSwapCache(page) && !is_page_shared(page))
-                delete_from_swap_cache(page);
+        lock_page(page);
+        if (PageSwapCache(page) && !is_page_shared(page)) {
+                long entry = page->offset;
+                remove_from_swap_cache(page);
+                swap_free(entry);
+                page_cache_release(page);
+        }
+        UnlockPage(page);
 
         __free_page(page);
 }
@@ -351,11 +356,8 @@
         /*
          * Add it to the swap cache and read its contents.
          */
-        if (!add_to_swap_cache(new_page, entry))
-                goto out_free_page;
-
-        LockPage(new_page);
-        rw_swap_page(READ, entry, (char *) new_page_addr, wait);
+        add_to_swap_cache(new_page, entry);
+        rw_swap_page(READ, new_page, wait);
 
 #ifdef DEBUG_SWAP
         printk("DebugVM: read_swap_cache_async created "
                "entry %08lx at %p\n",
@@ -370,4 +372,3 @@
 out:
         return found_page;
 }
-
diff -u --recursive --new-file v2.3.7/linux/mm/swapfile.c linux/mm/swapfile.c
--- v2.3.7/linux/mm/swapfile.c  Mon Jun 21 11:18:01 1999
+++ linux/mm/swapfile.c Tue Jun 22 12:17:05 1999
@@ -42,8 +42,6 @@
                         offset = si->cluster_next++;
                         if (si->swap_map[offset])
                                 continue;
-                        if (test_bit(offset, si->swap_lockmap))
-                                continue;
                         si->cluster_nr--;
                         goto got_page;
                 }
@@ -52,8 +50,6 @@
         for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
                 if (si->swap_map[offset])
                         continue;
-                if (test_bit(offset, si->swap_lockmap))
-                        continue;
                 si->lowest_bit = offset;
 got_page:
                 si->swap_map[offset] = 1;
@@ -424,8 +420,6 @@
         p->swap_device = 0;
         vfree(p->swap_map);
         p->swap_map = NULL;
-        vfree(p->swap_lockmap);
-        p->swap_lockmap = NULL;
         p->flags = 0;
         err = 0;
@@ -505,7 +499,6 @@
         int lock_map_size = PAGE_SIZE;
         int nr_good_pages = 0;
         unsigned long maxpages;
-        unsigned long tmp_lock_map = 0;
         int swapfilesize;
 
         lock_kernel();
@@ -524,7 +517,6 @@
         p->swap_file = NULL;
         p->swap_device = 0;
         p->swap_map = NULL;
-        p->swap_lockmap = NULL;
         p->lowest_bit = 0;
         p->highest_bit = 0;
         p->cluster_nr = 0;
@@ -590,9 +582,8 @@
                 goto bad_swap;
         }
 
-        p->swap_lockmap = (char *) &tmp_lock_map;
-        rw_swap_page_nocache(READ, SWP_ENTRY(type,0), (char *) swap_header);
-        p->swap_lockmap = NULL;
+        lock_page(mem_map + MAP_NR(swap_header));
+        rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header, 1);
 
         if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
                 swap_header_version = 1;
@@ -689,11 +680,6 @@
                         goto bad_swap;
                 }
                 p->swap_map[0] = SWAP_MAP_BAD;
-                if (!(p->swap_lockmap = vmalloc (lock_map_size))) {
-                        error = -ENOMEM;
-                        goto bad_swap;
-                }
-                memset(p->swap_lockmap,0,lock_map_size);
                 p->flags = SWP_WRITEOK;
                 p->pages = nr_good_pages;
                 nr_swap_pages += nr_good_pages;
@@ -720,15 +706,12 @@
         if(filp.f_op && filp.f_op->release)
                 filp.f_op->release(filp.f_dentry->d_inode,&filp);
 bad_swap_2:
-        if (p->swap_lockmap)
-                vfree(p->swap_lockmap);
         if (p->swap_map)
                 vfree(p->swap_map);
         dput(p->swap_file);
         p->swap_device = 0;
         p->swap_file = NULL;
         p->swap_map = NULL;
-        p->swap_lockmap = NULL;
         p->flags = 0;
         if (!(swap_flags & SWAP_FLAG_PREFER))
                 ++least_priority;
diff -u --recursive --new-file v2.3.7/linux/mm/vmscan.c linux/mm/vmscan.c
--- v2.3.7/linux/mm/vmscan.c    Mon Jun 21 11:18:01 1999
+++ linux/mm/vmscan.c   Tue Jun 22 14:36:36 1999
@@ -36,31 +36,35 @@
 {
         pte_t pte;
         unsigned long entry;
-        unsigned long page;
-        struct page * page_map;
+        unsigned long page_addr;
+        struct page * page;
 
         pte = *page_table;
         if (!pte_present(pte))
-                return 0;
-        page = pte_page(pte);
-        if (MAP_NR(page) >= max_mapnr)
-                return 0;
-        page_map = mem_map + MAP_NR(page);
+                goto out_failed;
+        page_addr = pte_page(pte);
+        if (MAP_NR(page_addr) >= max_mapnr)
+                goto out_failed;
+        page = mem_map + MAP_NR(page_addr);
 
-        if (pte_young(pte)) {
+        /*
+         * Dont be too eager to get aging right if
+         * memory is dangerously low.
+         */
+        if (!low_on_memory && pte_young(pte)) {
                 /*
                  * Transfer the "accessed" bit from the page
                  * tables to the global page map.
                  */
                 set_pte(page_table, pte_mkold(pte));
-                set_bit(PG_referenced, &page_map->flags);
-                return 0;
+                set_bit(PG_referenced, &page->flags);
+                goto out_failed;
         }
 
-        if (PageReserved(page_map)
-            || PageLocked(page_map)
-            || ((gfp_mask & __GFP_DMA) && !PageDMA(page_map)))
-                return 0;
+        if (PageReserved(page)
+            || PageLocked(page)
+            || ((gfp_mask & __GFP_DMA) && !PageDMA(page)))
+                goto out_failed;
 
         /*
          * Is the page already in the swap cache? If so, then
@@ -70,15 +74,15 @@
          * Return 0, as we didn't actually free any real
          * memory, and we should just continue our scan.
          */
-        if (PageSwapCache(page_map)) {
-                entry = page_map->offset;
+        if (PageSwapCache(page)) {
+                entry = page->offset;
                 swap_duplicate(entry);
                 set_pte(page_table, __pte(entry));
 drop_pte:
                 vma->vm_mm->rss--;
                 flush_tlb_page(vma, address);
-                __free_page(page_map);
-                return 0;
+                __free_page(page);
+                goto out_failed;
         }
 
         /*
@@ -105,7 +109,7 @@
          * locks etc.
          */
         if (!(gfp_mask & __GFP_IO))
-                return 0;
+                goto out_failed;
 
         /*
          * Ok, it's really dirty. That means that
@@ -120,7 +124,7 @@
          * assume we free'd something.
          *
          * NOTE NOTE NOTE! This should just set a
-         * dirty bit in page_map, and just drop the
+         * dirty bit in 'page', and just drop the
          * pte. All the hard work would be done by
         * shrink_mmap().
         *
@@ -133,10 +137,9 @@
                 flush_tlb_page(vma, address);
                 vma->vm_mm->rss--;
 
-                if (vma->vm_ops->swapout(vma, page_map))
+                if (vma->vm_ops->swapout(vma, page))
                         kill_proc(pid, SIGBUS, 1);
-                __free_page(page_map);
-                return 1;
+                goto out_free_success;
         }
 
         /*
@@ -147,23 +150,25 @@
          */
         entry = get_swap_page();
         if (!entry)
-                return 0; /* No swap space left */
+                goto out_failed; /* No swap space left */
 
         vma->vm_mm->rss--;
         tsk->nswap++;
         set_pte(page_table, __pte(entry));
         flush_tlb_page(vma, address);
         swap_duplicate(entry);  /* One for the process, one for the swap cache */
-        add_to_swap_cache(page_map, entry);
-        /* We checked we were unlocked way up above, and we
-           have been careful not to stall until here */
-        LockPage(page_map);
+
+        /* This will also lock the page */
+        add_to_swap_cache(page, entry);
 
         /* OK, do a physical asynchronous write to swap.  */
-        rw_swap_page(WRITE, entry, (char *) page, 0);
+        rw_swap_page(WRITE, page, 0);
 
-        __free_page(page_map);
+out_free_success:
+        __free_page(page);
         return 1;
+out_failed:
+        return 0;
 }
 
 /*
@@ -490,8 +495,8 @@
 
                 if (!do_try_to_free_pages(GFP_KSWAPD))
                         break;
+                run_task_queue(&tq_disk);
         } while (!tsk->need_resched);
-        run_task_queue(&tq_disk);
         tsk->state = TASK_INTERRUPTIBLE;
         schedule_timeout(HZ);
 }
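The vmscan.c rewrite that closes this patch converts try_to_swap_out()'s
scattered returns into the shared out_failed/out_free_success exits, so the
page reference is dropped in exactly one place.  A compilable sketch of
that control-flow shape, with made-up types and free() standing in for
__free_page():

#include <stdlib.h>

struct page { int busy; int dirty; };

/* Every failure path shares out_failed; every success path reaches
   out_free_success, the single release point. */
static int try_to_reclaim(struct page *page)
{
        if (page == NULL)
                goto out_failed;        /* nothing to do */
        if (page->busy)
                goto out_failed;        /* cannot touch it now */

        if (page->dirty) {
                /* ... write it back here ... */
                goto out_free_success;
        }
        /* clean page: just drop it, falling into the success exit */

out_free_success:
        free(page);                     /* released in one place only */
        return 1;
out_failed:
        return 0;
}

int main(void)
{
        struct page *p = calloc(1, sizeof(*p));
        return try_to_reclaim(p) ? 0 : 1;
}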