diff -u --recursive --new-file v2.1.37/linux/CREDITS linux/CREDITS --- v2.1.37/linux/CREDITS Tue May 13 22:40:59 1997 +++ linux/CREDITS Wed May 14 15:01:20 1997 @@ -1242,14 +1242,14 @@ S: Finland N: Eric Schenk -E: schenk@cs.toronto.edu +E: Eric.Schenk@dna.lth.se D: Random kernel debugging. D: SYSV Semaphore code rewrite. D: Network layer debugging. D: Dial on demand facility (diald). -S: 7 Borden Street -S: Toronto, Ontario -S: Canada M5S 2M8 +S: Dag Hammerskjolds v. 3E +S: S-226 64 LUND +S: Sweden N: Peter De Schrijver E: stud11@cc4.kuleuven.ac.be diff -u --recursive --new-file v2.1.37/linux/Documentation/ioctl-number.txt linux/Documentation/ioctl-number.txt --- v2.1.37/linux/Documentation/ioctl-number.txt Sun Apr 13 10:18:20 1997 +++ linux/Documentation/ioctl-number.txt Thu May 15 14:43:52 1997 @@ -1,5 +1,5 @@ Ioctl Numbers -5 Apr 1997 +10 Apr 1997 Michael Chastain @@ -87,6 +87,8 @@ 'W' 28-2F linux/iso16-relay.h in development 'Y' all linux/cyclades.h 'a' all various, see http://lrcwww.epfl.ch/linux-atm/magic.html +'b' 00-3F bit3 vme host bridge in development: + 'c' all linux/comstats.h 'f' all linux/ext2_fs.h 'l' 00-3F linux/tcfs_fs.h in development: @@ -94,7 +96,8 @@ 'm' all linux/mtio.h conflict! 'm' all linux/soundcard.h conflict! 'n' all linux/ncp_fs.h -'p' all linux/mc146818rtc.h +'p' 00-3F linux/mc146818rtc.h +'p' 40-7F linux/nvram.h 'r' all linux/msdos_fs.h 's' all linux/cdk.h 't' 00-7F linux/if_ppp.h @@ -108,8 +111,9 @@ 0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range 0x8B all linux/wireless.h 0x8C 00-3F WiNRADiO driver in development: - + 0x90 00 linux/sbpcd.h +0x93 60-7F linux/auto_fs.h 0x99 00-0F 537-Addinboard driver in development: 0xA0 all Small Device Project in development:
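The ioctl-number.txt hunk above splits the 'p' magic letter between the RTC driver (sequence numbers 0x00-0x3F) and the new nvram driver (0x40-0x7F). Purely as an illustration of how such a registered range is used when composing ioctl numbers -- the RTC_EXAMPLE/NVRAM_EXAMPLE names below are invented for this sketch; the real constants live in linux/mc146818rtc.h and linux/nvram.h:

/* Sketch: composing and checking ioctl numbers against a range
 * registered in Documentation/ioctl-number.txt. */
#include <linux/ioctl.h>

#define RTC_EXAMPLE	_IO('p', 0x01)	/* 'p' 0x00-0x3F: mc146818rtc.h */
#define NVRAM_EXAMPLE	_IO('p', 0x40)	/* 'p' 0x40-0x7F: nvram.h */

static int is_nvram_ioctl(unsigned int cmd)
{
	/* _IOC_TYPE() extracts the magic, _IOC_NR() the sequence number */
	return _IOC_TYPE(cmd) == 'p' &&
	       _IOC_NR(cmd) >= 0x40 && _IOC_NR(cmd) <= 0x7f;
}

Keeping each driver inside its declared number range is what lets the registry detect conflicts such as the 'm' entries marked "conflict!" above.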
diff -u --recursive --new-file v2.1.37/linux/Documentation/svga.txt linux/Documentation/svga.txt --- v2.1.37/linux/Documentation/svga.txt Tue Nov 12 03:08:43 1996 +++ linux/Documentation/svga.txt Wed May 14 15:01:20 1997 @@ -1,5 +1,5 @@ - Video Mode Selection Support 2.10 - (c) 1995, 1996 Martin Mares, + Video Mode Selection Support 2.11 + (c) 1995--1997 Martin Mares, -------------------------------------------------------------------------------- 1. Intro @@ -14,8 +14,9 @@ The video mode to be used is selected by a kernel parameter which can be specified in the kernel Makefile (the SVGA_MODE=... line) or by the "vga=..." -option of LILO or by the "vidmode" utility (present in standard Linux utility -packages). You can use the following settings of this parameter: +option of LILO (or some other boot loader you use) or by the "vidmode" utility +(present in standard Linux utility packages). You can use the following values +of this parameter: NORMAL_VGA - Standard 80x25 mode available on all display adapters. @@ -39,8 +40,8 @@ The ASK_VGA mode causes the kernel to offer a video mode menu upon bootup. It displays a "Press <ENTER> to see video modes available, <SPACE> to continue or wait 30 secs" message. If you press <ENTER>, you enter the -menu, if you press <SPACE> or wait 30 seconds, the kernel will boot up with -the standard 80x25 mode set. +menu, if you press <SPACE> or wait 30 seconds, the kernel will boot up in +the standard 80x25 mode. The menu looks like: @@ -51,62 +52,70 @@ 2 0F02 80x43 3 0F03 80x26 .... -Enter mode number: +Enter mode number or `scan': -<adapter> - should contain a name of your video adapter -or the chip in it or at least whether it's an EGA or VGA or VESA VGA (VGA with -a VESA-compliant BIOS in it). If it doesn't match your configuration, tell me -and I'll try to fix it somehow (you know, hardware detection is a real pain -on PC's). +<adapter> tells which video adapter Linux detected +-- it's either a generic adapter name (MDA, CGA, HGC, EGA, VGA, VESA VGA [a VGA +with VESA-compliant BIOS]) or a chipset name (e.g., Trident). Direct detection +of chipsets is turned off by default (see CONFIG_VIDEO_SVGA in chapter 4 for how +to enable it if you really want to) as it's inherently unreliable due to +absolutely insane PC design. "0 0F00 80x25" tells you that the first menu item (the menu items are numbered from "0" to "9" and from "a" to "z") is an 80x25 mode with ID=0x0f00 (see the -next section for a description of the mode ID's). +next section for a description of mode ID's). <Enter mode number or `scan'> encourages you to enter the item number or mode ID you wish to set and press <ENTER>. If the computer complains about an "Unknown mode ID", it is trying to tell you that it isn't possible to set such -a mode. It's also possible to press only <ENTER> which forces the current -mode to be used. +a mode. It's also possible to press <ENTER> only, which leaves the current mode. - The mode list may be a bit inaccurate on your machine (it isn't possible -to autodetect all existing video cards and their mutations). Some of the -modes may be unsettable, some of them might work incorrectly with Linux -(the common case is mirroring of the first few lines at the bottom of the screen -because of BIOS bugs) or there can exist modes which are not displayed. If -you think the list doesn't match your configuration, let me know and I'll try -to add your configuration to the next version of the mode selector. + The mode list usually contains only a few basic modes and some VESA modes. In +case your chipset has been detected, some chipset-specific modes are shown as +well (some of these might be missing or unusable on your machine, as different +BIOSes are often shipped with the same card and the mode numbers depend purely +on the VGA BIOS). The modes displayed on the menu are partially sorted: The list starts with the standard modes (80x25 and 80x50) followed by "special" modes (80x28 and 80x43), local modes (if the local modes feature is enabled), VESA modes and finally SVGA modes for the auto-detected adapter. - If you enter "scan" instead of item number / mode ID, the program will try -to scan your video modes in a slightly aggressive, but much more accurate way. -This should reveal all video modes supported by your BIOS. During this process, -the screen will flash wildly and strange things will appear. If you are afraid -this could damage your monitor, don't use this function. After scanning, the -mode ordering is a bit different: the auto-detected SVGA modes are not listed -at all and the modes revealed by the scan are shown before the VESA modes. + If you are not happy with the mode list offered (e.g., if you think your card +is able to do more), you can enter "scan" instead of item number / mode ID. The +program will try to ask the BIOS for all possible video mode numbers and test +what happens. The screen will probably flash wildly for some time, strange +noises will be heard from inside the monitor, and so on; then all consistent +video modes supported by your BIOS will appear (plus maybe some `ghost modes'). +If you are afraid this could damage your monitor, don't use this +function.
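The prompt semantics described above (a one-character menu index, a raw hex mode ID, the literal word "scan", or a bare <ENTER> to keep the current mode) are implemented in real-mode assembler in arch/i386/boot/video.S. Purely as an illustration, here is a hedged C model of that input dispatch; every name in it is invented for the sketch and nothing here is taken from the patch:

/* Hypothetical model of the "Enter mode number or `scan':" prompt. */
#include <string.h>
#include <stdlib.h>

enum choice_kind { CHOICE_KEEP, CHOICE_MENU_ITEM, CHOICE_MODE_ID, CHOICE_SCAN };

struct choice { enum choice_kind kind; unsigned int value; };

static struct choice parse_choice(const char *line)
{
	struct choice c = { CHOICE_KEEP, 0 };

	if (*line == '\0')			/* bare <ENTER>: keep current mode */
		return c;
	if (strcmp(line, "scan") == 0) {	/* aggressive BIOS mode probe */
		c.kind = CHOICE_SCAN;
		return c;
	}
	if (strlen(line) == 1 &&
	    ((line[0] >= '0' && line[0] <= '9') ||
	     (line[0] >= 'a' && line[0] <= 'z'))) {
		/* single character: menu items "0".."9" then "a".."z" */
		c.kind = CHOICE_MENU_ITEM;
		c.value = line[0] <= '9' ? line[0] - '0' : line[0] - 'a' + 10;
		return c;
	}
	c.kind = CHOICE_MODE_ID;		/* otherwise a raw hex mode ID */
	c.value = (unsigned int)strtoul(line, NULL, 16);
	return c;
}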
+ + After scanning, the mode ordering is a bit different: the auto-detected SVGA +modes are not listed at all and the modes revealed by `scan' are shown before +all VESA modes. 3. Mode ID's ~~~~~~~~~~~~ Because of the complexity of all the video stuff, the video mode ID's used here are also a bit complex. A video mode ID is a 16-bit number usually -expressed in a hexadecimal notation (starting with "0x"). The ID numbers -can be divided to three regions: +expressed in a hexadecimal notation (starting with "0x"). You can set a mode +by entering its ID directly if you know it, even if it isn't shown on the menu. - 0x0000 to 0x00ff - menu item references. 0x0000 is the first item. +The ID numbers can be divided into several regions: + + 0x0000 to 0x00ff - menu item references. 0x0000 is the first item. Don't use + them outside the menu, as they can change from boot to boot (especially if you + have used the `scan' feature). 0x0100 to 0x017f - standard BIOS modes. The ID is a BIOS video mode number - (as presented to INT 10, function 00) increased by 0x0100. You can - use any mode numbers even if not shown on the menu. + (as presented to INT 10, function 00) increased by 0x0100. 0x0200 to 0x08ff - VESA BIOS modes. The ID is a VESA mode ID increased by 0x0100. All VESA modes should be autodetected and shown on the menu. 0x0900 to 0x09ff - Video7 special modes. Set by calling INT 0x10, AX=0x6f05. + (Usually 0x0940=80x43, 0x0941=132x25, 0x0942=132x44, 0x0943=80x60, + 0x0944=100x60, 0x0945=132x28 for the standard Video7 BIOS) 0x0f00 to 0x0fff - special modes (they are set by various tricks -- usually by modifying one of the standard modes). Currently available: @@ -123,7 +132,9 @@ 0x1000 to 0x7fff - modes specified by resolution. The code has a "0xRRCC" form where RR is the number of rows and CC is the number of columns. E.g., 0x1950 corresponds to an 80x25 mode, 0x2b84 to 132x43 etc. - This is the only fully portable way to refer to a non-standard mode. + This is the only fully portable way to refer to a non-standard mode, + but it relies on the mode being found and displayed on the menu + (remember that mode scanning is not done automatically). 0xff00 to 0xffff - aliases for backward compatibility: 0xffff equivalent to 0x0f00 (standard 80x25) @@ -131,8 +142,9 @@ If you add 0x8000 to the mode ID, the program will try to recalculate vertical display timing according to mode parameters, which can be used to -eliminate some annoying bugs of certain VGA BIOS'es -- mainly extra lines at -the end of the display. +eliminate some annoying bugs of certain VGA BIOS'es (usually those used for +cards with S3 chipsets and old Cirrus Logic BIOSes) -- mainly extra lines at the +end of the display.
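The region table above is just a numbering scheme, so it can be summarized in a few lines of code. This is an illustrative decoder, not kernel code; it only restates the ranges documented in this hunk:

/* Sketch: classifying the mode ID regions listed above. */
#include <stdio.h>

static void describe_mode_id(unsigned int id)
{
	if (id >= 0xff00) {			/* backward-compatibility aliases */
		printf("alias (0xffff = standard 80x25)\n");
		return;
	}
	if (id & 0x8000) {			/* "recalculate vertical timings" flag */
		printf("[vertical retiming] ");
		id &= 0x7fff;
	}
	if (id <= 0x00ff)
		printf("menu item %u\n", id);
	else if (id >= 0x0100 && id <= 0x017f)
		printf("BIOS mode 0x%02x (INT 10, function 00)\n", id - 0x0100);
	else if (id >= 0x0200 && id <= 0x08ff)
		printf("VESA mode 0x%03x\n", id - 0x0100);
	else if (id >= 0x0900 && id <= 0x09ff)
		printf("Video7 special mode (INT 0x10, AX=0x6f05)\n");
	else if (id >= 0x0f00 && id <= 0x0fff)
		printf("special mode (tweaked standard mode)\n");
	else if (id >= 0x1000 && id <= 0x7fff)	/* 0xRRCC: rows in RR, cols in CC */
		printf("%ux%u mode by resolution\n", id & 0xff, id >> 8);
}

For example, describe_mode_id(0x1950) prints "80x25 mode by resolution", matching the 0x1950 example in the text.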
4. Options ~~~~~~~~~~ @@ -140,22 +152,27 @@ All of them are simple #define's -- change them to #undef's when you want to switch them off. Currently supported: - CONFIG_VIDEO_SVGA - enables autodetection of SVGA cards. If your card is -detected incorrectly, you can switch this off. + CONFIG_VIDEO_SVGA - enables autodetection of SVGA cards. This is switched +off by default as it's a bit unreliable due to terribly bad PC design. If you +really want to have the adapter autodetected (maybe in case the `scan' feature +doesn't work on your machine), switch this on and don't cry if the results +are not completely sane. In case you really need this feature, please drop me +a mail as I am thinking of removing it some day. CONFIG_VIDEO_VESA - enables autodetection of VESA modes. If it doesn't work on your machine (or displays an "Error: Scanning of VESA modes failed" message), you can switch it off and report it as a bug. - CONFIG_VIDEO_COMPACT - enables compacting of the video mode list. Duplicate -entries (those with the same screen size) are deleted except for the first one -(see the previous section for more information on mode ordering). However, -it's possible that the first variant doesn't work, while some of the others do --- in this case turn this switch off to see the rest. + CONFIG_VIDEO_COMPACT - enables compacting of the video mode list. If there +are several modes with the same screen size, only the first one is kept (see above +for more info on mode ordering). However, in very strange cases it's possible +that the first "version" of the mode doesn't work although some of the others +do -- in this case turn this switch off to see the rest. CONFIG_VIDEO_RETAIN - enables retaining of screen contents when switching video modes. Works only with some boot loaders which leave enough room for the -buffer. +buffer. (If you have an old LILO, you can adjust heap_end_ptr and loadflags +in setup.S, but it's better to upgrade the boot loader...) CONFIG_VIDEO_LOCAL - enables inclusion of "local modes" in the list. The local modes are added automatically to the beginning of the list not depending @@ -177,25 +194,7 @@ unless you think you know what you're doing. To activate this setup, use mode number 0x0f08 (see section 3). -5. Adding more cards -~~~~~~~~~~~~~~~~~~~~ - If you have a card not detected by the driver and you are a good programmer, -feel free to add it to the source and send me a diff. It's very simple: You -have to add a new entry to the svga_table consisting of a pointer to your mode -table and a pointer to your detection routine. The order of entries in the -svga_table defines the order of probing. Please use only reliable detection -routines which are known to identify _only_ the card in question. - - The detection routine is called with BP pointing to your mode table and -ES containing 0xc000. If you want, you may alter BP allowing to select an -appropriate mode table according to model ID detected. If the detection fails, -return BP=0. - - The mode table consists of lines containing a (BIOS mode number, rows, -columns) triple and is finished by a single zero byte followed by NUL-terminated -adapter name. - -6. Still doesn't work? +5. Still doesn't work? ~~~~~~~~~~~~~~~~~~~~~~ When the mode detection doesn't work (e.g., the mode list is incorrect or the machine hangs instead of displaying the menu), try to switch off some of @@ -207,11 +206,11 @@ If you start Linux from M$-DOS, you might also use some DOS tools for video mode setting. In this case, you must specify the 0x0f04 mode ("leave -current settings") to Linux, because if you use anything other, the 80x25 -mode will be used automatically. +current settings") to Linux, because if you don't and you use any non-standard +mode, Linux will switch to 80x25 automatically. - If you set some SVGA mode and there's one or more extra lines on the -bottom of the display containing already scrolled-out lines, your VGA BIOS + If you set some extended mode and there are one or more extra lines at the +bottom of the display containing already scrolled-out text, your VGA BIOS contains the most common video BIOS bug called "incorrect vertical display end setting". Adding 0x8000 to the mode ID might fix the problem. Unfortunately, this must be done manually -- no autodetection mechanisms are available.
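The options above follow the #define/#undef convention the text describes; in the real tree the switches sit at the top of arch/i386/boot/video.S (see the video.S hunk later in this patch, which flips CONFIG_VIDEO_SVGA to #undef). The sketch below shows the convention in C form only for brevity; the probe helper names are invented:

/* Sketch of the compile-time switch convention -- not from the patch. */
#define CONFIG_VIDEO_VESA		/* VESA mode autodetection stays on */
#undef  CONFIG_VIDEO_SVGA		/* SVGA chipset probing: off by default in 2.11 */

extern void probe_vesa_modes(void);	/* hypothetical helpers */
extern void probe_svga_chipset(void);

static void build_mode_list(void)
{
#ifdef CONFIG_VIDEO_VESA
	probe_vesa_modes();
#endif
#ifdef CONFIG_VIDEO_SVGA
	probe_svga_chipset();		/* never compiled with the 2.11 defaults */
#endif
}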
@@ -220,7 +219,7 @@ is probably broken and you need to set the CONFIG_VIDEO_400_HACK switch to force setting of the correct mode. -7. History +6. History ~~~~~~~~~~ 1.0 (??-Nov-95) First version supporting all adapters supported by the old setup.S + Cirrus Logic 54XX. Present in some 1.3.4? kernels @@ -266,3 +265,8 @@ - Added the CONFIG_VIDEO_400_HACK switch. - Added the CONFIG_VIDEO_GFX_HACK switch. - Code cleanup. +2.11 (03-May-97)- Yet another cleanup, now including also the documentation. + - Direct testing of SVGA adapters turned off by default, `scan' + offered explicitly on the prompt line. + - Removed the doc section describing adding of new probing + functions as I try to get rid of _all_ hardware probing here. diff -u --recursive --new-file v2.1.37/linux/Makefile linux/Makefile --- v2.1.37/linux/Makefile Tue May 13 22:41:00 1997 +++ linux/Makefile Wed May 14 15:01:20 1997 @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 1 -SUBLEVEL = 37 +SUBLEVEL = 38 ARCH := $(shell uname -m | sed s/i.86/i386/) @@ -181,7 +181,7 @@ $(DRIVERS) \ $(LIBS) \ -o vmlinux - $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( a \)\|\(\.\.ng$$\)' | sort > System.map + $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aU] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map symlinks: rm -f include/asm diff -u --recursive --new-file v2.1.37/linux/arch/alpha/mm/fault.c linux/arch/alpha/mm/fault.c --- v2.1.37/linux/arch/alpha/mm/fault.c Fri Dec 27 02:03:19 1996 +++ linux/arch/alpha/mm/fault.c Wed May 14 13:07:32 1997 @@ -97,7 +97,7 @@ if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } - handle_mm_fault(vma, address, cause > 0); + handle_mm_fault(tsk, vma, address, cause > 0); up(&mm->mmap_sem); return; diff -u --recursive --new-file v2.1.37/linux/arch/i386/Makefile linux/arch/i386/Makefile --- v2.1.37/linux/arch/i386/Makefile Tue May 13 22:41:00 1997 +++ linux/arch/i386/Makefile Thu May 15 15:45:12 1997 @@ -85,7 +85,7 @@ @$(MAKEBOOT) BOOTIMAGE=bzImage zdisk install: vmlinux - @$(MAKEBOOT) BOOTIMAGE=zImage install + @$(MAKEBOOT) BOOTIMAGE=bzImage install archclean: @$(MAKEBOOT) clean diff -u --recursive --new-file v2.1.37/linux/arch/i386/boot/video.S linux/arch/i386/boot/video.S --- v2.1.37/linux/arch/i386/boot/video.S Tue Nov 12 03:08:43 1996 +++ linux/arch/i386/boot/video.S Wed May 14 15:01:20 1997 @@ -1,12 +1,13 @@ ! -! Display adapter & video mode setup, version 2.10 (11-Nov-96) +! Display adapter & video mode setup, version 2.11 (03-May-97) ! -! Copyright (C) 1995, 1996 Martin Mares +! Copyright (C) 1995 -- 1997 Martin Mares ! Based on the original setup.S code (C) Linus Torvalds and Mats Anderson ! -! Enable autodetection of SVGA adapters and modes -#define CONFIG_VIDEO_SVGA +! Enable autodetection of SVGA adapters and modes. If you really need this +! feature, drop me a mail as I think of removing it some day... +#undef CONFIG_VIDEO_SVGA ! Enable autodetection of VESA modes #define CONFIG_VIDEO_VESA @@ -1807,11 +1808,11 @@ .ascii "Mode: COLSxROWS:" crlft: db 0x0d, 0x0a, 0 prompt: db 0x0d, 0x0a - .ascii "Enter mode number: " + .ascii "Enter mode number or `scan': " db 0 unknt: .ascii "Unknown mode ID. Try again." db 0 -badmdt: .ascii "You passed an undefined mode number to setup." +badmdt: .ascii "You passed an undefined mode number." db 0x0d, 0x0a, 0 vesaer: .ascii "Error: Scanning of VESA modes failed. Please " .ascii "report to ." 
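A change that recurs through the rest of this patch (alpha and i386 fault.c above, i386 ptrace.c, sparc ptrace.c and asyncd.c below) is that handle_mm_fault() now takes the faulting task as an explicit first argument. A hedged reconstruction of the interface from the call sites in this patch -- the authoritative prototype lives in the 2.1.38 mm headers and may differ in detail:

/* Sketch reconstructed from the call sites, not copied from the patch. */
struct task_struct;
struct vm_area_struct;

extern void handle_mm_fault(struct task_struct *tsk,
			    struct vm_area_struct *vma,
			    unsigned long address, int write_access);

/*
 * 2.1.37 callers:  handle_mm_fault(vma, address, write);
 * 2.1.38 callers:  handle_mm_fault(tsk, vma, address, write);
 *
 * Passing tsk lets ptrace, asyncd and the sparc fault paths service a
 * fault on behalf of a task other than current; previously they had to
 * call do_no_page()/do_wp_page() directly for that, as the sparc
 * ptrace.c hunks below show being removed.
 */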
diff -u --recursive --new-file v2.1.37/linux/arch/i386/kernel/bios32.c linux/arch/i386/kernel/bios32.c --- v2.1.37/linux/arch/i386/kernel/bios32.c Tue May 13 22:41:00 1997 +++ linux/arch/i386/kernel/bios32.c Wed May 14 15:01:20 1997 @@ -1,6 +1,8 @@ /* * bios32.c - BIOS32, PCI BIOS functions. * + * $Id: bios32.c,v 1.11 1997/05/07 13:35:21 mj Exp $ + * * Sponsored by * iX Multiuser Multitasking Magazine * Hannover, Germany @@ -55,6 +57,8 @@ * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS * (mj@atrey.karlin.mff.cuni.cz) * + * May 7, 1997 : Added some missing cli()'s. [mj] + * */ #include @@ -161,7 +165,7 @@ unsigned long entry; /* %edx */ unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%edi)" : "=a" (return_code), "=b" (address), @@ -204,7 +208,7 @@ if ((pcibios_entry = bios32_service(PCI_SERVICE))) { pci_indirect.address = pcibios_entry | PAGE_OFFSET; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%edi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -249,7 +253,7 @@ unsigned long ret; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__ ("lcall (%%edi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -274,7 +278,7 @@ unsigned short ret; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%edi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -299,7 +303,7 @@ unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -321,7 +325,7 @@ unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -343,7 +347,7 @@ unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -365,7 +369,7 @@ unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -387,7 +391,7 @@ unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -409,7 +413,7 @@ unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -480,7 +484,7 @@ struct pci_dev *dev; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); for (dev = pci_devices; dev; dev = dev->next) { if (dev->class == class_code) { if (curr == index) { @@ -506,7 +510,7 @@ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); switch (where & 3) { case 0: *value = inb(0xCFC); @@ -527,7 +531,7 @@ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); if (where & 2) *value = inw(0xCFE); @@ -542,7 +546,7 @@ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); *value = inl(0xCFC); restore_flags(flags); @@ -554,7 +558,7 @@ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); outb(value, 0xCFC); restore_flags(flags); @@ -566,7 +570,7 @@ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); 
outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); outw(value, 0xCFC); restore_flags(flags); @@ -578,7 +582,7 @@ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); outl(value, 0xCFC); restore_flags(flags); @@ -614,7 +618,7 @@ if (device_fn & 0x80) return PCIBIOS_DEVICE_NOT_FOUND; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); *value = inb(IOADDR(device_fn,where)); @@ -630,7 +634,7 @@ if (device_fn & 0x80) return PCIBIOS_DEVICE_NOT_FOUND; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); *value = inw(IOADDR(device_fn,where)); @@ -646,7 +650,7 @@ if (device_fn & 0x80) return PCIBIOS_DEVICE_NOT_FOUND; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); *value = inl (IOADDR(device_fn,where)); @@ -660,7 +664,7 @@ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); outb (value, IOADDR(device_fn,where)); @@ -674,7 +678,7 @@ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); outw (value, IOADDR(device_fn,where)); @@ -688,7 +692,7 @@ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); outl (value, IOADDR(device_fn,where)); @@ -720,7 +724,7 @@ unsigned int tmp; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); /* * check if configuration type 1 works diff -u --recursive --new-file v2.1.37/linux/arch/i386/kernel/ptrace.c linux/arch/i386/kernel/ptrace.c --- v2.1.37/linux/arch/i386/kernel/ptrace.c Tue May 13 22:41:01 1997 +++ linux/arch/i386/kernel/ptrace.c Wed May 14 13:11:56 1997 @@ -83,7 +83,7 @@ repeat: pgdir = pgd_offset(vma->vm_mm, addr); if (pgd_none(*pgdir)) { - handle_mm_fault(vma, addr, 0); + handle_mm_fault(tsk, vma, addr, 0); goto repeat; } if (pgd_bad(*pgdir)) { @@ -93,7 +93,7 @@ } pgmiddle = pmd_offset(pgdir, addr); if (pmd_none(*pgmiddle)) { - handle_mm_fault(vma, addr, 0); + handle_mm_fault(tsk, vma, addr, 0); goto repeat; } if (pmd_bad(*pgmiddle)) { @@ -103,7 +103,7 @@ } pgtable = pte_offset(pgmiddle, addr); if (!pte_present(*pgtable)) { - handle_mm_fault(vma, addr, 0); + handle_mm_fault(tsk, vma, addr, 0); goto repeat; } page = pte_page(*pgtable); @@ -134,7 +134,7 @@ repeat: pgdir = pgd_offset(vma->vm_mm, addr); if (!pgd_present(*pgdir)) { - handle_mm_fault(vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } if (pgd_bad(*pgdir)) { @@ -144,7 +144,7 @@ } pgmiddle = pmd_offset(pgdir, addr); if (pmd_none(*pgmiddle)) { - handle_mm_fault(vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } if (pmd_bad(*pgmiddle)) { @@ -154,12 +154,12 @@ } pgtable = pte_offset(pgmiddle, addr); if (!pte_present(*pgtable)) { - handle_mm_fault(vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } page = pte_page(*pgtable); if (!pte_write(*pgtable)) { - handle_mm_fault(vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } /* this is a hack for non-kernel-mapped video buffers and similar */ diff -u --recursive --new-file v2.1.37/linux/arch/i386/kernel/setup.c linux/arch/i386/kernel/setup.c --- v2.1.37/linux/arch/i386/kernel/setup.c Tue May 13 22:41:01 1997 +++ linux/arch/i386/kernel/setup.c Thu May 15 15:45:12 1997 @@ -325,6 +325,8 @@ sep_bug = CD(have_cpuid) && (CD(x86_capability) & 0x800) && + !memcmp(x86_vendor_id, 
"GenuineIntel", 12) && + CD(x86) == 6 && CD(x86_model) < 3 && CD(x86_mask) < 3; diff -u --recursive --new-file v2.1.37/linux/arch/i386/kernel/signal.c linux/arch/i386/kernel/signal.c --- v2.1.37/linux/arch/i386/kernel/signal.c Tue May 13 22:41:01 1997 +++ linux/arch/i386/kernel/signal.c Thu May 15 12:24:24 1997 @@ -392,10 +392,12 @@ case SIGQUIT: case SIGILL: case SIGTRAP: case SIGABRT: case SIGFPE: case SIGSEGV: + lock_kernel(); if (current->binfmt && current->binfmt->core_dump) { if (current->binfmt->core_dump(signr, regs)) signr |= 0x80; } + unlock_kernel(); /* fall through */ default: spin_lock_irq(¤t->sigmask_lock); diff -u --recursive --new-file v2.1.37/linux/arch/i386/lib/semaphore.S linux/arch/i386/lib/semaphore.S --- v2.1.37/linux/arch/i386/lib/semaphore.S Tue May 13 22:41:01 1997 +++ linux/arch/i386/lib/semaphore.S Wed May 14 16:00:05 1997 @@ -22,14 +22,13 @@ popl %eax /* restore %eax */ ret +/* Don't save/restore %eax, because that will be our return value */ ENTRY(__down_failed_interruptible) - pushl %eax /* save %eax */ pushl %edx /* save %edx */ pushl %ecx /* save %ecx (and argument) */ call SYMBOL_NAME(__down_interruptible) popl %ecx /* restore %ecx (count on __down_interruptible not changing it) */ popl %edx /* restore %edx */ - popl %eax /* restore %eax */ ret ENTRY(__up_wakeup) diff -u --recursive --new-file v2.1.37/linux/arch/i386/mm/fault.c linux/arch/i386/mm/fault.c --- v2.1.37/linux/arch/i386/mm/fault.c Tue May 13 22:41:01 1997 +++ linux/arch/i386/mm/fault.c Wed May 14 13:07:13 1997 @@ -49,7 +49,7 @@ start &= PAGE_MASK; for (;;) { - handle_mm_fault(vma, start, 1); + handle_mm_fault(current,vma, start, 1); if (!size) break; size--; @@ -142,7 +142,7 @@ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - handle_mm_fault(vma, address, write); + handle_mm_fault(tsk, vma, address, write); up(&mm->mmap_sem); /* * Did it hit the DOS screen memory VA from vm86 mode? diff -u --recursive --new-file v2.1.37/linux/arch/sparc/kernel/irq.c linux/arch/sparc/kernel/irq.c --- v2.1.37/linux/arch/sparc/kernel/irq.c Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc/kernel/irq.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: irq.c,v 1.72 1997/04/20 11:41:26 ecd Exp $ +/* $Id: irq.c,v 1.75 1997/05/08 20:57:37 davem Exp $ * arch/sparc/kernel/irq.c: Interrupt request handling routines. On the * Sparc the IRQ's are basically 'cast in stone' * and you are supposed to probe the prom's device @@ -313,8 +313,180 @@ /* Global IRQ locking depth. */ atomic_t global_irq_count = ATOMIC_INIT(0); +#ifdef DEBUG_IRQLOCK + +static unsigned long previous_irqholder; + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if (!--stuck) {printk("wait_on_irq CPU#%d stuck at %08lx, waiting for %08lx (local=%d, global=%d)\n", cpu, where, previous_irqholder, local_count, atomic_read(&global_irq_count)); stuck = INIT_STUCK; } + +static inline void wait_on_irq(int cpu, unsigned long where) +{ + int stuck = INIT_STUCK; + int local_count = local_irq_count[cpu]; + + /* Are we the only one in an interrupt context? */ + while (local_count != atomic_read(&global_irq_count)) { + /* + * No such luck. Now we need to release the lock, + * _and_ release our interrupt context, because + * otherwise we'd have dead-locks and live-locks + * and other fun things. + */ + atomic_sub(local_count, &global_irq_count); + spin_unlock(&global_irq_lock); + + /* + * Wait for everybody else to go away and release + * their things before trying to get the lock again. 
+ */ + for (;;) { + STUCK; + if (atomic_read(&global_irq_count)) + continue; + if (*((unsigned char *)&global_irq_lock)) + continue; + if (spin_trylock(&global_irq_lock)) + break; + } + atomic_add(local_count, &global_irq_count); + } +} + +#undef INIT_STUCK +#define INIT_STUCK 10000000 + +#undef STUCK +#define STUCK \ +if (!--stuck) {printk("get_irqlock stuck at %08lx, waiting for %08lx\n", where, previous_irqholder); stuck = INIT_STUCK;} + +static inline void get_irqlock(int cpu, unsigned long where) +{ + int stuck = INIT_STUCK; + + if (!spin_trylock(&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + do { + do { + STUCK; + barrier(); + } while (*((unsigned char *)&global_irq_lock)); + } while (!spin_trylock(&global_irq_lock)); + } + /* + * Ok, we got the lock bit. + * But that's actually just the easy part.. Now + * we need to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu, where); + + /* + * Finally. + */ + global_irq_holder = cpu; + previous_irqholder = where; +} + +void __global_cli(void) +{ + int cpu = smp_processor_id(); + unsigned long where; + + __asm__("mov %%i7, %0" : "=r" (where)); + __cli(); + get_irqlock(cpu, where); +} + +void __global_sti(void) +{ + release_irqlock(smp_processor_id()); + __sti(); +} + +unsigned long __global_save_flags(void) +{ + return global_irq_holder == (unsigned char) smp_processor_id(); +} + +void __global_restore_flags(unsigned long flags) +{ + if(flags & 1) { + __global_cli(); + } else { + /* release_irqlock() */ + if(global_irq_holder == smp_processor_id()) { + global_irq_holder = NO_PROC_ID; + spin_unlock(&global_irq_lock); + } + if(!(flags & 2)) + __sti(); + } +} + +#undef INIT_STUCK +#define INIT_STUCK 200000000 + +#undef STUCK +#define STUCK \ +if (!--stuck) {printk("irq_enter stuck (irq=%d, cpu=%d, global=%d)\n",irq,cpu,global_irq_holder); stuck = INIT_STUCK;} + +#define VERBOSE_IRQLOCK_DEBUGGING + +void irq_enter(int cpu, int irq, void *_opaque) +{ +#ifdef VERBOSE_IRQLOCK_DEBUGGING + extern void smp_show_backtrace_all_cpus(void); +#endif + int stuck = INIT_STUCK; + + hardirq_enter(cpu); + barrier(); + while (*((unsigned char *)&global_irq_lock)) { + if ((unsigned char) cpu == global_irq_holder) { + struct pt_regs *regs = _opaque; + int sbh_cnt = atomic_read(&__sparc_bh_counter); + int globl_locked = *((unsigned char *)&global_irq_lock); + int globl_icount = atomic_read(&global_irq_count); + int local_count = local_irq_count[cpu]; + unsigned long pc = regs->pc; + + /* It is very important that we load the state variables + * before we do the first call to printk() as printk() + * could end up changing them... + */ + + printk("CPU[%d]: BAD! Local IRQ's enabled, global disabled " + "interrupt at PC[%08lx]\n", cpu, pc); + printk("CPU[%d]: bhcnt[%d] glocked[%d] gicnt[%d] licnt[%d]\n", + cpu, sbh_cnt, globl_locked, globl_icount, local_count); +#ifdef VERBOSE_IRQLOCK_DEBUGGING + printk("Performing backtrace on all cpus, write this down!\n"); + smp_show_backtrace_all_cpus(); +#endif + break; + } + STUCK; + barrier(); + } +} + +void irq_exit(int cpu, int irq) +{ + hardirq_exit(cpu); + release_irqlock(cpu); +} + +#endif /* DEBUG_IRQLOCK */ + /* There has to be a better way. 
*/ -/* XXX Must write faster version in irqlock.S -DaveM */ void synchronize_irq(void) { int cpu = smp_processor_id(); @@ -371,7 +543,7 @@ if(irq < 10) smp_irq_rotate(cpu); #endif - irq_enter(cpu, cpu_irq); + irq_enter(cpu, cpu_irq, regs); action = *(cpu_irq + irq_action); kstat.interrupts[cpu_irq]++; do { @@ -392,7 +564,7 @@ int cpu = smp_processor_id(); disable_pil_irq(irq); - irq_enter(cpu, irq); + irq_enter(cpu, irq, regs); floppy_interrupt(irq, dev_id, regs); irq_exit(cpu, irq); enable_pil_irq(irq); diff -u --recursive --new-file v2.1.37/linux/arch/sparc/kernel/process.c linux/arch/sparc/kernel/process.c --- v2.1.37/linux/arch/sparc/kernel/process.c Tue May 13 22:41:03 1997 +++ linux/arch/sparc/kernel/process.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: process.c,v 1.96 1997/05/01 08:53:33 davem Exp $ +/* $Id: process.c,v 1.98 1997/05/14 20:44:54 davem Exp $ * linux/arch/sparc/kernel/process.c * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -41,6 +41,8 @@ extern void fpsave(unsigned long *, unsigned long *, void *, unsigned long *); +struct task_struct *current_set[NR_CPUS] = {&init_task, }; + #ifndef __SMP__ #define SUN4C_FAULT_HIGH 100 @@ -191,6 +193,37 @@ rw->ins[0], rw->ins[1], rw->ins[2], rw->ins[3], rw->ins[4], rw->ins[5], rw->ins[6], rw->ins[7]); } + +static spinlock_t sparc_backtrace_lock = SPIN_LOCK_UNLOCKED; + +void show_backtrace(void) +{ + struct reg_window *rw; + unsigned long flags; + unsigned long fp; + int cpu = smp_processor_id(); + + spin_lock_irqsave(&sparc_backtrace_lock, flags); + __asm__ __volatile__("mov %%i6, %0" : "=r" (fp)); + rw = (struct reg_window *) fp; + while(rw) { + printk("CPU[%d]: ARGS[%08lx,%08lx,%08lx,%08lx,%08lx,%08lx] " + "FP[%08lx] CALLER[%08lx]\n", cpu, + rw->ins[0], rw->ins[1], rw->ins[2], rw->ins[3], + rw->ins[4], rw->ins[5], + rw->ins[6], + rw->ins[7]); + rw = (struct reg_window *) rw->ins[6]; + } + spin_unlock_irqrestore(&sparc_backtrace_lock, flags); +} + +#ifdef __SMP__ +void smp_show_backtrace_all_cpus(void) +{ + xc0((smpfunc_t) show_backtrace); +} +#endif void show_stackframe(struct sparc_stackf *sf) { diff -u --recursive --new-file v2.1.37/linux/arch/sparc/kernel/ptrace.c linux/arch/sparc/kernel/ptrace.c --- v2.1.37/linux/arch/sparc/kernel/ptrace.c Tue May 13 22:41:03 1997 +++ linux/arch/sparc/kernel/ptrace.c Thu May 15 14:43:52 1997 @@ -41,7 +41,7 @@ repeat: pgdir = pgd_offset(vma->vm_mm, addr); if (pgd_none(*pgdir)) { - do_no_page(tsk, vma, addr, 0); + handle_mm_fault(tsk, vma, addr, 0); goto repeat; } if (pgd_bad(*pgdir)) { @@ -51,7 +51,7 @@ } pgmiddle = pmd_offset(pgdir, addr); if (pmd_none(*pgmiddle)) { - do_no_page(tsk, vma, addr, 0); + handle_mm_fault(tsk, vma, addr, 0); goto repeat; } if (pmd_bad(*pgmiddle)) { @@ -61,7 +61,7 @@ } pgtable = pte_offset(pgmiddle, addr); if (!pte_present(*pgtable)) { - do_no_page(tsk, vma, addr, 0); + handle_mm_fault(tsk, vma, addr, 0); goto repeat; } page = pte_page(*pgtable); @@ -94,7 +94,7 @@ repeat: pgdir = pgd_offset(vma->vm_mm, addr); if (!pgd_present(*pgdir)) { - do_no_page(tsk, vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } if (pgd_bad(*pgdir)) { @@ -104,7 +104,7 @@ } pgmiddle = pmd_offset(pgdir, addr); if (pmd_none(*pgmiddle)) { - do_no_page(tsk, vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } if (pmd_bad(*pgmiddle)) { @@ -114,12 +114,12 @@ } pgtable = pte_offset(pgmiddle, addr); if (!pte_present(*pgtable)) { - do_no_page(tsk, vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } page = pte_page(*pgtable); if 
(!pte_write(*pgtable)) { - do_wp_page(tsk, vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } /* this is a hack for non-kernel-mapped video buffers and similar */ diff -u --recursive --new-file v2.1.37/linux/arch/sparc/kernel/setup.c linux/arch/sparc/kernel/setup.c --- v2.1.37/linux/arch/sparc/kernel/setup.c Mon Apr 14 16:28:07 1997 +++ linux/arch/sparc/kernel/setup.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: setup.c,v 1.83 1997/04/01 02:21:49 davem Exp $ +/* $Id: setup.c,v 1.84 1997/05/08 17:45:16 davem Exp $ * linux/arch/sparc/kernel/setup.c * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -83,7 +83,8 @@ #ifdef __SMP__ global_irq_holder = NO_PROC_ID; - global_irq_lock = global_bh_lock = 0; + *((unsigned char *)&global_irq_lock) = 0; + *((unsigned char *)&global_bh_lock) = 0; #endif __save_and_cli(flags); __asm__ __volatile__("rd %%tbr, %0\n\t" : "=r" (prom_tbr)); diff -u --recursive --new-file v2.1.37/linux/arch/sparc/kernel/signal.c linux/arch/sparc/kernel/signal.c --- v2.1.37/linux/arch/sparc/kernel/signal.c Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc/kernel/signal.c Thu May 15 14:43:52 1997 @@ -1,4 +1,4 @@ -/* $Id: signal.c,v 1.73 1997/04/16 05:56:05 davem Exp $ +/* $Id: signal.c,v 1.74 1997/05/15 19:57:09 davem Exp $ * linux/arch/sparc/kernel/signal.c * * Copyright (C) 1991, 1992 Linus Torvalds @@ -780,8 +780,10 @@ case SIGQUIT: case SIGILL: case SIGTRAP: case SIGABRT: case SIGFPE: case SIGSEGV: case SIGBUS: if(current->binfmt && current->binfmt->core_dump) { + lock_kernel(); if(current->binfmt->core_dump(signr, regs)) signr |= 0x80; + unlock_kernel(); } #ifdef DEBUG_SIGNALS /* Very useful to debug dynamic linker problems */ diff -u --recursive --new-file v2.1.37/linux/arch/sparc/kernel/smp.c linux/arch/sparc/kernel/smp.c --- v2.1.37/linux/arch/sparc/kernel/smp.c Tue May 13 22:41:03 1997 +++ linux/arch/sparc/kernel/smp.c Wed May 14 15:01:20 1997 @@ -38,6 +38,9 @@ extern void calibrate_delay(void); +/* XXX Let's get rid of this thing if we can... */ +extern struct task_struct *current_set[NR_CPUS]; + volatile int smp_processors_ready = 0; unsigned long cpu_present_map = 0; @@ -388,12 +391,6 @@ unsigned long processors_out[NR_CPUS]; /* Set when ipi exited. */ } ccall_info; -/* Returns failure code if for example any of the cpu's failed to respond - * within a certain timeout period. - */ - -#define CCALL_TIMEOUT 5000000 /* enough for initial testing */ - static spinlock_t cross_call_lock = SPIN_LOCK_UNLOCKED; /* Cross calls must be serialized, at least currently. 
*/ @@ -465,43 +462,98 @@ void smp_flush_cache_mm(struct mm_struct *mm) { - if(mm->context != NO_CONTEXT) - xc1((smpfunc_t) local_flush_cache_mm, (unsigned long) mm); + if(mm->context != NO_CONTEXT) { + if(mm->cpu_vm_mask == (1 << smp_processor_id())) + local_flush_cache_mm(mm); + else + xc1((smpfunc_t) local_flush_cache_mm, (unsigned long) mm); + } } void smp_flush_tlb_mm(struct mm_struct *mm) { - if(mm->context != NO_CONTEXT) - xc1((smpfunc_t) local_flush_tlb_mm, (unsigned long) mm); + if(mm->context != NO_CONTEXT) { + if(mm->cpu_vm_mask == (1 << smp_processor_id())) { + local_flush_tlb_mm(mm); + } else { + xc1((smpfunc_t) local_flush_tlb_mm, (unsigned long) mm); + if(mm->count == 1 && current->mm == mm) + mm->cpu_vm_mask = (1 << smp_processor_id()); + } + } } void smp_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end) { - if(mm->context != NO_CONTEXT) - xc3((smpfunc_t) local_flush_cache_range, (unsigned long) mm, - start, end); + if(mm->context != NO_CONTEXT) { + if(mm->cpu_vm_mask == (1 << smp_processor_id())) + local_flush_cache_range(mm, start, end); + else + xc3((smpfunc_t) local_flush_cache_range, (unsigned long) mm, + start, end); + } } void smp_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end) { - if(mm->context != NO_CONTEXT) - xc3((smpfunc_t) local_flush_tlb_range, (unsigned long) mm, - start, end); + if(mm->context != NO_CONTEXT) { + if(mm->cpu_vm_mask == (1 << smp_processor_id())) + local_flush_tlb_range(mm, start, end); + else + xc3((smpfunc_t) local_flush_tlb_range, (unsigned long) mm, + start, end); + } } void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page) -{ xc2((smpfunc_t) local_flush_cache_page, (unsigned long) vma, page); } +{ + struct mm_struct *mm = vma->vm_mm; + + if(mm->context != NO_CONTEXT) { + if(mm->cpu_vm_mask == (1 << smp_processor_id())) + local_flush_cache_page(vma, page); + else + xc2((smpfunc_t) local_flush_cache_page, + (unsigned long) vma, page); + } +} void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) -{ xc2((smpfunc_t) local_flush_tlb_page, (unsigned long) vma, page); } +{ + struct mm_struct *mm = vma->vm_mm; + + if(mm->context != NO_CONTEXT) { + if(mm->cpu_vm_mask == (1 << smp_processor_id())) + local_flush_tlb_page(vma, page); + else + xc2((smpfunc_t) local_flush_tlb_page, (unsigned long) vma, page); + } +} void smp_flush_page_to_ram(unsigned long page) -{ xc1((smpfunc_t) local_flush_page_to_ram, page); } +{ + /* Current theory is that those who call this are the one's + * who have just dirtied their cache with the pages contents + * in kernel space, therefore we only run this on local cpu. + * + * XXX This experiment failed, research further... -DaveM + */ +#if 1 + xc1((smpfunc_t) local_flush_page_to_ram, page); +#else + local_flush_page_to_ram(page); +#endif +} void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) -{ xc2((smpfunc_t) local_flush_sig_insns, (unsigned long) mm, insn_addr); } +{ + if(mm->cpu_vm_mask == (1 << smp_processor_id())) + local_flush_sig_insns(mm, insn_addr); + else + xc2((smpfunc_t) local_flush_sig_insns, (unsigned long) mm, insn_addr); +} /* Reschedule call back. 
*/ void smp_reschedule_irq(void) diff -u --recursive --new-file v2.1.37/linux/arch/sparc/kernel/sparc_ksyms.c linux/arch/sparc/kernel/sparc_ksyms.c --- v2.1.37/linux/arch/sparc/kernel/sparc_ksyms.c Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc/kernel/sparc_ksyms.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: sparc_ksyms.c,v 1.56 1997/04/18 05:44:35 davem Exp $ +/* $Id: sparc_ksyms.c,v 1.59 1997/05/08 17:45:20 davem Exp $ * arch/sparc/kernel/ksyms.c: Sparc specific ksyms support. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) @@ -38,6 +38,7 @@ #include #endif #include +#include struct poll { int fd; @@ -50,9 +51,9 @@ extern unsigned long sunos_mmap(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); void _sigpause_common (unsigned int set, struct pt_regs *); -extern void __copy_1page(void *, const void *); +extern void (*__copy_1page)(void *, const void *); extern void __memmove(void *, const void *, __kernel_size_t); -extern void *bzero_1page(void *); +extern void (*bzero_1page)(void *); extern void *__bzero(void *, size_t); extern void *__memscan_zero(void *, size_t); extern void *__memscan_generic(void *, int, size_t); @@ -87,16 +88,46 @@ EXPORT_SYMBOL_PRIVATE(_lock_kernel); EXPORT_SYMBOL_PRIVATE(_unlock_kernel); EXPORT_SYMBOL_PRIVATE(_spinlock_waitfor); +#ifdef SPIN_LOCK_DEBUG +EXPORT_SYMBOL(_spin_lock); +EXPORT_SYMBOL(_spin_unlock); +EXPORT_SYMBOL(_spin_trylock); +EXPORT_SYMBOL(_spin_lock_irq); +EXPORT_SYMBOL(_spin_unlock_irq); +EXPORT_SYMBOL(_spin_lock_irqsave); +EXPORT_SYMBOL(_spin_unlock_irqrestore); +EXPORT_SYMBOL(_read_lock); +EXPORT_SYMBOL(_read_unlock); +EXPORT_SYMBOL(_read_lock_irq); +EXPORT_SYMBOL(_read_unlock_irq); +EXPORT_SYMBOL(_read_lock_irqsave); +EXPORT_SYMBOL(_read_unlock_irqrestore); +EXPORT_SYMBOL(_write_lock); +EXPORT_SYMBOL(_write_unlock); +EXPORT_SYMBOL(_write_lock_irq); +EXPORT_SYMBOL(_write_unlock_irq); +EXPORT_SYMBOL(_write_lock_irqsave); +EXPORT_SYMBOL(_write_unlock_irqrestore); +#else EXPORT_SYMBOL_PRIVATE(_rw_read_enter); EXPORT_SYMBOL_PRIVATE(_rw_read_exit); EXPORT_SYMBOL_PRIVATE(_rw_write_enter); +#endif EXPORT_SYMBOL(__sparc_bh_counter); #ifdef __SMP__ +#ifdef DEBUG_IRQLOCK +EXPORT_SYMBOL(irq_enter); +EXPORT_SYMBOL(irq_exit); +EXPORT_SYMBOL(__global_restore_flags); +EXPORT_SYMBOL(__global_sti); +EXPORT_SYMBOL(__global_cli); +#else EXPORT_SYMBOL_PRIVATE(_irq_enter); EXPORT_SYMBOL_PRIVATE(_irq_exit); EXPORT_SYMBOL_PRIVATE(_global_restore_flags); EXPORT_SYMBOL_PRIVATE(_global_sti); EXPORT_SYMBOL_PRIVATE(_global_cli); +#endif #endif EXPORT_SYMBOL(page_offset); diff -u --recursive --new-file v2.1.37/linux/arch/sparc/lib/Makefile linux/arch/sparc/lib/Makefile --- v2.1.37/linux/arch/sparc/lib/Makefile Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc/lib/Makefile Wed May 14 15:01:20 1997 @@ -1,13 +1,11 @@ -# $Id: Makefile,v 1.23 1997/04/18 05:44:39 davem Exp $ +# $Id: Makefile,v 1.24 1997/05/08 17:45:26 davem Exp $ # Makefile for Sparc library files.. 
# -CFLAGS := $(CFLAGS) -ansi - OBJS = mul.o rem.o sdiv.o udiv.o umul.o urem.o ashrdi3.o memcpy.o memset.o \ strlen.o checksum.o blockops.o memscan.o memcmp.o strncmp.o \ strncpy_from_user.o divdi3.o udivdi3.o strlen_user.o \ - copy_user.o locks.o atomic.o bitops.o + copy_user.o locks.o atomic.o bitops.o debuglocks.o ifdef SMP OBJS += irqlock.o diff -u --recursive --new-file v2.1.37/linux/arch/sparc/lib/blockops.S linux/arch/sparc/lib/blockops.S --- v2.1.37/linux/arch/sparc/lib/blockops.S Sat Nov 9 00:12:00 1996 +++ linux/arch/sparc/lib/blockops.S Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: blockops.S,v 1.5 1996/09/24 05:22:56 davem Exp $ +/* $Id: blockops.S,v 1.6 1997/05/03 02:01:54 davem Exp $ * blockops.S: Common block zero optimized routines. * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) @@ -46,25 +46,7 @@ .text .align 4 - .globl C_LABEL(bzero_2page), C_LABEL(bzero_1page) -C_LABEL(bzero_2page): - /* %o0 = buf */ - or %g0, %g0, %g1 - or %o0, %g0, %o1 - or %g0, 0x20, %g2 -1: - BLAST_BLOCK(%o0, 0x00) - BLAST_BLOCK(%o0, 0x40) - BLAST_BLOCK(%o0, 0x80) - BLAST_BLOCK(%o0, 0xc0) - subcc %g2, 1, %g2 - bne 1b - add %o0, 0x100, %o0 - - retl - mov %o1, %o0 - -C_LABEL(bzero_1page): +generic_bzero_1page: /* %o0 = buf */ or %g0, %g0, %g1 or %o0, %g0, %o1 @@ -79,10 +61,9 @@ add %o0, 0x100, %o0 retl - mov %o1, %o0 + nop - .globl C_LABEL(__copy_1page) -C_LABEL(__copy_1page): +__generic_copy_1page: /* %o0 = dst, %o1 = src */ or %g0, 0x10, %g1 1: @@ -101,3 +82,9 @@ retl nop + + .data + .align 4 + .globl C_LABEL(bzero_1page), C_LABEL(__copy_1page) +C_LABEL(bzero_1page): .word generic_bzero_1page +C_LABEL(__copy_1page): .word __generic_copy_1page diff -u --recursive --new-file v2.1.37/linux/arch/sparc/lib/debuglocks.c linux/arch/sparc/lib/debuglocks.c --- v2.1.37/linux/arch/sparc/lib/debuglocks.c Wed Dec 31 16:00:00 1969 +++ linux/arch/sparc/lib/debuglocks.c Wed May 14 15:01:20 1997 @@ -0,0 +1,463 @@ +/* $Id: debuglocks.c,v 1.1 1997/05/08 18:13:34 davem Exp $ + * debuglocks.c: Debugging versions of SMP locking primitives. + * + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + */ + +#include +#include +#include +#include +#include + +/* To enable this code, just define SPIN_LOCK_DEBUG in asm/spinlock.h */ +#ifdef SPIN_LOCK_DEBUG + +/* Some notes on how these debugging routines work. When a lock is acquired + * an extra debugging member lock->owner_pc is set to the caller of the lock + * acquisition routine. Right before releasing a lock, the debugging program + * counter is cleared to zero. + * + * Furthermore, since PC's are 4 byte aligned on Sparc, we stuff the CPU + * number of the owner in the lowest two bits. 
+ */ + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("spin_lock(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", lock, cpu, caller, lock->owner_pc & ~3, lock->owner_pc & 3); stuck = INIT_STUCK; } + +void _spin_lock(spinlock_t *lock) +{ + unsigned long caller; + unsigned long val; + int cpu = smp_processor_id(); + int stuck = INIT_STUCK; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); +again: + __asm__ __volatile__("ldstub [%1], %0" : "=r" (val) : "r" (&(lock->lock))); + if(val) { + while(lock->lock) { + STUCK; + barrier(); + } + goto again; + } + lock->owner_pc = (cpu & 3) | (caller & ~3); +} + +int _spin_trylock(spinlock_t *lock) +{ + unsigned long val; + unsigned long caller; + int cpu = smp_processor_id(); + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); + __asm__ __volatile__("ldstub [%1], %0" : "=r" (val) : "r" (&(lock->lock))); + if(!val) { + /* We got it, record our identity for debugging. */ + lock->owner_pc = (cpu & 3) | (caller & ~3); + } + return val == 0; +} + +void _spin_unlock(spinlock_t *lock) +{ + lock->owner_pc = 0; + __asm__ __volatile__("stb %%g0, [%0]" : : "r" (&(lock->lock)) : "memory"); +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("spin_lock_irq(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", lock, cpu, caller, lock->owner_pc & ~3, lock->owner_pc & 3); stuck = INIT_STUCK; } + +void _spin_lock_irq(spinlock_t *lock) +{ + unsigned long caller; + unsigned long val; + int cpu = smp_processor_id(); + int stuck = INIT_STUCK; + + __cli(); + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); +again: + __asm__ __volatile__("ldstub [%1], %0" : "=r" (val) : "r" (&(lock->lock))); + if(val) { + while(lock->lock) { + STUCK; + barrier(); + } + goto again; + } + lock->owner_pc = (cpu & 3) | (caller & ~3); +} + +void _spin_unlock_irq(spinlock_t *lock) +{ + lock->owner_pc = 0; + __asm__ __volatile__("stb %%g0, [%0]" : : "r" (&(lock->lock)) : "memory"); + __sti(); +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("spin_lock_irq(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", lock, cpu, caller, lock->owner_pc & ~3, lock->owner_pc & 3); stuck = INIT_STUCK; } + +/* Caller macro does __save_and_cli(flags) for us. 
*/ +void _spin_lock_irqsave(spinlock_t *lock) +{ + unsigned long caller; + unsigned long val; + int cpu = smp_processor_id(); + int stuck = INIT_STUCK; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); +again: + __asm__ __volatile__("ldstub [%1], %0" : "=r" (val) : "r" (&(lock->lock))); + if(val) { + while(lock->lock) { + STUCK; + barrier(); + } + goto again; + } + lock->owner_pc = (cpu & 3) | (caller & ~3); +} + +void _spin_unlock_irqrestore(spinlock_t *lock) +{ + lock->owner_pc = 0; + __asm__ __volatile__("stb %%g0, [%0]" : : "r" (&(lock->lock)) : "memory"); +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("read_lock(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", rw, cpu, caller, rw->owner_pc & ~3, rw->owner_pc & 3); stuck = INIT_STUCK; } + +void _read_lock(rwlock_t *rw) +{ + unsigned long flags; + unsigned long caller; + unsigned long val; + int cpu = smp_processor_id(); + int stuck = INIT_STUCK; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); + __save_and_cli(flags); +wlock_again: + __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff) { + STUCK; + barrier(); + } + goto wlock_again; + } +clock_again: + __asm__ __volatile__("ldstub [%1 + 2], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff00) { + STUCK; + barrier(); + } + goto clock_again; + } + (*((unsigned short *)&rw->lock))++; + barrier(); + (*(((unsigned short *)&rw->lock)+1)) = 0; + __restore_flags(flags); +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("read_unlock(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", rw, cpu, caller, rw->owner_pc & ~3, rw->owner_pc & 3); stuck = INIT_STUCK; } + +void _read_unlock(rwlock_t *rw) +{ + unsigned long flags, val, caller; + int cpu = smp_processor_id(); + int stuck = INIT_STUCK; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); + __save_and_cli(flags); +clock_again: + __asm__ __volatile__("ldstub [%1 + 2], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff00) { + STUCK; + barrier(); + } + goto clock_again; + } + (*((unsigned short *)&rw->lock))--; + barrier(); + (*(((unsigned char *)&rw->lock)+2))=0; + __restore_flags(flags); +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("write_lock(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", rw, cpu, caller, rw->owner_pc & ~3, rw->owner_pc & 3); stuck = INIT_STUCK; } + +void _write_lock(rwlock_t *rw) +{ + unsigned long flags, val, caller; + int cpu = smp_processor_id(); + int stuck = INIT_STUCK; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); + __save_and_cli(flags); +wlock_again: + __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff) { + STUCK; + barrier(); + } + goto wlock_again; + } + rw->owner_pc = (cpu & 3) | (caller & ~3); + while(rw->lock & ~0xff) { + STUCK; + barrier(); + } +} + +void _write_unlock(rwlock_t *rw) +{ + rw->owner_pc = 0; + barrier(); + rw->lock = 0; +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("read_lock_irq(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", rw, cpu, caller, rw->owner_pc & ~3, rw->owner_pc & 3); stuck = INIT_STUCK; } + +void _read_lock_irq(rwlock_t *rw) +{ + unsigned long caller; + unsigned long val; + int cpu = smp_processor_id(); + 
int stuck = INIT_STUCK; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); + __cli(); +wlock_again: + __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff) { + STUCK; + barrier(); + } + goto wlock_again; + } +clock_again: + __asm__ __volatile__("ldstub [%1 + 2], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff00) { + STUCK; + barrier(); + } + goto clock_again; + } + (*((unsigned short *)&rw->lock))++; + barrier(); + (*(((unsigned short *)&rw->lock)+1)) = 0; +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("read_unlock_irq(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", rw, cpu, caller, rw->owner_pc & ~3, rw->owner_pc & 3); stuck = INIT_STUCK; } + +void _read_unlock_irq(rwlock_t *rw) +{ + unsigned long val, caller; + int stuck = INIT_STUCK; + int cpu = smp_processor_id(); + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); +clock_again: + __asm__ __volatile__("ldstub [%1 + 2], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff00) { + STUCK; + barrier(); + } + goto clock_again; + } + (*((unsigned short *)&rw->lock))--; + barrier(); + (*(((unsigned char *)&rw->lock)+2))=0; + __sti(); +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("write_lock_irq(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", rw, cpu, caller, rw->owner_pc & ~3, rw->owner_pc & 3); stuck = INIT_STUCK; } + +void _write_lock_irq(rwlock_t *rw) +{ + unsigned long val, caller; + int cpu = smp_processor_id(); + int stuck = INIT_STUCK; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); + __cli(); +wlock_again: + __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff) { + STUCK; + barrier(); + } + goto wlock_again; + } + rw->owner_pc = (cpu & 3) | (caller & ~3); + while(rw->lock & ~0xff) { + STUCK; + barrier(); + } +} + +void _write_unlock_irq(rwlock_t *rw) +{ + rw->owner_pc = 0; + barrier(); + rw->lock = 0; + __sti(); +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("read_lock_irqsave(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", rw, cpu, caller, rw->owner_pc & ~3, rw->owner_pc & 3); stuck = INIT_STUCK; } + +/* Caller does __save_and_cli(flags) for us. 
*/ +void _read_lock_irqsave(rwlock_t *rw) +{ + unsigned long caller; + unsigned long val; + int cpu = smp_processor_id(); + int stuck = INIT_STUCK; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); +wlock_again: + __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff) { + STUCK; + barrier(); + } + goto wlock_again; + } +clock_again: + __asm__ __volatile__("ldstub [%1 + 2], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff00) { + STUCK; + barrier(); + } + goto clock_again; + } + (*((unsigned short *)&rw->lock))++; + barrier(); + (*(((unsigned short *)&rw->lock)+1)) = 0; +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("read_unlock_irqrestore(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", rw, cpu, caller, rw->owner_pc & ~3, rw->owner_pc & 3); stuck = INIT_STUCK; } + +void _read_unlock_irqrestore(rwlock_t *rw) +{ + unsigned long val, caller; + int cpu = smp_processor_id(); + int stuck = INIT_STUCK; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); +clock_again: + __asm__ __volatile__("ldstub [%1 + 2], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff00) { + STUCK; + barrier(); + } + goto clock_again; + } + (*((unsigned short *)&rw->lock))--; + barrier(); + (*(((unsigned char *)&rw->lock)+2))=0; +} + +#undef INIT_STUCK +#define INIT_STUCK 100000000 + +#undef STUCK +#define STUCK \ +if(!--stuck) { printk("write_lock_irqsave(%p) CPU#%d stuck at %08lx, owner PC(%08lx):CPU(%lx)\n", rw, cpu, caller, rw->owner_pc & ~3, rw->owner_pc & 3); stuck = INIT_STUCK; } + +/* Caller does __save_and_cli(flags) for us. */ +void _write_lock_irqsave(rwlock_t *rw) +{ + unsigned long val, caller; + int cpu = smp_processor_id(); + int stuck = INIT_STUCK; + + __asm__ __volatile__("mov %%i7, %0" : "=r" (caller)); +wlock_again: + __asm__ __volatile__("ldstub [%1 + 3], %0" : "=r" (val) : "r" (&(rw->lock))); + if(val) { + while(rw->lock & 0xff) { + STUCK; + barrier(); + } + goto wlock_again; + } + rw->owner_pc = (cpu & 3) | (caller & ~3); + while(rw->lock & ~0xff) { + STUCK; + barrier(); + } +} + +void _write_unlock_irqrestore(rwlock_t *rw) +{ + rw->owner_pc = 0; + barrier(); + rw->lock = 0; +} + +#endif /* SPIN_LOCK_DEBUG */ diff -u --recursive --new-file v2.1.37/linux/arch/sparc/mm/Makefile linux/arch/sparc/mm/Makefile --- v2.1.37/linux/arch/sparc/mm/Makefile Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc/mm/Makefile Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -# $Id: Makefile,v 1.24 1997/04/20 14:11:49 ecd Exp $ +# $Id: Makefile,v 1.25 1997/05/03 05:09:11 davem Exp $ # Makefile for the linux Sparc-specific parts of the memory manager. # # Note! 
Dependencies are done automagically by 'make dep', which also @@ -9,12 +9,30 @@ O_TARGET := mm.o O_OBJS := fault.o init.o sun4c.o srmmu.o hypersparc.o viking.o \ - loadmmu.o generic.o asyncd.o extable.o + tsunami.o loadmmu.o generic.o asyncd.o extable.o include $(TOPDIR)/Rules.make +ifdef SMP + +hypersparc.o: hypersparc.S + $(CC) -D__ASSEMBLY__ $(AFLAGS) -ansi -c -o hypersparc.o hypersparc.S + +viking.o: viking.S + $(CC) -D__ASSEMBLY__ $(AFLAGS) -ansi -c -o viking.o viking.S + +tsunami.o: tsunami.S + $(CC) -D__ASSEMBLY__ $(AFLAGS) -ansi -c -o tsunami.o tsunami.S + +else + hypersparc.o: hypersparc.S $(CC) -D__ASSEMBLY__ -ansi -c -o hypersparc.o hypersparc.S viking.o: viking.S $(CC) -D__ASSEMBLY__ -ansi -c -o viking.o viking.S + +tsunami.o: tsunami.S + $(CC) -D__ASSEMBLY__ -ansi -c -o tsunami.o tsunami.S + +endif diff -u --recursive --new-file v2.1.37/linux/arch/sparc/mm/asyncd.c linux/arch/sparc/mm/asyncd.c --- v2.1.37/linux/arch/sparc/mm/asyncd.c Thu Dec 19 01:03:32 1996 +++ linux/arch/sparc/mm/asyncd.c Thu May 15 14:43:52 1997 @@ -1,4 +1,4 @@ -/* $Id: asyncd.c,v 1.9 1996/12/18 06:43:22 tridge Exp $ +/* $Id: asyncd.c,v 1.10 1997/05/15 21:14:24 davem Exp $ * The asyncd kernel daemon. This handles paging on behalf of * processes that receive page faults due to remote (async) memory * accesses. @@ -153,7 +153,7 @@ if(!pte) goto no_memory; if(!pte_present(*pte)) { - do_no_page(tsk, vma, address, write); + handle_mm_fault(tsk, vma, address, write); goto finish_up; } set_pte(pte, pte_mkyoung(*pte)); @@ -165,12 +165,11 @@ flush_tlb_page(vma, address); goto finish_up; } - do_wp_page(tsk, vma, address, write); + handle_mm_fault(tsk, vma, address, write); /* Fall through for do_wp_page */ finish_up: stats.success++; - update_mmu_cache(vma, address, *pte); return 0; no_memory: diff -u --recursive --new-file v2.1.37/linux/arch/sparc/mm/fault.c linux/arch/sparc/mm/fault.c --- v2.1.37/linux/arch/sparc/mm/fault.c Thu Mar 27 14:40:00 1997 +++ linux/arch/sparc/mm/fault.c Thu May 15 14:43:52 1997 @@ -1,4 +1,4 @@ -/* $Id: fault.c,v 1.91 1997/03/18 17:56:00 jj Exp $ +/* $Id: fault.c,v 1.92 1997/05/15 21:14:21 davem Exp $ * fault.c: Page fault handlers for the Sparc. * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -229,7 +229,7 @@ if(!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - handle_mm_fault(vma, address, write); + handle_mm_fault(current, vma, address, write); up(&mm->mmap_sem); goto out; /* @@ -370,7 +370,7 @@ else if(!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; - handle_mm_fault(vma, address, write); + handle_mm_fault(current, vma, address, write); up(&mm->mmap_sem); return; bad_area: diff -u --recursive --new-file v2.1.37/linux/arch/sparc/mm/hypersparc.S linux/arch/sparc/mm/hypersparc.S --- v2.1.37/linux/arch/sparc/mm/hypersparc.S Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc/mm/hypersparc.S Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: hypersparc.S,v 1.4 1997/04/19 04:33:39 davem Exp $ +/* $Id: hypersparc.S,v 1.7 1997/05/03 05:09:12 davem Exp $ * hypersparc.S: High speed Hypersparc mmu/cache operations. * * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) @@ -79,7 +79,7 @@ sta %g0, [%o0 + %o4] ASI_M_FLUSH_USER hypersparc_flush_cache_mm_out: retl - sta %g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE ! hyper_flush_whole_icache + nop /* The things we do for performance... 
*/ hypersparc_flush_cache_range: @@ -126,7 +126,7 @@ bne 1b sta %g0, [%o3 + %g5] ASI_M_FLUSH_USER retl - sta %g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE + nop /* Below our threshold, flush one page at a time. */ 0: @@ -166,7 +166,7 @@ sta %o3, [%g7] ASI_M_MMUREGS hypersparc_flush_cache_range_out: retl - sta %g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE + nop /* HyperSparc requires a valid mapping where we are about to flush * in order to check for a physical tag match during the flush. @@ -221,12 +221,12 @@ sta %o2, [%g4] ASI_M_MMUREGS hypersparc_flush_cache_page_out: retl - sta %g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE + nop hypersparc_flush_sig_insns: - flush %o2 + flush %o1 retl - flush %o2 + 4 + flush %o1 + 4 /* HyperSparc is copy-back. */ hypersparc_flush_page_to_ram: @@ -289,7 +289,7 @@ cmp %o1, -1 be hypersparc_flush_tlb_mm_out #endif - mov 0x300, %g2 + mov 0x300, %g2 sta %o1, [%g1] ASI_M_MMUREGS sta %g0, [%g2] ASI_M_FLUSH_PROBE hypersparc_flush_tlb_mm_out: @@ -304,7 +304,7 @@ cmp %o3, -1 be hypersparc_flush_tlb_range_out #endif - srl %o1, SRMMU_PGDIR_SHIFT, %o1 + srl %o1, SRMMU_PGDIR_SHIFT, %o1 sta %o3, [%g1] ASI_M_MMUREGS sll %o1, SRMMU_PGDIR_SHIFT, %o1 sethi %hi(1 << SRMMU_PGDIR_SHIFT), %o4 @@ -324,13 +324,67 @@ mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 - lda [%g1] ASI_M_MMUREGS, %g5 #ifndef __SMP__ cmp %o3, -1 be hypersparc_flush_tlb_page_out #endif + lda [%g1] ASI_M_MMUREGS, %g5 sta %o3, [%g1] ASI_M_MMUREGS sta %g0, [%o1] ASI_M_FLUSH_PROBE hypersparc_flush_tlb_page_out: retl sta %g5, [%g1] ASI_M_MMUREGS + + /* High speed page clear/copy. */ + .globl hypersparc_bzero_1page, hypersparc_copy_1page +hypersparc_bzero_1page: + clr %g1 + mov 32, %g2 + add %g2, %g2, %g3 + add %g2, %g3, %g4 + add %g2, %g4, %g5 + add %g2, %g5, %g7 + add %g2, %g7, %o2 + add %g2, %o2, %o3 + mov 16, %o1 +1: + stda %g0, [%o0 + %g0] ASI_M_BFILL + stda %g0, [%o0 + %g2] ASI_M_BFILL + stda %g0, [%o0 + %g3] ASI_M_BFILL + stda %g0, [%o0 + %g4] ASI_M_BFILL + stda %g0, [%o0 + %g5] ASI_M_BFILL + stda %g0, [%o0 + %g7] ASI_M_BFILL + stda %g0, [%o0 + %o2] ASI_M_BFILL + stda %g0, [%o0 + %o3] ASI_M_BFILL + subcc %o1, 1, %o1 + bne 1b + add %o0, 256, %o0 + + retl + nop + +hypersparc_copy_1page: + sub %o1, %o0, %o2 ! difference + mov 16, %g1 +1: + sta %o0, [%o0 + %o2] ASI_M_BCOPY + add %o0, 32, %o0 + sta %o0, [%o0 + %o2] ASI_M_BCOPY + add %o0, 32, %o0 + sta %o0, [%o0 + %o2] ASI_M_BCOPY + add %o0, 32, %o0 + sta %o0, [%o0 + %o2] ASI_M_BCOPY + add %o0, 32, %o0 + sta %o0, [%o0 + %o2] ASI_M_BCOPY + add %o0, 32, %o0 + sta %o0, [%o0 + %o2] ASI_M_BCOPY + add %o0, 32, %o0 + sta %o0, [%o0 + %o2] ASI_M_BCOPY + add %o0, 32, %o0 + sta %o0, [%o0 + %o2] ASI_M_BCOPY + subcc %g1, 1, %g1 + bne 1b + add %o0, 32, %o0 + + retl + nop diff -u --recursive --new-file v2.1.37/linux/arch/sparc/mm/srmmu.c linux/arch/sparc/mm/srmmu.c --- v2.1.37/linux/arch/sparc/mm/srmmu.c Tue May 13 22:41:03 1997 +++ linux/arch/sparc/mm/srmmu.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: srmmu.c,v 1.140 1997/05/01 08:53:39 davem Exp $ +/* $Id: srmmu.c,v 1.145 1997/05/04 10:02:15 ecd Exp $ * srmmu.c: SRMMU specific routines for memory management. * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -77,12 +77,20 @@ /* Don't change this without changing access to this * in arch/sparc/mm/viking.S */ -struct srmmu_trans { +static struct srmmu_trans { unsigned long vbase; unsigned long pbase; unsigned long size; } srmmu_map[SPARC_PHYS_BANKS]; +#define SRMMU_HASHSZ 256 + +/* Not static, viking.S uses it. 
*/ +struct srmmu_trans *srmmu_v2p_hash[SRMMU_HASHSZ]; +static struct srmmu_trans *srmmu_p2v_hash[SRMMU_HASHSZ]; + +#define srmmu_ahashfn(addr) ((addr) >> 24) + static int viking_mxcc_present = 0; void srmmu_frob_mem_map(unsigned long start_mem) @@ -113,31 +121,26 @@ /* Physical memory can be _very_ non-contiguous on the sun4m, especially * the SS10/20 class machines and with the latest openprom revisions. - * So we have to crunch the free page pool. + * So we have to do a quick lookup. */ static inline unsigned long srmmu_v2p(unsigned long vaddr) { - int i; + struct srmmu_trans *tp = srmmu_v2p_hash[srmmu_ahashfn(vaddr)]; - for(i=0; srmmu_map[i].size != 0; i++) { - if(srmmu_map[i].vbase <= vaddr && - (srmmu_map[i].vbase + srmmu_map[i].size > vaddr)) { - return (vaddr - srmmu_map[i].vbase) + srmmu_map[i].pbase; - } - } - return 0xffffffffUL; + if(tp) + return (vaddr - tp->vbase + tp->pbase); + else + return 0xffffffffUL; } static inline unsigned long srmmu_p2v(unsigned long paddr) { - int i; + struct srmmu_trans *tp = srmmu_p2v_hash[srmmu_ahashfn(paddr)]; - for(i=0; srmmu_map[i].size != 0; i++) { - if(srmmu_map[i].pbase <= paddr && - (srmmu_map[i].pbase + srmmu_map[i].size > paddr)) - return (paddr - srmmu_map[i].pbase) + srmmu_map[i].vbase; - } - return 0xffffffffUL; + if(tp) + return (paddr - tp->pbase + tp->vbase); + else + return 0xffffffffUL; } /* In general all page table modifications should use the V8 atomic @@ -847,108 +850,19 @@ free_pages((unsigned long)tsk, 1); } -/* Tsunami flushes. It's page level tlb invalidation is not very - * useful at all, you must be in the context that page exists in to - * get a match. - */ -static void tsunami_flush_cache_all(void) -{ - flush_user_windows(); - tsunami_flush_icache(); - tsunami_flush_dcache(); -} - -static void tsunami_flush_cache_mm(struct mm_struct *mm) -{ - FLUSH_BEGIN(mm) - flush_user_windows(); - tsunami_flush_icache(); - tsunami_flush_dcache(); - FLUSH_END -} - -static void tsunami_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end) -{ - FLUSH_BEGIN(mm) - flush_user_windows(); - tsunami_flush_icache(); - tsunami_flush_dcache(); - FLUSH_END -} - -static void tsunami_flush_cache_page(struct vm_area_struct *vma, unsigned long page) -{ - FLUSH_BEGIN(vma->vm_mm) - flush_user_windows(); - tsunami_flush_icache(); - tsunami_flush_dcache(); - FLUSH_END -} - -/* Tsunami does not have a Copy-back style virtual cache. */ -static void tsunami_flush_page_to_ram(unsigned long page) -{ -} - -/* However, Tsunami is not IO coherent. */ -static void tsunami_flush_page_for_dma(unsigned long page) -{ - tsunami_flush_icache(); - tsunami_flush_dcache(); -} - -/* Tsunami has harvard style split I/D caches which do not snoop each other, - * so we have to flush on-stack sig insns. Only the icache need be flushed - * since the Tsunami has a write-through data cache. 
- */ -static void tsunami_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) -{ - tsunami_flush_icache(); -} - -static void tsunami_flush_chunk(unsigned long chunk) -{ -} - -static void tsunami_flush_tlb_all(void) -{ - srmmu_flush_whole_tlb(); - module_stats.invall++; -} - -static void tsunami_flush_tlb_mm(struct mm_struct *mm) -{ - FLUSH_BEGIN(mm) - srmmu_flush_whole_tlb(); - module_stats.invmm++; - FLUSH_END -} - -static void tsunami_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end) -{ - FLUSH_BEGIN(mm) - srmmu_flush_whole_tlb(); - module_stats.invrnge++; - FLUSH_END -} - -static void tsunami_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) -{ - struct mm_struct *mm = vma->vm_mm; - - FLUSH_BEGIN(mm) - __asm__ __volatile__(" - lda [%0] %3, %%g5 - sta %1, [%0] %3 - sta %%g0, [%2] %4 - sta %%g5, [%0] %3" - : /* no outputs */ - : "r" (SRMMU_CTX_REG), "r" (mm->context), "r" (page & PAGE_MASK), - "i" (ASI_M_MMUREGS), "i" (ASI_M_FLUSH_PROBE) - : "g5"); - module_stats.invpg++; - FLUSH_END -} +/* tsunami.S */ +extern void tsunami_flush_cache_all(void); +extern void tsunami_flush_cache_mm(struct mm_struct *mm); +extern void tsunami_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end); +extern void tsunami_flush_cache_page(struct vm_area_struct *vma, unsigned long page); +extern void tsunami_flush_page_to_ram(unsigned long page); +extern void tsunami_flush_page_for_dma(unsigned long page); +extern void tsunami_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr); +extern void tsunami_flush_chunk(unsigned long chunk); +extern void tsunami_flush_tlb_all(void); +extern void tsunami_flush_tlb_mm(struct mm_struct *mm); +extern void tsunami_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end); +extern void tsunami_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); /* Swift flushes. It has the recommended SRMMU specification flushing * facilities, so we can do things in a more fine grained fashion than we @@ -1325,6 +1239,8 @@ extern void hypersparc_flush_tlb_mm(struct mm_struct *mm); extern void hypersparc_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end); extern void hypersparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); +extern void hypersparc_bzero_1page(void *); +extern void hypersparc_copy_1page(void *, const void *); static void srmmu_set_pte_nocache_hyper(pte_t *ptep, pte_t pteval) { @@ -2125,6 +2041,32 @@ MKTRACE(("success\n")); init_task.mm->mmap->vm_start = page_offset = low_base; stack_top = page_offset - PAGE_SIZE; +#if 1 + for(entry = 0; srmmu_map[entry].size; entry++) { + printk("[%d]: v[%08lx,%08lx](%lx) p[%08lx]\n", entry, + srmmu_map[entry].vbase, + srmmu_map[entry].vbase + srmmu_map[entry].size, + srmmu_map[entry].size, + srmmu_map[entry].pbase); + } +#endif + + /* Now setup the p2v/v2p hash tables. */ + for(entry = 0; entry < SRMMU_HASHSZ; entry++) + srmmu_v2p_hash[entry] = srmmu_p2v_hash[entry] = NULL; + for(entry = 0; srmmu_map[entry].size; entry++) { + unsigned long addr; + + for(addr = srmmu_map[entry].vbase; + addr < (srmmu_map[entry].vbase + srmmu_map[entry].size); + addr += (1 << 24)) + srmmu_v2p_hash[srmmu_ahashfn(addr)] = &srmmu_map[entry]; + for(addr = srmmu_map[entry].pbase; + addr < (srmmu_map[entry].pbase + srmmu_map[entry].size); + addr += (1 << 24)) + srmmu_p2v_hash[srmmu_ahashfn(addr)] = &srmmu_map[entry]; + } + return; /* SUCCESS! 
*/ } @@ -2431,6 +2373,11 @@ hyper_flush_whole_icache(); clear = srmmu_get_faddr(); clear = srmmu_get_fstatus(); + +#ifdef __SMP__ + /* Avoid unnecessary cross calls. */ + flush_page_for_dma = local_flush_page_for_dma; +#endif } __initfunc(static void init_hypersparc(void)) @@ -2463,6 +2410,14 @@ update_mmu_cache = srmmu_vac_update_mmu_cache; sparc_update_rootmmu_dir = hypersparc_update_rootmmu_dir; poke_srmmu = poke_hypersparc; + + /* High performance page copy/clear. */ + { extern void (*__copy_1page)(void *, const void *); + extern void (*bzero_1page)(void *); + + __copy_1page = hypersparc_copy_1page; + bzero_1page = hypersparc_bzero_1page; + } } static void poke_cypress(void) diff -u --recursive --new-file v2.1.37/linux/arch/sparc/mm/tsunami.S linux/arch/sparc/mm/tsunami.S --- v2.1.37/linux/arch/sparc/mm/tsunami.S Wed Dec 31 16:00:00 1969 +++ linux/arch/sparc/mm/tsunami.S Wed May 14 15:01:20 1997 @@ -0,0 +1,90 @@ +/* $Id: tsunami.S,v 1.1 1997/05/03 05:09:09 davem Exp $ + * tsunami.S: High speed MicroSparc-I mmu/cache operations. + * + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + */ + +#include +#include +#include +#include +#include + +#define WINDOW_FLUSH(tmp1, tmp2) \ + mov 0, tmp1; \ +98: ld [%g6 + AOFF_task_tss + AOFF_thread_uwinmask], tmp2; \ + orcc %g0, tmp2, %g0; \ + add tmp1, 1, tmp1; \ + bne 98b; \ + save %sp, -64, %sp; \ +99: subcc tmp1, 1, tmp1; \ + bne 99b; \ + restore %g0, %g0, %g0; + + .text + .align 4 + + .globl tsunami_flush_cache_all, tsunami_flush_cache_mm + .globl tsunami_flush_cache_range, tsunami_flush_cache_page + .globl tsunami_flush_page_to_ram, tsunami_flush_page_for_dma + .globl tsunami_flush_sig_insns, tsunami_flush_chunk + .globl tsunami_flush_tlb_all, tsunami_flush_tlb_mm + .globl tsunami_flush_tlb_range, tsunami_flush_tlb_page + + /* Sliiick... */ +tsunami_flush_cache_page: + ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ +tsunami_flush_cache_mm: +tsunami_flush_cache_range: + ld [%o0 + AOFF_mm_context], %g2 +#ifndef __SMP__ + cmp %g2, -1 + be tsunami_flush_cache_out +#endif +tsunami_flush_cache_all: + WINDOW_FLUSH(%g4, %g5) +tsunami_flush_page_for_dma: + sta %g0, [%g0] ASI_M_DC_FLCLEAR + sta %g0, [%g0] ASI_M_IC_FLCLEAR +tsunami_flush_cache_out: +tsunami_flush_page_to_ram: +tsunami_flush_chunk: + retl + nop + +tsunami_flush_sig_insns: + flush %o1 + retl + flush %o1 + 4 + + /* More slick stuff... */ +tsunami_flush_tlb_mm: +tsunami_flush_tlb_range: +#ifndef __SMP__ + ld [%o0 + AOFF_mm_context], %g2 + cmp %g2, -1 + be tsunami_flush_tlb_out +#endif +tsunami_flush_tlb_all: + mov 0x400, %o1 + sta %g0, [%o1] ASI_M_FLUSH_PROBE +tsunami_flush_tlb_out: + retl + nop + + /* This one can be done in a fine grained manner... */ +tsunami_flush_tlb_page: + ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + mov SRMMU_CTX_REG, %g1 + ld [%o0 + AOFF_mm_context], %o3 + andn %o1, (PAGE_SIZE - 1), %o1 +#ifndef __SMP__ + cmp %o3, -1 + be tsunami_flush_tlb_page_out +#endif + lda [%g1] ASI_M_MMUREGS, %g5 + sta %o3, [%g1] ASI_M_MMUREGS + sta %g0, [%o1] ASI_M_FLUSH_PROBE +tsunami_flush_tlb_page_out: + retl + sta %g5, [%g1] ASI_M_MMUREGS diff -u --recursive --new-file v2.1.37/linux/arch/sparc/mm/viking.S linux/arch/sparc/mm/viking.S --- v2.1.37/linux/arch/sparc/mm/viking.S Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc/mm/viking.S Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: viking.S,v 1.2 1997/04/20 21:21:49 ecd Exp $ +/* $Id: viking.S,v 1.3 1997/05/04 10:02:14 ecd Exp $ * viking.S: High speed Viking cache/mmu operations * * Copyright (C) 1997 Eddie C. 
Dost (ecd@skynet.be) @@ -38,40 +38,26 @@ viking_flush_page: viking_flush_chunk: - sethi %hi(C_LABEL(srmmu_map)), %g2 - or %g2, %lo(C_LABEL(srmmu_map)), %g3 - ld [%g3 + 8], %g2 - cmp %g2, 0 - be 3f + sethi %hi(C_LABEL(srmmu_v2p_hash)), %g2 + or %g2, %lo(C_LABEL(srmmu_v2p_hash)), %g2 + srl %o0, 24, %o1 + sll %o1, 2, %o1 + + ld [%g2 + %o1], %g3 + cmp %g3, 0 + bne 1f and %o0, PAGE_MASK, %o0 - ld [%g3], %o1 -1: - cmp %o1, %o0 - bgu,a 2f - add %g3, 0xc, %g3 - - add %o1, %g2, %g2 - cmp %g2, %o0 - bleu,a 2f - add %g3, 0xc, %g3 + retl + nop +1: + ld [%g3], %o1 sub %o0, %o1, %g2 ld [%g3 + 4], %o0 add %g2, %o0, %g3 - b 4f - srl %g3, 12, %g1 ! ppage >> 12 - -2: - ld [%g3 + 8], %g2 - cmp %g2, 0 - bne,a 1b - ld [%g3], %o1 -3: - retl - nop + srl %g3, 12, %g1 ! ppage >> 12 -4: clr %o1 ! set counter, 0 - 127 sethi %hi(KERNBASE + PAGE_SIZE - 0x80000000), %o3 sethi %hi(0x80000000), %o4 @@ -131,40 +117,27 @@ viking_mxcc_flush_page: - sethi %hi(C_LABEL(srmmu_map)), %g2 - or %g2, %lo(C_LABEL(srmmu_map)), %g3 - ld [%g3 + 8], %g2 - cmp %g2, 0 - be 3f + sethi %hi(C_LABEL(srmmu_v2p_hash)), %g2 + or %g2, %lo(C_LABEL(srmmu_v2p_hash)), %g2 + srl %o0, 24, %o1 + sll %o1, 2, %o1 + + ld [%g2 + %o1], %g3 + cmp %g3, 0 + bne 1f and %o0, PAGE_MASK, %o0 - ld [%g3], %o1 -1: - cmp %o1, %o0 - bgu,a 2f - add %g3, 0xc, %g3 - - add %o1, %g2, %g2 - cmp %g2, %o0 - bleu,a 2f - add %g3, 0xc, %g3 + retl + nop +1: + ld [%g3], %o1 sub %o0, %o1, %g2 ld [%g3 + 4], %o0 + sethi %hi(PAGE_SIZE), %g4 add %g2, %o0, %g3 - sethi %hi(PAGE_SIZE), %g4 - b 4f - add %g3, %g4, %g3 ! ppage + PAGE_SIZE - -2: - ld [%g3 + 8], %g2 - cmp %g2, 0 - bne,a 1b - ld [%g3], %o1 -3: - retl - nop -4: + add %g3, %g4, %g3 ! ppage + PAGE_SIZE + mov 0x10, %g2 ! set cacheable bit sethi %hi(MXCC_SRCSTREAM), %o2 or %o2, %lo(MXCC_SRCSTREAM), %o2 diff -u --recursive --new-file v2.1.37/linux/arch/sparc/prom/console.c linux/arch/sparc/prom/console.c --- v2.1.37/linux/arch/sparc/prom/console.c Tue May 13 22:41:04 1997 +++ linux/arch/sparc/prom/console.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: console.c,v 1.12 1997/05/01 01:41:30 davem Exp $ +/* $Id: console.c,v 1.14 1997/05/14 20:44:58 davem Exp $ * console.c: Routines that deal with sending and receiving IO * to/from the current console device using the PROM. * @@ -14,6 +14,9 @@ #include #include +/* XXX Let's get rid of this thing if we can... */ +extern struct task_struct *current_set[NR_CPUS]; + /* Non blocking get character from console input device, returns -1 * if no input was taken. This can be used for polling. */ @@ -38,6 +41,7 @@ } break; case PROM_AP1000: + default: i = -1; break; }; @@ -81,6 +85,9 @@ #endif break; + default: + i = -1; + break; }; __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r" (&current_set[hard_smp_processor_id()]) : @@ -208,6 +215,7 @@ } break; case PROM_AP1000: + default: return PROMDEV_I_UNK; }; return PROMDEV_O_UNK; } diff -u --recursive --new-file v2.1.37/linux/arch/sparc/prom/devmap.c linux/arch/sparc/prom/devmap.c --- v2.1.37/linux/arch/sparc/prom/devmap.c Tue May 13 22:41:04 1997 +++ linux/arch/sparc/prom/devmap.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: devmap.c,v 1.4 1997/05/01 01:41:31 davem Exp $ +/* $Id: devmap.c,v 1.5 1997/05/14 20:44:59 davem Exp $ * promdevmap.c: Map device/IO areas to virtual addresses. * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -10,6 +10,9 @@ #include #include + +/* XXX Let's get rid of this thing if we can... 
*/ +extern struct task_struct *current_set[NR_CPUS]; /* Just like the routines in palloc.c, these should not be used * by the kernel at all. Bootloader facility mainly. And again, diff -u --recursive --new-file v2.1.37/linux/arch/sparc/prom/devops.c linux/arch/sparc/prom/devops.c --- v2.1.37/linux/arch/sparc/prom/devops.c Tue May 13 22:41:04 1997 +++ linux/arch/sparc/prom/devops.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: devops.c,v 1.8 1997/05/01 01:41:31 davem Exp $ +/* $Id: devops.c,v 1.10 1997/05/14 20:44:59 davem Exp $ * devops.c: Device operations using the PROM. * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -10,6 +10,9 @@ #include #include +/* XXX Let's get rid of this thing if we can... */ +extern struct task_struct *current_set[NR_CPUS]; + /* Open the device described by the string 'dstr'. Returns the handle * to that device used for subsequent operations on that device. * Returns -1 on failure. @@ -57,6 +60,7 @@ (*(romvec->pv_v2devops.v2_dev_close))(dhandle); break; case PROM_AP1000: + default: break; }; __asm__ __volatile__("ld [%0], %%g6\n\t" : : @@ -83,6 +87,7 @@ (*(romvec->pv_v2devops.v2_dev_seek))(dhandle, seekhi, seeklo); break; case PROM_AP1000: + default: break; }; __asm__ __volatile__("ld [%0], %%g6\n\t" : : diff -u --recursive --new-file v2.1.37/linux/arch/sparc/prom/misc.c linux/arch/sparc/prom/misc.c --- v2.1.37/linux/arch/sparc/prom/misc.c Tue May 13 22:41:04 1997 +++ linux/arch/sparc/prom/misc.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: misc.c,v 1.14 1997/05/01 01:41:32 davem Exp $ +/* $Id: misc.c,v 1.15 1997/05/14 20:45:00 davem Exp $ * misc.c: Miscellaneous prom functions that don't belong * anywhere else. * @@ -12,6 +12,9 @@ #include #include #include + +/* XXX Let's get rid of this thing if we can... */ +extern struct task_struct *current_set[NR_CPUS]; /* Reset and reboot the machine with the command 'bcommand'. */ void diff -u --recursive --new-file v2.1.37/linux/arch/sparc/prom/mp.c linux/arch/sparc/prom/mp.c --- v2.1.37/linux/arch/sparc/prom/mp.c Tue May 13 22:41:04 1997 +++ linux/arch/sparc/prom/mp.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: mp.c,v 1.8 1997/05/01 01:41:32 davem Exp $ +/* $Id: mp.c,v 1.9 1997/05/14 20:45:01 davem Exp $ * mp.c: OpenBoot Prom Multiprocessor support routines. Don't call * these on a UP or else you will halt and catch fire. ;) * @@ -11,6 +11,9 @@ #include #include + +/* XXX Let's get rid of this thing if we can... */ +extern struct task_struct *current_set[NR_CPUS]; /* Start cpu with prom-tree node 'cpunode' using context described * by 'ctable_reg' in context 'ctx' at program counter 'pc'. diff -u --recursive --new-file v2.1.37/linux/arch/sparc/prom/segment.c linux/arch/sparc/prom/segment.c --- v2.1.37/linux/arch/sparc/prom/segment.c Tue May 13 22:41:04 1997 +++ linux/arch/sparc/prom/segment.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: segment.c,v 1.4 1997/05/01 01:41:33 davem Exp $ +/* $Id: segment.c,v 1.5 1997/05/14 20:45:02 davem Exp $ * segment.c: Prom routine to map segments in other contexts before * a standalone is completely mapped. This is for sun4 and * sun4c architectures only. @@ -11,6 +11,9 @@ #include #include #include + +/* XXX Let's get rid of this thing if we can... */ +extern struct task_struct *current_set[NR_CPUS]; /* Set physical segment 'segment' at virtual address 'vaddr' in * context 'ctx'. 
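
The hashed address translation introduced in arch/sparc/mm/srmmu.c above (and probed directly from the rewritten viking.S) replaces the old linear walk of srmmu_map[] with a single table lookup: bits 24-31 of an address index a 256-entry array, so each slot covers one 16MB region and a bank spanning several such regions is entered into several slots. The following stand-alone C sketch is illustrative only, not part of the patch; the bank layout in main() is hypothetical and the helper names hash_enter()/v2p() are invented for the example. It assumes 16MB-aligned banks and the 32-bit sun4m address space (the mask in the hash function is only there so the sketch also runs on a 64-bit host, where the kernel version needs none).

#include <stdio.h>

/* Mirrors struct srmmu_trans from srmmu.c: one contiguous memory bank. */
struct srmmu_trans {
	unsigned long vbase;	/* virtual base of the bank */
	unsigned long pbase;	/* physical base of the bank */
	unsigned long size;	/* bank size in bytes */
};

#define SRMMU_HASHSZ	256	/* 256 slots x 16MB = the whole 4GB space */
#define srmmu_ahashfn(addr)	(((addr) >> 24) & (SRMMU_HASHSZ - 1))

static struct srmmu_trans *v2p_hash[SRMMU_HASHSZ];

/* Enter a bank into every 16MB slot it covers, as the patched srmmu.c
 * does at paging-init time for both the v2p and p2v tables. */
static void hash_enter(struct srmmu_trans *tp)
{
	unsigned long addr;

	for (addr = tp->vbase; addr < tp->vbase + tp->size; addr += (1UL << 24))
		v2p_hash[srmmu_ahashfn(addr)] = tp;
}

/* O(1) virtual-to-physical translation; 0xffffffff signals a miss,
 * just like the kernel version. */
static unsigned long v2p(unsigned long vaddr)
{
	struct srmmu_trans *tp = v2p_hash[srmmu_ahashfn(vaddr)];

	return tp ? (vaddr - tp->vbase + tp->pbase) : 0xffffffffUL;
}

int main(void)
{
	/* Hypothetical bank: 32MB of physical memory at 0x0, mapped at 0xf0000000. */
	static struct srmmu_trans bank = { 0xf0000000UL, 0x00000000UL, 32UL << 20 };

	hash_enter(&bank);
	printf("v2p(0xf0123456) = %#lx\n", v2p(0xf0123456UL));	/* 0x123456 */
	printf("v2p(0xd0000000) = %#lx\n", v2p(0xd0000000UL));	/* miss */
	return 0;
}

This is why viking_flush_page above now needs only a shift, an index scale and one load to find the owning bank, where the removed code looped over srmmu_map[] comparing bounds entry by entry.
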
diff -u --recursive --new-file v2.1.37/linux/arch/sparc/prom/tree.c linux/arch/sparc/prom/tree.c --- v2.1.37/linux/arch/sparc/prom/tree.c Tue May 13 22:41:04 1997 +++ linux/arch/sparc/prom/tree.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: tree.c,v 1.17 1997/05/01 01:41:33 davem Exp $ +/* $Id: tree.c,v 1.18 1997/05/14 20:45:03 davem Exp $ * tree.c: Basic device tree traversal/scanning for the Linux * prom library. * @@ -15,6 +15,8 @@ #include #include +/* XXX Let's get rid of this thing if we can... */ +extern struct task_struct *current_set[NR_CPUS]; /* Macro to restore "current" to the g6 register. */ #define restore_current() __asm__ __volatile__("ld [%0], %%g6\n\t" : : \ diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/Makefile linux/arch/sparc64/Makefile --- v2.1.37/linux/arch/sparc64/Makefile Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc64/Makefile Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -# $Id: Makefile,v 1.15 1997/04/14 17:04:49 jj Exp $ +# $Id: Makefile,v 1.16 1997/05/04 07:21:08 davem Exp $ # sparc64/Makefile # # Makefile for the architecture dependent flags and dependencies on the @@ -29,7 +29,7 @@ LINKFLAGS = -T arch/sparc64/vmlinux.lds -HEAD := arch/sparc64/kernel/head.o +HEAD := arch/sparc64/kernel/head.o arch/sparc64/kernel/init_task.o SUBDIRS := $(SUBDIRS) arch/sparc64/kernel arch/sparc64/lib arch/sparc64/mm \ arch/sparc64/prom diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/defconfig linux/arch/sparc64/defconfig --- v2.1.37/linux/arch/sparc64/defconfig Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc64/defconfig Wed May 14 15:01:20 1997 @@ -40,6 +40,7 @@ SUN_FB_BWTWO=y SUN_FB_LEO=y TADPOLE_FB_WEITEK=y +SUN_FB_CREATOR=y # # Misc Linux/SPARC drivers @@ -132,8 +133,8 @@ # # Filesystems # -CONFIG_QUOTA=y -CONFIG_MINIX_FS=y +# CONFIG_QUOTA is not set +# CONFIG_MINIX_FS is not set CONFIG_EXT2_FS=y # CONFIG_FAT_FS is not set # CONFIG_MSDOS_FS is not set @@ -141,7 +142,9 @@ # CONFIG_UMSDOS_FS is not set CONFIG_PROC_FS=y CONFIG_NFS_FS=y -# CONFIG_ROOT_NFS is not set +CONFIG_ROOT_NFS=y +CONFIG_RNFS_BOOTP=y +# CONFIG_RNFS_RARP is not set # CONFIG_NFSD is not set CONFIG_SUNRPC=y CONFIG_LOCKD=y diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/kernel/Makefile linux/arch/sparc64/kernel/Makefile --- v2.1.37/linux/arch/sparc64/kernel/Makefile Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc64/kernel/Makefile Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -# $Id: Makefile,v 1.16 1997/04/17 20:35:37 jj Exp $ +# $Id: Makefile,v 1.17 1997/05/04 07:20:58 davem Exp $ # Makefile for the linux kernel. # # Note! Dependencies are done automagically by 'make dep', which also @@ -13,7 +13,7 @@ .S.o: $(CC) -D__ASSEMBLY__ -ansi -c $< -o $*.o -all: kernel.o head.o +all: kernel.o head.o init_task.o O_TARGET := kernel.o O_OBJS := etrap.o rtrap.o hack.o process.o setup.o cpu.o idprom.o \ diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/kernel/etrap.S linux/arch/sparc64/kernel/etrap.S --- v2.1.37/linux/arch/sparc64/kernel/etrap.S Tue May 13 22:41:04 1997 +++ linux/arch/sparc64/kernel/etrap.S Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: etrap.S,v 1.12 1997/04/28 14:57:07 davem Exp $ +/* $Id: etrap.S,v 1.13 1997/05/04 07:21:00 davem Exp $ * etrap.S: Preparing for entry into the kernel on Sparc V9. * * Copyright (C) 1996, 1997 David S. 
Miller (davem@caip.rutgers.edu) @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -66,7 +67,10 @@ wrpr %g0, 0, %canrestore wrpr %g2, 0, %otherwin - ldx [%g1 + AOFF_task_saved_kernel_stack], %g2 + mov 1, %g2 + sllx %g2, (PAGE_SHIFT + 1), %g2 + sub %g2, (TRACEREG_SZ + REGWIN_SZ), %g2 + add %g1, %g2, %g2 1: stx %g1, [%g2 + REGWIN_SZ + PT_V9_TSTATE] rdpr %tpc, %g1 @@ -103,11 +107,10 @@ wrpr %l1, (PSTATE_IE | PSTATE_AG), %pstate sethi %uhi(KERNBASE), %g4 or %g4, %ulo(KERNBASE), %g4 - sethi %hi(current_set), %g6 - or %g6, %lo(current_set), %g6 + srlx %sp, (PAGE_SHIFT + 1), %g6 sllx %g4, 32, %g4 jmpl %l2 + 0x4, %g0 - ldx [%g6 + %g4], %g6 + sllx %g6, (PAGE_SHIFT + 1), %g6 #ifdef __SMP__ /* FIXME: Fix the above insn for SMP */ #endif diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/kernel/head.S linux/arch/sparc64/kernel/head.S --- v2.1.37/linux/arch/sparc64/kernel/head.S Mon Apr 14 16:28:09 1997 +++ linux/arch/sparc64/kernel/head.S Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: head.S,v 1.27 1997/04/04 00:49:49 davem Exp $ +/* $Id: head.S,v 1.28 1997/05/04 07:21:02 davem Exp $ * head.S: Initial boot code for the Sparc64 port of Linux. * * Copyright (C) 1996,1997 David S. Miller (davem@caip.rutgers.edu) @@ -262,16 +262,18 @@ stx %g6, [%g2 + %g4] stx %g5, [%g3 + %g4] - sethi %hi(init_task), %g6 - or %g6, %lo(init_task), %g6 + sethi %hi(init_task_union), %g6 + or %g6, %lo(init_task_union), %g6 add %g6, %g4, %g6 ! g6 usage is fixed as well mov %sp, %l6 mov %o4, %l7 - sethi %hi(bootup_kernel_stack + 0x2000 - STACK_BIAS - REGWIN_SZ), %g5 - or %g5, %lo(bootup_kernel_stack + 0x2000 - STACK_BIAS - REGWIN_SZ), %g5 - add %g5, %g4, %sp + mov 1, %g5 + sllx %g5, (PAGE_SHIFT + 1), %g5 + sub %g5, (REGWIN_SZ + STACK_BIAS), %g5 + add %g6, %g5, %sp mov 0, %fp + wrpr %g0, 0, %wstate wrpr %g0, 0x0, %tl @@ -360,6 +362,9 @@ ! 0xfffff80000008000 #include "ttable.S" + + /* This is just anal retentiveness on my part... */ + .align 16384 .data .align 8 diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/kernel/init_task.c linux/arch/sparc64/kernel/init_task.c --- v2.1.37/linux/arch/sparc64/kernel/init_task.c Wed Dec 31 16:00:00 1969 +++ linux/arch/sparc64/kernel/init_task.c Wed May 14 15:01:20 1997 @@ -0,0 +1,18 @@ +#include +#include + +#include +#include + +static struct vm_area_struct init_mmap = INIT_MMAP; +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS; +struct mm_struct init_mm = INIT_MM; + +/* .text section in head.S is aligned at 2 page boundary and this gets linked + * right after that so that the init_task_union is aligned properly as well. + * We really don't need this special alignment like the Intel does, but + * I do it anyway for completeness. + */ +union task_union init_task_union __attribute__((__section__(".text"))) = { INIT_TASK }; diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/kernel/process.c linux/arch/sparc64/kernel/process.c --- v2.1.37/linux/arch/sparc64/kernel/process.c Mon Apr 14 16:28:09 1997 +++ linux/arch/sparc64/kernel/process.c Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: process.c,v 1.6 1997/04/07 18:57:07 jj Exp $ +/* $Id: process.c,v 1.8 1997/05/14 20:45:06 davem Exp $ * arch/sparc64/kernel/process.c * * Copyright (C) 1995, 1996 David S. 
Miller (davem@caip.rutgers.edu) @@ -37,6 +37,8 @@ #include #include +struct task_struct *current_set[NR_CPUS] = {&init_task, }; + #ifndef __SMP__ /* @@ -453,19 +455,18 @@ #endif /* Calculate offset to stack_frame & pt_regs */ - stack_offset = (PAGE_SIZE - TRACEREG_SZ); + stack_offset = ((PAGE_SIZE<<1) - TRACEREG_SZ); if(regs->tstate & TSTATE_PRIV) stack_offset -= REGWIN_SZ; - childregs = ((struct pt_regs *) (p->kernel_stack_page + stack_offset)); + childregs = ((struct pt_regs *) (((unsigned long)p) + stack_offset)); *childregs = *regs; new_stack = (((struct reg_window *) childregs) - 1); old_stack = (((struct reg_window *) regs) - 1); *new_stack = *old_stack; - p->saved_kernel_stack = ((unsigned long) new_stack); - p->tss.ksp = p->saved_kernel_stack - STACK_BIAS; + p->tss.ksp = ((unsigned long) new_stack) - STACK_BIAS; p->tss.kpc = ((unsigned long) ret_from_syscall) - 0x8; p->tss.kregs = childregs; @@ -485,7 +486,7 @@ p->tss.current_ds = USER_DS; #if 0 - if (sp != current->tss.kregs->u_regs[UREG_FP]) { + if (sp != regs->u_regs[UREG_FP]) { struct sparc_stackf *childstack; struct sparc_stackf *parentstack; @@ -494,8 +495,7 @@ * Set some valid stack frames to give to the child. */ childstack = (struct sparc_stackf *)sp; - parentstack = (struct sparc_stackf *) - current->tss.kregs->u_regs[UREG_FP]; + parentstack = (struct sparc_stackf *)regs->u_regs[UREG_FP]; #if 0 printk("clone: parent stack:\n"); diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/kernel/setup.c linux/arch/sparc64/kernel/setup.c --- v2.1.37/linux/arch/sparc64/kernel/setup.c Mon Apr 14 16:28:09 1997 +++ linux/arch/sparc64/kernel/setup.c Wed May 14 15:01:20 1997 @@ -1,4 +1,4 @@ -/* $Id: setup.c,v 1.5 1997/04/04 00:49:52 davem Exp $ +/* $Id: setup.c,v 1.6 1997/05/04 07:21:04 davem Exp $ * linux/arch/sparc64/kernel/setup.c * * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu) @@ -260,6 +260,8 @@ /* Initialize PROM console and command line. */ *cmdline_p = prom_getbootargs(); strcpy(saved_command_line, *cmdline_p); + + prom_printf("BOOT: args[%s] saved[%s]\n", *cmdline_p, saved_command_line); printk("ARCH: SUN4U\n"); diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/kernel/sparcelf32.c linux/arch/sparc64/kernel/sparcelf32.c --- v2.1.37/linux/arch/sparc64/kernel/sparcelf32.c Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc64/kernel/sparcelf32.c Wed May 14 15:01:21 1997 @@ -374,7 +374,6 @@ #define INTERPRETER_AOUT 1 #define INTERPRETER_ELF 2 - static inline int do_load_elf32_binary(struct linux_binprm * bprm, struct pt_regs * regs) { @@ -577,6 +576,7 @@ /* Do this so that we can load the interpreter, if need be. We will change some of these later */ current->mm->rss = 0; + current->tss.flags |= SPARC_FLAG_32BIT; bprm->p = setup_arg_pages(bprm->p, bprm); current->mm->start_stack = bprm->p; diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/kernel/sys_sparc32.c linux/arch/sparc64/kernel/sys_sparc32.c --- v2.1.37/linux/arch/sparc64/kernel/sys_sparc32.c Wed Apr 23 19:01:16 1997 +++ linux/arch/sparc64/kernel/sys_sparc32.c Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: sys_sparc32.c,v 1.9 1997/04/21 08:34:24 jj Exp $ +/* $Id: sys_sparc32.c,v 1.12 1997/05/14 14:50:58 jj Exp $ * sys_sparc32.c: Conversion between 32bit and 64bit native syscalls. 
* * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -138,6 +139,10 @@ extern asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags); extern asmlinkage int sys_socketcall(int call, unsigned long *args); extern asmlinkage int sys_nfsservctl(int cmd, void *argp, void *resp); +extern asmlinkage int sys_listen(int fd, int backlog); +extern asmlinkage int sys_socket(int family, int type, int protocol); +extern asmlinkage int sys_socketpair(int family, int type, int protocol, int usockvec[2]); +extern asmlinkage int sys_shutdown(int fd, int how); asmlinkage int sys32_ioperm(u32 from, u32 num, int on) { @@ -157,6 +162,17 @@ unsigned short seq; }; +struct semid_ds32 { + struct ipc_perm32 sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t32 sem_otime; /* last semop time */ + __kernel_time_t32 sem_ctime; /* last change time */ + u32 sem_base; /* ptr to first semaphore in array */ + u32 sem_pending; /* pending operations to be processed */ + u32 sem_pending_last; /* last pending operation */ + u32 undo; /* undo requests on this array */ + unsigned short sem_nsems; /* no. of semaphores in array */ +}; + struct msqid_ds32 { struct ipc_perm32 msg_perm; @@ -212,15 +228,62 @@ err = sys_semget (first, second, third); goto out; case SEMCTL: { - /* XXX union semun32 to union semun64 and back conversion */ union semun fourth; + void *pad; + unsigned long old_fs; + struct semid_ds s; + err = -EINVAL; if (!ptr) goto out; err = -EFAULT; - if(get_user(fourth.__pad, (void **)A(ptr))) + if(get_user(pad, (void **)A(ptr))) goto out; + fourth.__pad = pad; + switch (third) { + case IPC_INFO: + case SEM_INFO: + case GETVAL: + case GETPID: + case GETNCNT: + case GETZCNT: + case GETALL: + case SETALL: + case IPC_RMID: + err = sys_semctl (first, second, third, fourth); + goto out; + case IPC_SET: + if (get_user (s.sem_perm.uid, &(((struct semid_ds32 *)A(pad))->sem_perm.uid)) || + __get_user (s.sem_perm.gid, &(((struct semid_ds32 *)A(pad))->sem_perm.gid)) || + __get_user (s.sem_perm.mode, &(((struct semid_ds32 *)A(pad))->sem_perm.mode))) { + err = -EFAULT; + goto out; + } + /* Fall through */ + case SEM_STAT: + case IPC_STAT: + fourth.__pad = &s; + break; + } + old_fs = get_fs(); + set_fs (KERNEL_DS); err = sys_semctl (first, second, third, fourth); + set_fs (old_fs); + switch (third) { + case SEM_STAT: + case IPC_STAT: + if (put_user (s.sem_perm.key, &(((struct semid_ds32 *)A(pad))->sem_perm.key)) || + __put_user (s.sem_perm.uid, &(((struct semid_ds32 *)A(pad))->sem_perm.uid)) || + __put_user (s.sem_perm.gid, &(((struct semid_ds32 *)A(pad))->sem_perm.gid)) || + __put_user (s.sem_perm.cuid, &(((struct semid_ds32 *)A(pad))->sem_perm.cuid)) || + __put_user (s.sem_perm.cgid, &(((struct semid_ds32 *)A(pad))->sem_perm.cgid)) || + __put_user (s.sem_perm.mode, &(((struct semid_ds32 *)A(pad))->sem_perm.mode)) || + __put_user (s.sem_perm.seq, &(((struct semid_ds32 *)A(pad))->sem_perm.seq)) || + __put_user (s.sem_otime, &(((struct semid_ds32 *)A(pad))->sem_otime)) || + __put_user (s.sem_ctime, &(((struct semid_ds32 *)A(pad))->sem_ctime)) || + __put_user (s.sem_nsems, &(((struct semid_ds32 *)A(pad))->sem_nsems))) + err = -EFAULT; + } goto out; } default: @@ -534,10 +597,50 @@ return sys_rename((const char *)A(oldname), (const char *)A(newname)); } -/* XXX: Play with the addr, it will be ugly :(( */ +struct dqblk32 { + __u32 dqb_bhardlimit; + __u32 dqb_bsoftlimit; + __u32 dqb_curblocks; + __u32 dqb_ihardlimit; + __u32 
dqb_isoftlimit; + __u32 dqb_curinodes; + __kernel_time_t32 dqb_btime; + __kernel_time_t32 dqb_itime; +}; + asmlinkage int sys32_quotactl(int cmd, u32 special, int id, u32 addr) { - return sys_quotactl(cmd, (const char *)A(special), id, (caddr_t)A(addr)); + int cmds = cmd >> SUBCMDSHIFT; + int err; + struct dqblk d; + unsigned long old_fs; + + switch (cmds) { + case Q_GETQUOTA: + break; + case Q_SETQUOTA: + case Q_SETUSE: + case Q_SETQLIM: + if (copy_from_user (&d, (struct dqblk32 *)A(addr), sizeof (struct dqblk32))) + return -EFAULT; + d.dqb_itime = ((struct dqblk32 *)&d)->dqb_itime; + d.dqb_btime = ((struct dqblk32 *)&d)->dqb_btime; + break; + default: + return sys_quotactl(cmd, (const char *)A(special), id, (caddr_t)A(addr)); + } + old_fs = get_fs (); + set_fs (KERNEL_DS); + err = sys_quotactl(cmd, (const char *)A(special), id, (caddr_t)A(addr)); + set_fs (old_fs); + if (cmds == Q_GETQUOTA) { + __kernel_time_t b = d.dqb_btime, i = d.dqb_itime; + ((struct dqblk32 *)&d)->dqb_itime = i; + ((struct dqblk32 *)&d)->dqb_btime = b; + if (copy_to_user ((struct dqblk32 *)A(addr), &d, sizeof (struct dqblk32))) + return -EFAULT; + } + return err; } static int put_statfs (u32 buf, struct statfs *s) @@ -1599,20 +1702,224 @@ return sys_getsockopt(fd, level, optname, (char *)A(optval), (int *)A(optlen)); } -/* Continue here */ +struct msghdr32 { + u32 msg_name; + int msg_namelen; + u32 msg_iov; + __kernel_size_t32 msg_iovlen; + u32 msg_control; + __kernel_size_t32 msg_controllen; + unsigned msg_flags; +}; + +struct cmsghdr32 { + __kernel_size_t32 cmsg_len; + int cmsg_level; + int cmsg_type; + unsigned char cmsg_data[0]; +}; + asmlinkage int sys32_sendmsg(int fd, u32 msg, unsigned flags) { - return sys_sendmsg(fd, (struct msghdr *)A(msg), flags); + struct msghdr m; + int count; + struct iovec *v; + struct iovec vf[UIO_FASTIOV]; + u32 i, vector; + long ret; + unsigned long old_fs; + + if (get_user ((long)m.msg_name, &(((struct msghdr32 *)A(msg))->msg_name)) || + __get_user (m.msg_namelen, &(((struct msghdr32 *)A(msg))->msg_namelen)) || + __get_user (vector, &(((struct msghdr32 *)A(msg))->msg_iov)) || + __get_user (m.msg_iovlen, &(((struct msghdr32 *)A(msg))->msg_iovlen)) || + __get_user ((long)m.msg_control, &(((struct msghdr32 *)A(msg))->msg_control)) || + __get_user (m.msg_controllen, &(((struct msghdr32 *)A(msg))->msg_controllen)) || + __get_user (m.msg_flags, &(((struct msghdr32 *)A(msg))->msg_flags))) + return -EFAULT; + + count = m.msg_iovlen; + if (!count) return 0; if (count > UIO_MAXIOV) return -EINVAL; + if (count <= UIO_FASTIOV) + v = vf; + else { + lock_kernel (); + v = kmalloc (count * sizeof (struct iovec), GFP_KERNEL); + if (!v) { + ret = -ENOMEM; + goto out; + } + } + + for (i = 0; i < count; i++) { + if (__get_user ((unsigned long)(v[i].iov_base), &((((struct iovec32 *)A(vector))+i)->iov_base)) || + __get_user (v[i].iov_len, &((((struct iovec32 *)A(vector))+i)->iov_len))) { + ret = -EFAULT; + goto out; + } + } + + m.msg_iov = v; + + if (m.msg_controllen) { + /* XXX Handle msg_control stuff... Or should that go into ip_sockglue.c etc.? 
*/ + } + old_fs = get_fs(); + set_fs (KERNEL_DS); + ret = sys_sendmsg(fd, &m, flags); + set_fs (old_fs); +out: + if (count > UIO_FASTIOV) { + kfree (v); + unlock_kernel (); + } + return ret; } asmlinkage int sys32_recvmsg(int fd, u32 msg, unsigned int flags) { - return sys_recvmsg(fd, (struct msghdr *)A(msg), flags); + struct msghdr m; + int count; + struct iovec *v; + struct iovec vf[UIO_FASTIOV]; + u32 i, vector; + long ret; + unsigned long old_fs; + + if (get_user ((long)m.msg_name, &(((struct msghdr32 *)A(msg))->msg_name)) || + __get_user (m.msg_namelen, &(((struct msghdr32 *)A(msg))->msg_namelen)) || + __get_user (vector, &(((struct msghdr32 *)A(msg))->msg_iov)) || + __get_user (m.msg_iovlen, &(((struct msghdr32 *)A(msg))->msg_iovlen)) || + __get_user ((long)m.msg_control, &(((struct msghdr32 *)A(msg))->msg_control)) || + __get_user (m.msg_controllen, &(((struct msghdr32 *)A(msg))->msg_controllen)) || + __get_user (m.msg_flags, &(((struct msghdr32 *)A(msg))->msg_flags))) + return -EFAULT; + + count = m.msg_iovlen; + if (!count) return 0; if (count > UIO_MAXIOV) return -EINVAL; + if (count <= UIO_FASTIOV) + v = vf; + else { + lock_kernel (); + v = kmalloc (count * sizeof (struct iovec), GFP_KERNEL); + if (!v) { + ret = -ENOMEM; + goto out; + } + } + + for (i = 0; i < count; i++) { + if (__get_user ((unsigned long)(v[i].iov_base), &((((struct iovec32 *)A(vector))+i)->iov_base)) || + __get_user (v[i].iov_len, &((((struct iovec32 *)A(vector))+i)->iov_len))) { + ret = -EFAULT; + goto out; + } + } + + m.msg_iov = v; + + if (m.msg_controllen) { + /* XXX Handle msg_control stuff... Or should that go into ip_sockglue.c etc.? */ + } + old_fs = get_fs(); + set_fs (KERNEL_DS); + ret = sys_recvmsg(fd, &m, flags); + set_fs (old_fs); + if (ret >= 0) { + /* XXX Handle msg_control stuff... 
*/ + if (put_user (m.msg_flags, &(((struct msghdr32 *)A(msg))->msg_flags)) || + __put_user (m.msg_controllen, &(((struct msghdr32 *)A(msg))->msg_controllen))) + return -EFAULT; + } +out: + if (count > UIO_FASTIOV) { + kfree (v); + unlock_kernel (); + } + return ret; } asmlinkage int sys32_socketcall(int call, u32 args) { - return sys_socketcall(call, (unsigned long *)A(args)); + static unsigned char nargs[18]={0,3,3,3,2,3,3,3, + 4,4,4,6,6,2,5,5,3,3}; + u32 a[6]; + u32 a0,a1; + int err = -EINVAL; + int i; + + lock_kernel(); + if(call<1||call>SYS_RECVMSG) + goto out; + err = -EFAULT; + + for (i = 0; i < nargs[call]; i++, args += sizeof (u32)) + if (get_user(a[i], (u32 *)A(args))) + goto out; + + a0=a[0]; + a1=a[1]; + + switch(call) + { + case SYS_SOCKET: + err = sys_socket(a0, a1, a[2]); + break; + case SYS_BIND: + err = sys32_bind(a0, a1, a[2]); + break; + case SYS_CONNECT: + err = sys32_connect(a0, a1, a[2]); + break; + case SYS_LISTEN: + err = sys_listen(a0, a1); + break; + case SYS_ACCEPT: + err = sys32_accept(a0, a1, a[2]); + break; + case SYS_GETSOCKNAME: + err = sys32_getsockname(a0, a1, a[2]); + break; + case SYS_GETPEERNAME: + err = sys32_getpeername(a0, a1, a[2]); + break; + case SYS_SOCKETPAIR: + err = sys_socketpair(a0, a1, a[2], (int *)A(a[3])); + break; + case SYS_SEND: + err = sys32_send(a0, a1, a[2], a[3]); + break; + case SYS_SENDTO: + err = sys32_sendto(a0, a1, a[2], a[3], a[4], a[5]); + break; + case SYS_RECV: + err = sys32_recv(a0, a1, a[2], a[3]); + break; + case SYS_RECVFROM: + err = sys32_recvfrom(a0, a1, a[2], a[3], a[4], a[5]); + break; + case SYS_SHUTDOWN: + err = sys_shutdown(a0,a1); + break; + case SYS_SETSOCKOPT: + err = sys32_setsockopt(a0, a1, a[2], a[3], a[4]); + break; + case SYS_GETSOCKOPT: + err = sys32_getsockopt(a0, a1, a[2], a[3], a[4]); + break; + case SYS_SENDMSG: + err = sys32_sendmsg(a0, a1, a[2]); + break; + case SYS_RECVMSG: + err = sys32_recvmsg(a0, a1, a[2]); + break; + default: + err = -EINVAL; + break; + } +out: + unlock_kernel(); + return err; } extern void check_pending(int signum); diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/lib/checksum.S linux/arch/sparc64/lib/checksum.S --- v2.1.37/linux/arch/sparc64/lib/checksum.S Tue May 13 22:41:04 1997 +++ linux/arch/sparc64/lib/checksum.S Wed May 14 15:01:21 1997 @@ -44,13 +44,13 @@ csum_partial_end_cruft: andcc %o1, 8, %g0 ! check how much be,pn %icc, 1f ! caller asks %o1 & 0x8 - and %o1, 4, %g3 ! nope, check for word remaining + and %o1, 4, %g5 ! nope, check for word remaining ldd [%o0], %g2 ! load two addcc %g2, %o2, %o2 ! add first word to sum addccc %g3, %o2, %o2 ! add second word as well add %o0, 8, %o0 ! advance buf ptr addc %g0, %o2, %o2 ! add in final carry -1: brz,pn %g3, 1f ! nope, skip this code +1: brz,pn %g5, 1f ! nope, skip this code andcc %o1, 3, %o1 ! check for trailing bytes ld [%o0], %g2 ! load it addcc %g2, %o2, %o2 ! add to sum @@ -98,15 +98,17 @@ srl %o2, 16, %g3 addc %g0, %g3, %g2 sll %o2, 16, %o2 + and %o0, 0x4, %g7 sll %g2, 16, %g3 srl %o2, 16, %o2 or %g3, %o2, %o2 1: brz,pn %g7, csum_partial_fix_aligned - nop + andn %o1, 0x7f, %o3 ld [%o0 + 0x00], %g2 sub %o1, 4, %o1 addcc %g2, %o2, %o2 add %o0, 4, %o0 + andn %o1, 0x7f, %o3 addc %g0, %o2, %o2 csum_partial_fix_aligned: brz,pt %o3, 3f ! none to do @@ -115,9 +117,9 @@ CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5) CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5) CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5) - sub %o3, 128, %o3 ! detract from loop iters addc %g0, %o2, %o2 ! 
sink in final carry - brnz,pt %o3, 5b ! more to do + subcc %o3, 128, %o3 ! detract from loop iters + bne,pt %icc, 5b ! more to do add %o0, 128, %o0 ! advance buf ptr 3: brz,pn %g1, cpte ! nope andcc %o1, 0xf, %o3 ! anything left at all? @@ -125,7 +127,7 @@ srl %g1, 1, %o4 ! compute offset sub %g7, %g1, %g7 ! adjust jmp ptr sub %g7, %o4, %g7 ! final jmp ptr adjust - jmp %g7 + (cpte - 8 - 10b) ! enter the table + jmp %g7 + (11f-10b) ! enter the table add %o0, %g1, %o0 ! advance buf ptr cptbl: CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5) CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5) @@ -134,8 +136,8 @@ CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5) CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5) CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5) - addc %g0, %o2, %o2 ! fetch final carry - andcc %o1, 0xf, %g0 ! anything left at all? +11: addc %g0, %o2, %o2 ! fetch final carry + andcc %o1, 0xf, %o3 ! anything left at all? cpte: brnz,pn %o3, csum_partial_end_cruft ! yep, handle it sethi %uhi(KERNBASE), %g4 mov %o2, %o0 ! return computed csum @@ -322,13 +324,14 @@ andcc %o0, 0x4, %g0 or %g3, %g7, %g7 1: be,pt %icc, 3f - andn %g1, 0x7f, %g0 + andn %g1, 0x7f, %g2 EX(ld [%o0 + 0x00], %g4, add %g1, 0,#) sub %g1, 4, %g1 EX2(st %g4, [%o1 + 0x00],#) add %o0, 4, %o0 addcc %g4, %g7, %g7 add %o1, 4, %o1 + andn %g1, 0x7f, %g2 addc %g0, %g7, %g7 cc_dword_aligned: 3: brz,pn %g2, 3f ! nope, less than one loop remains @@ -365,7 +368,7 @@ CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5) 12: EXT(cctbl, 12b, 22f,#) ! note for exception table handling addc %g0, %g7, %g7 - andcc %o3, 0xf, %g0 ! check for low bits set + andcc %g1, 0xf, %o3 ! check for low bits set ccte: bne,pn %icc, cc_end_cruft ! something left, handle it out of band sethi %uhi(KERNBASE), %g4 ! restore gfp mov %g7, %o0 ! give em the computed checksum diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/mm/asyncd.c linux/arch/sparc64/mm/asyncd.c --- v2.1.37/linux/arch/sparc64/mm/asyncd.c Mon Dec 30 01:59:59 1996 +++ linux/arch/sparc64/mm/asyncd.c Thu May 15 14:43:52 1997 @@ -1,4 +1,4 @@ -/* $Id: asyncd.c,v 1.1 1996/12/26 10:24:24 davem Exp $ +/* $Id: asyncd.c,v 1.2 1997/05/15 21:14:32 davem Exp $ * The asyncd kernel daemon. This handles paging on behalf of * processes that receive page faults due to remote (async) memory * accesses. @@ -153,7 +153,7 @@ if(!pte) goto no_memory; if(!pte_present(*pte)) { - do_no_page(tsk, vma, address, write); + handle_mm_fault(tsk, vma, address, write); goto finish_up; } set_pte(pte, pte_mkyoung(*pte)); @@ -165,12 +165,11 @@ flush_tlb_page(vma, address); goto finish_up; } - do_wp_page(tsk, vma, address, write); + handle_mm_fault(tsk, vma, address, write); /* Fall through for do_wp_page */ finish_up: stats.success++; - update_mmu_cache(vma, address, *pte); return 0; no_memory: diff -u --recursive --new-file v2.1.37/linux/arch/sparc64/mm/fault.c linux/arch/sparc64/mm/fault.c --- v2.1.37/linux/arch/sparc64/mm/fault.c Thu Mar 27 14:40:01 1997 +++ linux/arch/sparc64/mm/fault.c Thu May 15 14:43:52 1997 @@ -1,4 +1,4 @@ -/* $Id: fault.c,v 1.4 1997/03/11 17:37:07 jj Exp $ +/* $Id: fault.c,v 1.5 1997/05/15 21:14:31 davem Exp $ * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc. * * Copyright (C) 1996 David S. 
Miller (davem@caip.rutgers.edu) @@ -168,7 +168,7 @@ if(!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - handle_mm_fault(vma, address, write); + handle_mm_fault(current, vma, address, write); up(&mm->mmap_sem); goto out; /* diff -u --recursive --new-file v2.1.37/linux/drivers/char/mem.c linux/drivers/char/mem.c --- v2.1.37/linux/drivers/char/mem.c Tue May 13 22:41:07 1997 +++ linux/drivers/char/mem.c Wed May 14 15:01:21 1997 @@ -238,38 +238,38 @@ */ static inline unsigned long read_zero_pagealigned(char * buf, unsigned long size) { - struct vm_area_struct * curr_vma; + struct vm_area_struct * vma; unsigned long addr=(unsigned long)buf; -/* - * First we take the most obvious case: when we have one VM area to deal with, - * and it's privately mapped. - */ - curr_vma = find_vma(current->mm, addr); - - if ( !(curr_vma->vm_flags & VM_SHARED) && - (addr + size <= curr_vma->vm_end) ) { - - flush_cache_range(current->mm, addr, addr + size); - zap_page_range(current->mm, addr, size); - zeromap_page_range(addr, size, PAGE_COPY); - flush_tlb_range(current->mm, addr, addr + size); - - return 0; + /* For private mappings, just map in zero pages. */ + for (vma = find_vma(current->mm, addr); vma; vma = vma->vm_next) { + unsigned long count; + + if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0) + return size; + if (vma->vm_flags & VM_SHARED) + break; + count = vma->vm_end - addr; + if (count > size) + count = size; + + flush_cache_range(current->mm, addr, addr + count); + zap_page_range(current->mm, addr, count); + zeromap_page_range(addr, count, PAGE_COPY); + flush_tlb_range(current->mm, addr, addr + count); + + size -= count; + buf += count; + addr += count; + if (size == 0) + return 0; } - -/* - * Ooops, the shared case is hard. Lets do the conventional - * zeroing. - * - * FIXME: same for the multiple-vma case, we dont handle it - * now for simplicity, although it's much easier than - * the shared case. Not that it should happen often ... - */ - + + /* The shared case is hard. Lets do the conventional zeroing. */ do { - if (clear_user(buf, PAGE_SIZE)) - break; + unsigned long unwritten = clear_user(buf, PAGE_SIZE); + if (unwritten) + return size + unwritten - PAGE_SIZE; if (need_resched) schedule(); buf += PAGE_SIZE; @@ -282,7 +282,10 @@ static long read_zero(struct inode * node, struct file * file, char * buf, unsigned long count) { - unsigned long left; + unsigned long left, unwritten, written = 0; + + if (!count) + return 0; if (!access_ok(VERIFY_WRITE, buf, count)) return -EFAULT; @@ -291,21 +294,27 @@ /* do we want to be clever? Arbitrary cut-off */ if (count >= PAGE_SIZE*4) { - unsigned long partial, unwritten; + unsigned long partial; /* How much left of the page? */ partial = (PAGE_SIZE-1) & -(unsigned long) buf; - clear_user(buf, partial); + unwritten = clear_user(buf, partial); + written = partial - unwritten; + if (unwritten) + goto out; left -= partial; buf += partial; unwritten = read_zero_pagealigned(buf, left & PAGE_MASK); + written += (left & PAGE_MASK) - unwritten; + if (unwritten) + goto out; buf += left & PAGE_MASK; left &= ~PAGE_MASK; - if (unwritten) - return count - left - unwritten; } - clear_user(buf, left); - return count; + unwritten = clear_user(buf, left); + written += left - unwritten; +out: + return written ? 
written : -EFAULT; } static int mmap_zero(struct inode * inode, struct file * file, struct vm_area_struct * vma) diff -u --recursive --new-file v2.1.37/linux/drivers/net/sunhme.c linux/drivers/net/sunhme.c --- v2.1.37/linux/drivers/net/sunhme.c Tue May 13 22:41:12 1997 +++ linux/drivers/net/sunhme.c Wed May 14 15:01:21 1997 @@ -1673,6 +1673,7 @@ netif_rx(skb); hp->net_stats.rx_packets++; + hp->net_stats.rx_bytes+=len; next: elem = NEXT_RX(elem); this = &rxbase[elem]; @@ -1729,6 +1730,7 @@ skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); hp->net_stats.rx_packets++; + hp->net_stats.rx_bytes+=len; } } /* Return the buffer to the Happy Meal. */ diff -u --recursive --new-file v2.1.37/linux/drivers/net/sunlance.c linux/drivers/net/sunlance.c --- v2.1.37/linux/drivers/net/sunlance.c Tue May 13 22:41:12 1997 +++ linux/drivers/net/sunlance.c Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: sunlance.c,v 1.62 1997/04/16 10:27:25 jj Exp $ +/* $Id: sunlance.c,v 1.64 1997/05/14 20:46:40 davem Exp $ * lance.c: Linux/Sparc/Lance driver * * Written 1995, 1996 by Miguel de Icaza @@ -673,8 +673,7 @@ /* On the 4m, setup the ledma to provide the upper bits for buffers */ if (lp->ledma) - lp->ledma->regs->dma_test = ((unsigned long) lp->init_block) - & 0xff000000; + lp->ledma->regs->dma_test = ((__u32) lp->init_block_dvma) & 0xff000000; lance_init_ring (dev); load_csrs (lp); @@ -758,8 +757,7 @@ lp->ledma->regs->cond_reg |= DMA_RST_ENET; udelay (200); lp->ledma->regs->cond_reg &= ~DMA_RST_ENET; - lp->ledma->regs->dma_test = ((unsigned long) lp->init_block) - & 0xff000000; + lp->ledma->regs->dma_test = ((__u32) lp->init_block_dvma) & 0xff000000; } lance_init_ring (dev); load_csrs (lp); diff -u --recursive --new-file v2.1.37/linux/drivers/sbus/char/suncons.c linux/drivers/sbus/char/suncons.c --- v2.1.37/linux/drivers/sbus/char/suncons.c Wed Apr 23 19:01:21 1997 +++ linux/drivers/sbus/char/suncons.c Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: suncons.c,v 1.61 1997/04/17 02:29:36 miguel Exp $ +/* $Id: suncons.c,v 1.62 1997/05/02 22:32:32 davem Exp $ * * suncons.c: Sun SparcStation console support. * @@ -387,12 +387,12 @@ sun_blitc (*contents, (unsigned long) contents); } -__initfunc(void serial_finish_init(void (*printfunc)(const char *))) +__initfunc(void serial_finish_init(void (*printfunc)(const char *, int))) { char buffer[2048]; sprintf (buffer, linux_serial_image, UTS_RELEASE); - (*printfunc)(buffer); + (*printfunc)(buffer, strlen(buffer)); } __initfunc(void con_type_init_finish(void)) diff -u --recursive --new-file v2.1.37/linux/drivers/sbus/char/sunserial.c linux/drivers/sbus/char/sunserial.c --- v2.1.37/linux/drivers/sbus/char/sunserial.c Tue May 13 22:41:12 1997 +++ linux/drivers/sbus/char/sunserial.c Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: sunserial.c,v 1.39 1997/04/23 07:45:26 ecd Exp $ +/* $Id: sunserial.c,v 1.41 1997/05/14 20:46:51 davem Exp $ * serial.c: Serial port driver for the Sparc. * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -39,8 +41,6 @@ #define KEYBOARD_LINE 0x2 #define MOUSE_LINE 0x3 -extern struct wait_queue * keypress_wait; - struct sun_zslayout **zs_chips; struct sun_zschannel **zs_channels; struct sun_zschannel *zs_conschan; @@ -1062,20 +1062,30 @@ /* * zs_console_print is registered for printk. 
*/ -static void zs_console_print(const char *p) +static void zs_console_print(const char *s, int count) { - char c; + int i; - while((c=*(p++)) != 0) { - if(c == '\n') + for (i = 0; i < count; i++, s++) { + if(*s == '\n') rs_put_char('\r'); - rs_put_char(c); + rs_put_char(*s); } /* Comment this if you want to have a strict interrupt-driven output */ rs_fair_output(); +} - return; +static void zs_console_wait_key(void) +{ + sleep_on(&keypress_wait); +} + +static int zs_console_device(void) +{ + extern int serial_console; + + return MKDEV(TTYAUX_MAJOR, 64 + serial_console - 1); } static void rs_flush_chars(struct tty_struct *tty) @@ -1857,7 +1867,7 @@ static void show_serial_version(void) { - char *revision = "$Revision: 1.39 $"; + char *revision = "$Revision: 1.41 $"; char *version, *p; version = strchr(revision, ' '); @@ -2164,14 +2174,15 @@ termios->c_cflag = cflag; } -extern void register_console(void (*proc)(const char *)); - static inline void rs_cons_check(struct sun_serial *ss, int channel) { int i, o, io; static int consout_registered = 0; static int msg_printed = 0; + static struct console console = { + zs_console_print, 0, + zs_console_wait_key, zs_console_device }; i = o = io = 0; @@ -2187,10 +2198,10 @@ o = 1; /* double whee.. */ if(!consout_registered) { - extern void serial_finish_init (void (*)(const char *)); + extern void serial_finish_init (void (*)(const char *, int count)); serial_finish_init (zs_console_print); - register_console(zs_console_print); + register_console(&console); consout_registered = 1; } } diff -u --recursive --new-file v2.1.37/linux/drivers/scsi/qlogicpti.c linux/drivers/scsi/qlogicpti.c --- v2.1.37/linux/drivers/scsi/qlogicpti.c Wed Apr 23 19:01:22 1997 +++ linux/drivers/scsi/qlogicpti.c Wed May 14 15:01:21 1997 @@ -8,6 +8,8 @@ * An even bigger kudos to John Grana at Performance Technologies * for providing me with the hardware to write this driver, you rule * John you really do. + * + * May, 2, 1997: Added support for QLGC,isp --jj */ #include @@ -573,6 +575,7 @@ unsigned char bsizes, bsizes_more; int nqptis = 0, nqptis_in_use = 0; int qpti_node; + int is_pti; tpnt->proc_dir = &proc_scsi_qlogicpti; qptichain = 0; @@ -584,7 +587,8 @@ /* Is this a red snapper? */ if(strcmp(qpti_dev->prom_name, "ptisp") && - strcmp(qpti_dev->prom_name, "PTI,ptisp")) + strcmp(qpti_dev->prom_name, "PTI,ptisp") && + strcmp(qpti_dev->prom_name, "QLGC,isp")) continue; /* Yep, register and allocate software state. */ @@ -618,6 +622,8 @@ prom_getstring(qpti_node, "name", qpti->prom_name, sizeof(qpti->prom_name)); qpti->prom_node = qpti_node; + + is_pti = strcmp (qpti->prom_name, "QLGC,isp"); /* Setup the reg property for this device. */ prom_apply_sbus_ranges(qpti->qdev->my_bus, @@ -632,17 +638,19 @@ qpti->qdev->reg_addrs[0].which_io, 0x0); if(!qregs) panic("PTI Qlogic/ISP registers unmappable"); - - /* Map this one read only. */ - qpti->sreg = sreg = (volatile unsigned char *) - sparc_alloc_io((qpti->qdev->reg_addrs[0].phys_addr + - (16 * PAGE_SIZE)), 0, - sizeof(unsigned char), - "PTI Qlogic/ISP Status Reg", - qpti->qdev->reg_addrs[0].which_io, 1); - if(!sreg) - panic("PTI Qlogic/ISP status reg unmappable"); - qpti->swsreg = 0; + + if(is_pti) { + /* Map this one read only. 
*/ + qpti->sreg = sreg = (volatile unsigned char *) + sparc_alloc_io((qpti->qdev->reg_addrs[0].phys_addr + + (16 * PAGE_SIZE)), 0, + sizeof(unsigned char), + "PTI Qlogic/ISP Status Reg", + qpti->qdev->reg_addrs[0].which_io, 1); + if(!sreg) + panic("PTI Qlogic/ISP status reg unmappable"); + qpti->swsreg = 0; + } qpti_host->base = (unsigned char *)qregs; qpti_host->io_port = (unsigned int) qregs; @@ -713,21 +721,32 @@ /* Set adapter and per-device default values. */ qlogicpti_set_hostdev_defaults(qpti); - - /* Load the firmware. */ - if(qlogicpti_load_firmware(qpti)) - panic("PTI Qlogic/ISP firmware load failed"); - - /* Check the PTI status reg. */ - if(qlogicpti_verify_tmon(qpti)) - panic("PTI Qlogic/ISP tmon verification failed"); + + if (is_pti) { + /* Load the firmware. */ + if(qlogicpti_load_firmware(qpti)) + panic("PTI Qlogic/ISP firmware load failed"); + + /* Check the PTI status reg. */ + if(qlogicpti_verify_tmon(qpti)) + panic("PTI Qlogic/ISP tmon verification failed"); + } /* Reset the ISP and init res/req queues. */ if(qlogicpti_reset_hardware(qpti_host)) panic("PTI Qlogic/ISP cannot be reset"); - printk("(Firmware v%d.%d) [%s Wide, using %s interface]\n", - qpti->fware_majrev, qpti->fware_minrev, + if (is_pti) { + printk("(Firmware v%d.%d)", + qpti->fware_majrev, qpti->fware_minrev); + } else { + char buffer[60]; + + prom_getstring (qpti_node, "isp-fcode", buffer, 60); + printk("(Firmware %s)", buffer); + } + + printk (" [%s Wide, using %s interface]\n", (qpti->ultra ? "Ultra" : "Fast"), (qpti->differential ? "differential" : "single ended")); @@ -751,7 +770,9 @@ /* Free IRQ handler and unmap Qlogic,ISP and PTI status regs. */ free_irq(host->irq, NULL); unmapioaddr((unsigned long)qregs); - unmapioaddr((unsigned long)qpti->sreg); + /* QLGC,isp doesn't have status reg */ + if (strcmp (qpti->prom_name, "QLGC,isp")) + unmapioaddr((unsigned long)qpti->sreg); return 0; } diff -u --recursive --new-file v2.1.37/linux/fs/autofs/autofs_i.h linux/fs/autofs/autofs_i.h --- v2.1.37/linux/fs/autofs/autofs_i.h Tue May 13 22:41:14 1997 +++ linux/fs/autofs/autofs_i.h Thu May 15 15:54:09 1997 @@ -24,12 +24,30 @@ #include #include -#if LINUX_VERSION_CODE < 0x20100 +#define kver(a,b,c) (((a) << 16) + ((b) << 8) + (c)) + +#if LINUX_VERSION_CODE < kver(2,1,0) /* Segmentation stuff for pre-2.1 kernels */ #include -#define copy_to_user memcpy_tofs -#define copy_from_user memcpy_fromfs + +static inline int copy_to_user(void *dst, void *src, unsigned long len) +{ + int rv = verify_area(VERIFY_WRITE, dst, len); + if ( rv ) + return -1; + memcpy_tofs(dst,src,len); + return 0; +} + +static inline int copy_from_user(void *dst, void *src, unsigned long len) +{ + int rv = verify_area(VERIFY_READ, src, len); + if ( rv ) + return -1; + memcpy_fromfs(dst,src,len); + return 0; +} #else diff -u --recursive --new-file v2.1.37/linux/fs/autofs/init.c linux/fs/autofs/init.c --- v2.1.37/linux/fs/autofs/init.c Tue May 13 22:41:14 1997 +++ linux/fs/autofs/init.c Thu May 15 15:45:12 1997 @@ -10,9 +10,14 @@ * * ------------------------------------------------------------------------- */ -#include #include #include "autofs_i.h" + +#if LINUX_VERSION_CODE < kver(2,1,36) +#define __initfunc(X) X +#else +#include +#endif static struct file_system_type autofs_fs_type = { autofs_read_super, "autofs", 0, NULL diff -u --recursive --new-file v2.1.37/linux/fs/autofs/root.c linux/fs/autofs/root.c --- v2.1.37/linux/fs/autofs/root.c Tue May 13 22:41:14 1997 +++ linux/fs/autofs/root.c Thu May 15 15:45:12 1997 @@ -335,9 +335,16 @@ int rv; 
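
The kver() helper added to autofs_i.h above packs a (major, minor, patch) triple the same way LINUX_VERSION_CODE is packed, so the old magic literal 0x20100 can be spelled readably:

    #define kver(a,b,c) (((a) << 16) + ((b) << 8) + (c))

    /* kver(2,1,0)  == 0x020100
     * kver(2,1,36) == 0x020124
     * A 2.0.x kernel has LINUX_VERSION_CODE == 0x0200xx, so it
     * takes the pre-2.1 compatibility branch below.
     */
    #if LINUX_VERSION_CODE < kver(2,1,0)
    /* ... memcpy_tofs/memcpy_fromfs wrappers ... */
    #endif
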
unsigned long ntimeout; +#if LINUX_VERSION_CODE < kver(2,1,0) + if ( (rv = verify_area(VERIFY_WRITE, p, sizeof(unsigned long))) ) + return rv; + ntimeout = get_user(p); + put_user(sbi->exp_timeout/HZ, p); +#else if ( (rv = get_user(ntimeout, p)) || (rv = put_user(sbi->exp_timeout/HZ, p)) ) return rv; +#endif if ( ntimeout > ULONG_MAX/HZ ) sbi->exp_timeout = 0; @@ -347,6 +354,20 @@ return 0; } +/* Return protocol version */ +static inline int autofs_get_protover(int *p) +{ +#if LINUX_VERSION_CODE < kver(2,1,0) + int rv; + if ( (rv = verify_area(VERIFY_WRITE, p, sizeof(int))) ) + return rv; + put_user(AUTOFS_PROTO_VERSION, p); + return 0; +#else + return put_user(AUTOFS_PROTO_VERSION, p); +#endif +} + /* Perform an expiry operation */ static inline int autofs_expire_run(struct autofs_sb_info *sbi, struct autofs_packet_expire *pkt_p) @@ -402,7 +423,7 @@ autofs_catatonic_mode(sbi); return 0; case AUTOFS_IOC_PROTOVER: /* Get protocol version */ - return put_user(AUTOFS_PROTO_VERSION, (int *)arg); + return autofs_get_protover((int *)arg); case AUTOFS_IOC_SETTIMEOUT: return autofs_get_set_timeout(sbi,(unsigned long *)arg); case AUTOFS_IOC_EXPIRE: diff -u --recursive --new-file v2.1.37/linux/fs/autofs/waitq.c linux/fs/autofs/waitq.c --- v2.1.37/linux/fs/autofs/waitq.c Tue May 13 22:41:14 1997 +++ linux/fs/autofs/waitq.c Thu May 15 15:45:12 1997 @@ -45,7 +45,7 @@ unsigned short fs; unsigned long old_signal; const char *data = (const char *)addr; - int written; + int written = 0; /** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/ diff -u --recursive --new-file v2.1.37/linux/fs/binfmt_elf.c linux/fs/binfmt_elf.c --- v2.1.37/linux/fs/binfmt_elf.c Tue May 13 22:41:14 1997 +++ linux/fs/binfmt_elf.c Wed May 14 15:01:21 1997 @@ -580,6 +580,9 @@ /* Do this so that we can load the interpreter, if need be. We will change some of these later */ current->mm->rss = 0; +#ifdef __sparc_v9__ + current->tss.flags &= ~(SPARC_FLAG_32BIT); +#endif bprm->p = setup_arg_pages(bprm->p, bprm); current->mm->start_stack = bprm->p; diff -u --recursive --new-file v2.1.37/linux/fs/buffer.c linux/fs/buffer.c --- v2.1.37/linux/fs/buffer.c Tue May 13 22:41:14 1997 +++ linux/fs/buffer.c Wed May 14 15:01:21 1997 @@ -612,8 +612,7 @@ buffer_locked(bh)) return 0; - if (atomic_read(&mem_map[MAP_NR((unsigned long) bh->b_data)].count) != 1 || - buffer_dirty(bh)) { + if (buffer_dirty(bh)) { refile_buffer(bh); return 0; } @@ -644,8 +643,7 @@ continue; } - if (buffer_locked(bh) && - (bh->b_list == BUF_LOCKED || bh->b_list == BUF_LOCKED1)) { + if (buffer_locked(bh) && bh->b_list == BUF_LOCKED) { /* Buffers are written in the order they are placed * on the locked list. 
If we encounter a locked * buffer here, this means that the rest of them @@ -845,9 +843,6 @@ if(dispose != buf->b_list) { if(dispose == BUF_DIRTY) buf->b_lru_time = jiffies; - if(dispose == BUF_LOCKED && - (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super) - dispose = BUF_LOCKED1; remove_from_queues(buf); buf->b_list = dispose; insert_into_queues(buf); @@ -1467,7 +1462,7 @@ buffermem -= PAGE_SIZE; mem_map[MAP_NR(page)].buffers = NULL; free_page(page); - return !atomic_read(&mem_map[MAP_NR(page)].count); + return 1; } /* ================== Debugging =================== */ @@ -1478,7 +1473,7 @@ int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0; int protected = 0; int nlist; - static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","LOCKED1","DIRTY"}; + static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","DIRTY"}; printk("Buffer memory: %6dkB\n",buffermem>>10); printk("Buffer heads: %6d\n",nr_buffer_heads); @@ -1526,7 +1521,7 @@ bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), - sizeof(unsigned long) * 4, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if(!bh_cachep) panic("Cannot create buffer head SLAB cache\n"); diff -u --recursive --new-file v2.1.37/linux/fs/exec.c linux/fs/exec.c --- v2.1.37/linux/fs/exec.c Tue May 13 22:41:14 1997 +++ linux/fs/exec.c Wed May 14 15:01:21 1997 @@ -393,6 +393,7 @@ *mm = *current->mm; init_new_context(mm); mm->def_flags = 0; /* should future lockings be kept? */ + mm->cpu_vm_mask = (1 << smp_processor_id()); mm->count = 1; mm->mmap = mm->mmap_cache = NULL; mm->total_vm = 0; diff -u --recursive --new-file v2.1.37/linux/fs/file_table.c linux/fs/file_table.c --- v2.1.37/linux/fs/file_table.c Tue May 13 22:41:14 1997 +++ linux/fs/file_table.c Wed May 14 15:01:21 1997 @@ -66,7 +66,7 @@ void file_table_init(void) { filp_cache = kmem_cache_create("filp", sizeof(struct file), - sizeof(unsigned long) * 8, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if(!filp_cache) panic("VFS: Cannot alloc filp SLAB cache."); diff -u --recursive --new-file v2.1.37/linux/fs/inode.c linux/fs/inode.c --- v2.1.37/linux/fs/inode.c Tue May 13 22:41:14 1997 +++ linux/fs/inode.c Wed May 14 15:01:21 1997 @@ -647,7 +647,7 @@ int i; inode_cachep = kmem_cache_create("inode", sizeof(struct inode), - sizeof(unsigned long) * 4, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if(!inode_cachep) panic("Cannot create inode SLAB cache\n"); diff -u --recursive --new-file v2.1.37/linux/fs/nfs/nfsroot.c linux/fs/nfs/nfsroot.c --- v2.1.37/linux/fs/nfs/nfsroot.c Mon Apr 14 16:28:17 1997 +++ linux/fs/nfs/nfsroot.c Wed May 14 15:01:21 1997 @@ -228,6 +228,8 @@ unsigned long oldfs; int err; + memset(&route, 0, sizeof(struct rtentry)); /* or else! */ + route.rt_dev = dev->name; route.rt_mtu = dev->mtu; route.rt_flags = RTF_UP; @@ -247,9 +249,15 @@ set_fs(KERNEL_DS); err = ip_rt_ioctl(op, &route); set_fs(oldfs); - printk(KERN_NOTICE "%s route %s %s %s: res %d\n", - (op == SIOCADDRT? "add" : "del"), - in_ntoa(dest), in_ntoa(mask), in_ntoa(gw), err); + + /* in_ntoa in ipv4/utils.c uses a single static buffer, so + * must make multiple printk calls, one for each in_ntoa + * invocation... + */ + printk(KERN_NOTICE "%s route ", (op == SIOCADDRT ? 
"addr" : "del")); + printk("%s ", in_ntoa(dest)); + printk("%s ", in_ntoa(mask)); + printk("%s: res %d\n", in_ntoa(gw), err); return err; } @@ -280,8 +288,10 @@ nextp = openp->next; openp->next = NULL; if (openp->dev != root_dev) { - if (!(openp->old_flags & IFF_UP)) + if (!(openp->old_flags & IFF_UP)) { dev_close(openp->dev); + } + openp->dev->flags = openp->old_flags; } kfree_s(openp, sizeof(struct open_dev)); @@ -1322,6 +1332,13 @@ root_dev->pa_mask = netmask; root_dev->pa_brdaddr = root_dev->pa_addr | ~root_dev->pa_mask; root_dev->pa_dstaddr = 0; + + /* Sticky situation, but it has a solution. We opened it earlier, + * but before we knew what pa_addr etc. to give to it, thus the + * routing code did not add a RTF_LOCAL route for it (how could + * it?) so we send the pseudo device state change event now. -DaveM + */ + ip_rt_event(NETDEV_CHANGE, root_dev); /* * Now add a route to the server. If there is no gateway given, diff -u --recursive --new-file v2.1.37/linux/fs/proc/mem.c linux/fs/proc/mem.c --- v2.1.37/linux/fs/proc/mem.c Tue May 13 22:41:15 1997 +++ linux/fs/proc/mem.c Wed May 14 13:16:59 1997 @@ -285,10 +285,10 @@ return -ENOMEM; if (!pte_present(*src_table)) - handle_mm_fault(src_vma, stmp, 1); + handle_mm_fault(tsk, src_vma, stmp, 1); if ((vma->vm_flags & VM_WRITE) && !pte_write(*src_table)) - handle_mm_fault(src_vma, stmp, 1); + handle_mm_fault(tsk, src_vma, stmp, 1); set_pte(src_table, pte_mkdirty(*src_table)); set_pte(dest_table, *src_table); diff -u --recursive --new-file v2.1.37/linux/include/asm-i386/semaphore.h linux/include/asm-i386/semaphore.h --- v2.1.37/linux/include/asm-i386/semaphore.h Tue May 13 22:41:17 1997 +++ linux/include/asm-i386/semaphore.h Wed May 14 16:04:56 1997 @@ -81,7 +81,7 @@ * "down_failed" is a special asm handler that calls the C * routine that actually waits. See arch/i386/lib/semaphore.S */ -extern inline void do_down(struct semaphore * sem, void (*failed)(void)) +extern inline void down(struct semaphore * sem) { __asm__ __volatile__( "# atomic down operation\n\t" @@ -93,15 +93,36 @@ "1:\n" ".section .text.lock,\"ax\"\n" "2:\tpushl $1b\n\t" - "jmp %1\n" + "jmp __down_failed\n" ".previous" :/* no outputs */ - :"c" (sem), "m" (*(unsigned long *)failed) + :"c" (sem) :"memory"); } -#define down(sem) do_down((sem),__down_failed) -#define down_interruptible(sem) do_down((sem),__down_failed_interruptible) +extern inline int down_interruptible(struct semaphore * sem) +{ + int result; + + __asm__ __volatile__( + "# atomic interruptible down operation\n\t" +#ifdef __SMP__ + "lock ; " +#endif + "decl 0(%1)\n\t" + "js 2f\n\t" + "xorl %0,%0\n" + "1:\n" + ".section .text.lock,\"ax\"\n" + "2:\tpushl $1b\n\t" + "jmp __down_failed_interruptible\n" + ".previous" + :"=a" (result) + :"c" (sem) + :"memory"); + return result; +} + /* * Note! This is subtle. We jump to wake people up only if @@ -121,10 +142,10 @@ "1:\n" ".section .text.lock,\"ax\"\n" "2:\tpushl $1b\n\t" - "jmp %1\n" + "jmp __up_wakeup\n" ".previous" :/* no outputs */ - :"c" (sem), "m" (*(unsigned long *)__up_wakeup) + :"c" (sem) :"memory"); } diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc/bitops.h linux/include/asm-sparc/bitops.h --- v2.1.37/linux/include/asm-sparc/bitops.h Mon Apr 14 16:28:19 1997 +++ linux/include/asm-sparc/bitops.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: bitops.h,v 1.46 1997/04/13 06:38:24 davem Exp $ +/* $Id: bitops.h,v 1.47 1997/05/14 20:47:56 davem Exp $ * bitops.h: Bit string operations on the Sparc. * * Copyright 1995 David S. 
Miller (davem@caip.rutgers.edu) @@ -95,7 +95,7 @@ * all bit-ops return 0 if bit was previously clear and != 0 otherwise. */ -extern __inline__ unsigned long set_bit(unsigned long nr, __SMPVOL void *addr) +extern __inline__ unsigned long test_and_set_bit(unsigned long nr, __SMPVOL void *addr) { register unsigned long mask asm("g2"); register unsigned long *ADDR asm("g1"); @@ -112,7 +112,12 @@ return mask; } -extern __inline__ unsigned long clear_bit(unsigned long nr, __SMPVOL void *addr) +extern __inline__ void set_bit(unsigned long nr, __SMPVOL void *addr) +{ + (void) test_and_set_bit(nr, addr); +} + +extern __inline__ unsigned long test_and_clear_bit(unsigned long nr, __SMPVOL void *addr) { register unsigned long mask asm("g2"); register unsigned long *ADDR asm("g1"); @@ -130,7 +135,12 @@ return mask; } -extern __inline__ unsigned long change_bit(unsigned long nr, __SMPVOL void *addr) +extern __inline__ unsigned long clear_bit(unsigned long nr, __SMPVOL void *addr) +{ + (void) test_and_clear_bit(nr, addr); +} + +extern __inline__ unsigned long test_and_change_bit(unsigned long nr, __SMPVOL void *addr) { register unsigned long mask asm("g2"); register unsigned long *ADDR asm("g1"); @@ -148,6 +158,11 @@ return mask; } +extern __inline__ unsigned long change_bit(unsigned long nr, __SMPVOL void *addr) +{ + (void) test_and_change_bit(nr, addr); +} + #endif /* __KERNEL__ */ /* The following routine need not be atomic. */ @@ -369,8 +384,8 @@ #define ext2_find_next_zero_bit find_next_zero_le_bit /* Bitmap functions for the minix filesystem. */ -#define minix_set_bit(nr,addr) set_bit(nr,addr) -#define minix_clear_bit(nr,addr) clear_bit(nr,addr) +#define minix_set_bit(nr,addr) test_and_set_bit(nr,addr) +#define minix_clear_bit(nr,addr) test_and_clear_bit(nr,addr) #define minix_test_bit(nr,addr) test_bit(nr,addr) #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc/cache.h linux/include/asm-sparc/cache.h --- v2.1.37/linux/include/asm-sparc/cache.h Mon Dec 30 01:59:59 1996 +++ linux/include/asm-sparc/cache.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: cache.h,v 1.6 1996/12/28 19:55:12 davem Exp $ +/* $Id: cache.h,v 1.7 1997/05/06 09:31:46 davem Exp $ * cache.h: Cache specific code for the Sparc. These include flushing * and direct tag/data line access. * @@ -11,6 +11,7 @@ #include #define L1_CACHE_BYTES 32 +#define L1_CACHE_ALIGN(x) ((((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))) /* Direct access to the instruction cache is provided through and * alternate address space. The IDC bit must be off in the ICCR on diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc/current.h linux/include/asm-sparc/current.h --- v2.1.37/linux/include/asm-sparc/current.h Mon Dec 30 01:59:59 1996 +++ linux/include/asm-sparc/current.h Wed May 14 15:01:21 1997 @@ -1,12 +1,6 @@ #ifndef _SPARC_CURRENT_H #define _SPARC_CURRENT_H -/* Some architectures may want to do something "clever" here since - * this is the most frequently accessed piece of data in the entire - * kernel. - */ -extern struct task_struct *current_set[NR_CPUS]; - /* Sparc rules... 
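
The L1_CACHE_ALIGN() macro added to cache.h above is the usual round-up idiom; with L1_CACHE_BYTES == 32:

    #define L1_CACHE_ALIGN(x) ((((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)))

    /* L1_CACHE_ALIGN(1)  == 32
     * L1_CACHE_ALIGN(32) == 32   -- already aligned, unchanged
     * L1_CACHE_ALIGN(33) == 64   -- rounded up to the next cache line
     */

Adding (alignment - 1) and masking the low bits off only works because the alignment is a power of two; that assumption is baked into the macro.
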
*/ register struct task_struct *current asm("g6"); diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc/hardirq.h linux/include/asm-sparc/hardirq.h --- v2.1.37/linux/include/asm-sparc/hardirq.h Wed Apr 23 19:01:28 1997 +++ linux/include/asm-sparc/hardirq.h Wed May 14 15:01:21 1997 @@ -51,7 +51,7 @@ __save_flags(flags); \ __cli(); \ if(atomic_add_return(1, &global_irq_count) != 1 || \ - global_irq_lock) { \ + *(((unsigned char *)(&global_irq_lock)))) { \ atomic_dec(&global_irq_count); \ __restore_flags(flags); \ ret = 0; \ diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc/irq.h linux/include/asm-sparc/irq.h --- v2.1.37/linux/include/asm-sparc/irq.h Wed Apr 23 19:01:28 1997 +++ linux/include/asm-sparc/irq.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: irq.h,v 1.17 1997/04/18 05:44:52 davem Exp $ +/* $Id: irq.h,v 1.19 1997/05/08 20:57:39 davem Exp $ * irq.h: IRQ registers on the Sparc. * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) @@ -13,8 +13,17 @@ #define NR_IRQS 15 +/* Get rid of this when lockups have gone away. -DaveM */ +#ifndef DEBUG_IRQLOCK +#define DEBUG_IRQLOCK +#endif + /* IRQ handler dispatch entry and exit. */ #ifdef __SMP__ +#ifdef DEBUG_IRQLOCK +extern void irq_enter(int cpu, int irq, void *regs); +extern void irq_exit(int cpu, int irq); +#else extern __inline__ void irq_enter(int cpu, int irq) { register int proc asm("g1"); @@ -40,6 +49,7 @@ : "0" (proc) : "g1", "g2", "g3", "g4", "g5", "memory", "cc"); } +#endif /* DEBUG_IRQLOCK */ #else #define irq_enter(cpu, irq) (local_irq_count[cpu]++) #define irq_exit(cpu, irq) (local_irq_count[cpu]--) diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc/spinlock.h linux/include/asm-sparc/spinlock.h --- v2.1.37/linux/include/asm-sparc/spinlock.h Tue May 13 22:41:18 1997 +++ linux/include/asm-sparc/spinlock.h Wed May 14 15:01:21 1997 @@ -55,6 +55,82 @@ #include +/* Define this to use the verbose/debugging versions in arch/sparc/lib/debuglocks.c */ +#define SPIN_LOCK_DEBUG + +#ifdef SPIN_LOCK_DEBUG +struct _spinlock_debug { + unsigned char lock; + unsigned long owner_pc; +}; +typedef struct _spinlock_debug spinlock_t; + +#define SPIN_LOCK_UNLOCKED { 0, 0 } +#define spin_lock_init(lp) do { (lp)->owner_pc = 0; (lp)->lock = 0; } while(0) +#define spin_unlock_wait(lp) do { barrier(); } while((lp)->lock) + +extern void _spin_lock(spinlock_t *lock); +extern int _spin_trylock(spinlock_t *lock); +extern void _spin_unlock(spinlock_t *lock); +extern void _spin_lock_irq(spinlock_t *lock); +extern void _spin_unlock_irq(spinlock_t *lock); +extern void _spin_lock_irqsave(spinlock_t *lock); +extern void _spin_unlock_irqrestore(spinlock_t *lock); + +#define spin_lock(lp) _spin_lock(lp) +#define spin_trylock(lp) _spin_trylock(lp) +#define spin_unlock(lp) _spin_unlock(lp) +#define spin_lock_irq(lp) _spin_lock_irq(lp) +#define spin_unlock_irq(lp) _spin_unlock_irq(lp) +#define spin_lock_irqsave(lp, flags) do { __save_and_cli(flags); \ + _spin_lock_irqsave(lp); } while (0) +#define spin_unlock_irqrestore(lp, flags) do { _spin_unlock_irqrestore(lp); \ + __restore_flags(flags); } while(0) + +struct _rwlock_debug { + volatile unsigned int lock; + unsigned long owner_pc; +}; +typedef struct _rwlock_debug rwlock_t; + +#define RW_LOCK_UNLOCKED { 0, 0 } + +extern void _read_lock(rwlock_t *rw); +extern void _read_unlock(rwlock_t *rw); +extern void _write_lock(rwlock_t *rw); +extern void _write_unlock(rwlock_t *rw); +extern void _read_lock_irq(rwlock_t *rw); +extern void _read_unlock_irq(rwlock_t *rw); +extern void 
_write_lock_irq(rwlock_t *rw); +extern void _write_unlock_irq(rwlock_t *rw); +extern void _read_lock_irqsave(rwlock_t *rw); +extern void _read_unlock_irqrestore(rwlock_t *rw); +extern void _write_lock_irqsave(rwlock_t *rw); +extern void _write_unlock_irqrestore(rwlock_t *rw); + +#define read_lock(rw) _read_lock(rw) +#define read_unlock(rw) _read_unlock(rw) +#define write_lock(rw) _write_lock(rw) +#define write_unlock(rw) _write_unlock(rw) +#define read_lock_irq(rw) _read_lock_irq(rw) +#define read_unlock_irq(rw) _read_unlock_irq(rw) +#define write_lock_irq(rw) _write_lock_irq(rw) +#define write_unlock_irq(rw) _write_unlock_irq(rw) + +#define read_lock_irqsave(rw, flags) \ +do { __save_and_cli(flags); _read_lock_irqsave(rw); } while (0) + +#define read_unlock_irqrestore(rw, flags) do { _read_unlock_irqrestore(rw); \ + __restore_flags(flags); } while(0) + +#define write_lock_irqsave(rw, flags) \ +do { __save_and_cli(flags); _write_lock_irqsave(rw); } while(0) + +#define write_unlock_irqrestore(rw, flags) do { _write_unlock_irqrestore(rw); \ + __restore_flags(flags); } while(0) + +#else /* !SPIN_LOCK_DEBUG */ + typedef unsigned char spinlock_t; #define SPIN_LOCK_UNLOCKED 0 @@ -265,6 +341,8 @@ do { __save_and_cli(flags); write_lock(lock); } while (0) #define write_unlock_irqrestore(lock, flags) \ do { write_unlock(lock); __restore_flags(flags); } while (0) + +#endif /* SPIN_LOCK_DEBUG */ #endif /* __SMP__ */ diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc/string.h linux/include/asm-sparc/string.h --- v2.1.37/linux/include/asm-sparc/string.h Mon Mar 17 14:54:31 1997 +++ linux/include/asm-sparc/string.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: string.h,v 1.30 1997/03/03 17:11:12 jj Exp $ +/* $Id: string.h,v 1.31 1997/05/03 02:02:12 davem Exp $ * string.h: External definitions for optimized assembly string * routines for the Linux Kernel. 
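
Callers are unaffected by the SPIN_LOCK_DEBUG switch above: the wrappers keep the normal locking API, and the extra owner_pc field is presumably there so the verbose versions in arch/sparc/lib/debuglocks.c can report who holds a stuck lock. A minimal usage sketch, with my_lock a hypothetical lock:

    static spinlock_t my_lock = SPIN_LOCK_UNLOCKED;

    void frob(void)
    {
            unsigned long flags;

            /* per the macro above, this expands to __save_and_cli(flags)
             * followed by _spin_lock_irqsave(&my_lock) */
            spin_lock_irqsave(&my_lock, flags);
            /* ... critical section ... */
            spin_unlock_irqrestore(&my_lock, flags);
    }
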
* @@ -34,7 +34,7 @@ extern inline void *__constant_memcpy(void *to, const void *from, __kernel_size_t n) { - extern void __copy_1page(void *, const void *); + extern void (*__copy_1page)(void *, const void *); if(n <= 32) { __builtin_memcpy(to, from, n); @@ -67,7 +67,7 @@ extern inline void *__constant_c_and_count_memset(void *s, char c, __kernel_size_t count) { - extern void *bzero_1page(void *); + extern void (*bzero_1page)(void *); extern __kernel_size_t __bzero(void *, __kernel_size_t); if(!c) { diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc/system.h linux/include/asm-sparc/system.h --- v2.1.37/linux/include/asm-sparc/system.h Tue May 13 22:41:18 1997 +++ linux/include/asm-sparc/system.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: system.h,v 1.60 1997/05/01 02:26:56 davem Exp $ */ +/* $Id: system.h,v 1.65 1997/05/14 20:47:59 davem Exp $ */ #ifndef __SPARC_SYSTEM_H #define __SPARC_SYSTEM_H @@ -75,6 +75,7 @@ #define switch_to(prev, next) do { \ __label__ here; \ register unsigned long task_pc asm("o7"); \ + extern struct task_struct *current_set[NR_CPUS]; \ SWITCH_ENTER \ SWITCH_DO_LAZY_FPU \ __asm__ __volatile__( \ @@ -86,6 +87,7 @@ if(!(next->tss.flags & SPARC_FLAG_KTHREAD) && \ !(next->flags & PF_EXITING)) \ switch_to_context(next); \ + next->mm->cpu_vm_mask |= (1 << smp_processor_id()); \ task_pc = ((unsigned long) &&here) - 0x8; \ __asm__ __volatile__( \ "rd %%psr, %%g4\n\t" \ @@ -136,6 +138,7 @@ __asm__ __volatile__(" rd %%psr, %0 + nop; nop; nop; or %0, %1, %0 wr %0, 0x0, %%psr nop; nop; nop @@ -149,7 +152,8 @@ unsigned long tmp; __asm__ __volatile__(" - rd %%psr, %0 + rd %%psr, %0 + nop; nop; nop; andn %0, %1, %0 wr %0, 0x0, %%psr nop; nop; nop @@ -168,23 +172,22 @@ extern __inline__ unsigned long swap_pil(unsigned long __new_psr) { - unsigned long retval, tmp1, tmp2; + unsigned long retval; __asm__ __volatile__(" rd %%psr, %0 - and %0, %4, %1 - and %3, %4, %2 - xorcc %1, %2, %%g0 + nop; nop; nop; + and %0, %2, %%g1 + and %1, %2, %%g2 + xorcc %%g1, %%g2, %%g0 be 1f nop - wr %0, %4, %%psr - nop - nop - nop + wr %0, %2, %%psr + nop; nop; nop; 1: -" : "=r" (retval), "=r" (tmp1), "=r" (tmp2) +" : "=r" (retval) : "r" (__new_psr), "i" (PSR_PIL) - : "memory", "cc"); + : "g1", "g2", "memory", "cc"); return retval; } @@ -195,6 +198,7 @@ __asm__ __volatile__(" rd %%psr, %0 + nop; nop; nop; or %0, %1, %%g1 wr %%g1, 0x0, %%psr nop; nop; nop @@ -211,6 +215,28 @@ #ifdef __SMP__ +/* This goes away after lockups have been found... */ +#ifndef DEBUG_IRQLOCK +#define DEBUG_IRQLOCK +#endif + +extern unsigned char global_irq_holder; + +#define save_flags(x) \ +do { ((x) = ((global_irq_holder == (unsigned char) smp_processor_id()) ? 1 : \ + ((getipl() & PSR_PIL) ? 2 : 0))); } while(0) + +#define save_and_cli(flags) do { save_flags(flags); cli(); } while(0) + +#ifdef DEBUG_IRQLOCK +extern void __global_cli(void); +extern void __global_sti(void); +extern void __global_restore_flags(unsigned long flags); +#define cli() __global_cli() +#define sti() __global_sti() +#define restore_flags(flags) __global_restore_flags(flags) +#else + /* Visit arch/sparc/lib/irqlock.S for all the fun details... */ #define cli() __asm__ __volatile__("mov %%o7, %%g4\n\t" \ "call ___global_cli\n\t" \ @@ -230,12 +256,6 @@ "memory", "cc"); \ } while(0) -extern unsigned char global_irq_holder; - -#define save_flags(x) \ -do { ((x) = ((global_irq_holder == (unsigned char) smp_processor_id()) ? 1 : \ - ((getipl() & PSR_PIL) ? 
2 : 0))); } while(0) - #define restore_flags(flags) \ do { register unsigned long bits asm("g7"); \ bits = flags; \ @@ -248,7 +268,7 @@ "memory", "cc"); \ } while(0) -#define save_and_cli(flags) do { save_flags(flags); cli(); } while(0) +#endif /* DEBUG_IRQLOCK */ #else @@ -267,6 +287,12 @@ extern __inline__ unsigned long xchg_u32(__volatile__ unsigned long *m, unsigned long val) { +#ifdef __SMP__ + __asm__ __volatile__("swap [%2], %0" + : "=&r" (val) + : "0" (val), "r" (m)); + return val; +#else register unsigned long *ptr asm("g1"); register unsigned long ret asm("g2"); @@ -282,6 +308,7 @@ : "g3", "g4", "g7", "memory", "cc"); return ret; +#endif } #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc64/a.out.h linux/include/asm-sparc64/a.out.h --- v2.1.37/linux/include/asm-sparc64/a.out.h Mon Apr 14 16:28:21 1997 +++ linux/include/asm-sparc64/a.out.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: a.out.h,v 1.3 1997/04/07 18:57:14 jj Exp $ */ +/* $Id: a.out.h,v 1.4 1997/05/04 07:21:19 davem Exp $ */ #ifndef __SPARC64_A_OUT_H__ #define __SPARC64_A_OUT_H__ @@ -95,7 +95,7 @@ #ifdef __KERNEL__ -#define STACK_TOP TASK_SIZE +#define STACK_TOP (current->tss.flags & SPARC_FLAG_32BIT ? 0xf0000000 : TASK_SIZE) #endif diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc64/asm_offsets.h linux/include/asm-sparc64/asm_offsets.h --- v2.1.37/linux/include/asm-sparc64/asm_offsets.h Tue May 13 22:41:18 1997 +++ linux/include/asm-sparc64/asm_offsets.h Wed May 14 15:01:21 1997 @@ -30,147 +30,145 @@ #define ASIZ_task_next_run 0x00000008 #define AOFF_task_prev_run 0x000000a0 #define ASIZ_task_prev_run 0x00000008 -#define AOFF_task_saved_kernel_stack 0x000000a8 -#define ASIZ_task_saved_kernel_stack 0x00000008 -#define AOFF_task_kernel_stack_page 0x000000b0 -#define ASIZ_task_kernel_stack_page 0x00000008 -#define AOFF_task_exit_code 0x000000b8 +#define AOFF_task_exit_code 0x000000a8 #define ASIZ_task_exit_code 0x00000004 -#define AOFF_task_exit_signal 0x000000bc +#define AOFF_task_exit_signal 0x000000ac #define ASIZ_task_exit_signal 0x00000004 -#define AOFF_task_personality 0x000000c0 +#define AOFF_task_personality 0x000000b0 #define ASIZ_task_personality 0x00000008 -#define AOFF_task_pid 0x000000cc +#define AOFF_task_pid 0x000000bc #define ASIZ_task_pid 0x00000004 -#define AOFF_task_pgrp 0x000000d0 +#define AOFF_task_pgrp 0x000000c0 #define ASIZ_task_pgrp 0x00000004 -#define AOFF_task_tty_old_pgrp 0x000000d4 +#define AOFF_task_tty_old_pgrp 0x000000c4 #define ASIZ_task_tty_old_pgrp 0x00000004 -#define AOFF_task_session 0x000000d8 +#define AOFF_task_session 0x000000c8 #define ASIZ_task_session 0x00000004 -#define AOFF_task_leader 0x000000dc +#define AOFF_task_leader 0x000000cc #define ASIZ_task_leader 0x00000004 -#define AOFF_task_ngroups 0x000000e0 +#define AOFF_task_ngroups 0x000000d0 #define ASIZ_task_ngroups 0x00000004 -#define AOFF_task_groups 0x000000e4 +#define AOFF_task_groups 0x000000d4 #define ASIZ_task_groups 0x00000080 -#define AOFF_task_p_opptr 0x00000168 +#define AOFF_task_p_opptr 0x00000158 #define ASIZ_task_p_opptr 0x00000008 -#define AOFF_task_p_pptr 0x00000170 +#define AOFF_task_p_pptr 0x00000160 #define ASIZ_task_p_pptr 0x00000008 -#define AOFF_task_p_cptr 0x00000178 +#define AOFF_task_p_cptr 0x00000168 #define ASIZ_task_p_cptr 0x00000008 -#define AOFF_task_p_ysptr 0x00000180 +#define AOFF_task_p_ysptr 0x00000170 #define ASIZ_task_p_ysptr 0x00000008 -#define AOFF_task_p_osptr 
0x00000188 +#define AOFF_task_p_osptr 0x00000178 #define ASIZ_task_p_osptr 0x00000008 -#define AOFF_task_pidhash_next 0x00000190 +#define AOFF_task_pidhash_next 0x00000180 #define ASIZ_task_pidhash_next 0x00000008 -#define AOFF_task_pidhash_pprev 0x00000198 +#define AOFF_task_pidhash_pprev 0x00000188 #define ASIZ_task_pidhash_pprev 0x00000008 -#define AOFF_task_tarray_ptr 0x000001a0 +#define AOFF_task_tarray_ptr 0x00000190 #define ASIZ_task_tarray_ptr 0x00000008 -#define AOFF_task_wait_chldexit 0x000001a8 +#define AOFF_task_wait_chldexit 0x00000198 #define ASIZ_task_wait_chldexit 0x00000008 -#define AOFF_task_uid 0x000001b0 +#define AOFF_task_uid 0x000001a0 #define ASIZ_task_uid 0x00000002 -#define AOFF_task_euid 0x000001b2 +#define AOFF_task_euid 0x000001a2 #define ASIZ_task_euid 0x00000002 -#define AOFF_task_suid 0x000001b4 +#define AOFF_task_suid 0x000001a4 #define ASIZ_task_suid 0x00000002 -#define AOFF_task_fsuid 0x000001b6 +#define AOFF_task_fsuid 0x000001a6 #define ASIZ_task_fsuid 0x00000002 -#define AOFF_task_gid 0x000001b8 +#define AOFF_task_gid 0x000001a8 #define ASIZ_task_gid 0x00000002 -#define AOFF_task_egid 0x000001ba +#define AOFF_task_egid 0x000001aa #define ASIZ_task_egid 0x00000002 -#define AOFF_task_sgid 0x000001bc +#define AOFF_task_sgid 0x000001ac #define ASIZ_task_sgid 0x00000002 -#define AOFF_task_fsgid 0x000001be +#define AOFF_task_fsgid 0x000001ae #define ASIZ_task_fsgid 0x00000002 -#define AOFF_task_timeout 0x000001c0 +#define AOFF_task_timeout 0x000001b0 #define ASIZ_task_timeout 0x00000008 -#define AOFF_task_policy 0x000001c8 +#define AOFF_task_policy 0x000001b8 #define ASIZ_task_policy 0x00000008 -#define AOFF_task_rt_priority 0x000001d0 +#define AOFF_task_rt_priority 0x000001c0 #define ASIZ_task_rt_priority 0x00000008 -#define AOFF_task_it_real_value 0x000001d8 +#define AOFF_task_it_real_value 0x000001c8 #define ASIZ_task_it_real_value 0x00000008 -#define AOFF_task_it_prof_value 0x000001e0 +#define AOFF_task_it_prof_value 0x000001d0 #define ASIZ_task_it_prof_value 0x00000008 -#define AOFF_task_it_virt_value 0x000001e8 +#define AOFF_task_it_virt_value 0x000001d8 #define ASIZ_task_it_virt_value 0x00000008 -#define AOFF_task_it_real_incr 0x000001f0 +#define AOFF_task_it_real_incr 0x000001e0 #define ASIZ_task_it_real_incr 0x00000008 -#define AOFF_task_it_prof_incr 0x000001f8 +#define AOFF_task_it_prof_incr 0x000001e8 #define ASIZ_task_it_prof_incr 0x00000008 -#define AOFF_task_it_virt_incr 0x00000200 +#define AOFF_task_it_virt_incr 0x000001f0 #define ASIZ_task_it_virt_incr 0x00000008 -#define AOFF_task_real_timer 0x00000208 +#define AOFF_task_real_timer 0x000001f8 #define ASIZ_task_real_timer 0x00000028 -#define AOFF_task_utime 0x00000230 +#define AOFF_task_utime 0x00000220 #define ASIZ_task_utime 0x00000008 -#define AOFF_task_stime 0x00000238 +#define AOFF_task_stime 0x00000228 #define ASIZ_task_stime 0x00000008 -#define AOFF_task_cutime 0x00000240 +#define AOFF_task_cutime 0x00000230 #define ASIZ_task_cutime 0x00000008 -#define AOFF_task_cstime 0x00000248 +#define AOFF_task_cstime 0x00000238 #define ASIZ_task_cstime 0x00000008 -#define AOFF_task_start_time 0x00000250 +#define AOFF_task_start_time 0x00000240 #define ASIZ_task_start_time 0x00000008 -#define AOFF_task_min_flt 0x00000258 +#define AOFF_task_min_flt 0x00000248 #define ASIZ_task_min_flt 0x00000008 -#define AOFF_task_maj_flt 0x00000260 +#define AOFF_task_maj_flt 0x00000250 #define ASIZ_task_maj_flt 0x00000008 -#define AOFF_task_nswap 0x00000268 +#define AOFF_task_nswap 0x00000258 #define ASIZ_task_nswap 
0x00000008 -#define AOFF_task_cmin_flt 0x00000270 +#define AOFF_task_cmin_flt 0x00000260 #define ASIZ_task_cmin_flt 0x00000008 -#define AOFF_task_cmaj_flt 0x00000278 +#define AOFF_task_cmaj_flt 0x00000268 #define ASIZ_task_cmaj_flt 0x00000008 -#define AOFF_task_cnswap 0x00000280 +#define AOFF_task_cnswap 0x00000270 #define ASIZ_task_cnswap 0x00000008 -#define AOFF_task_swap_address 0x00000290 +#define AOFF_task_swap_address 0x00000280 #define ASIZ_task_swap_address 0x00000008 -#define AOFF_task_old_maj_flt 0x00000298 +#define AOFF_task_old_maj_flt 0x00000288 #define ASIZ_task_old_maj_flt 0x00000008 -#define AOFF_task_dec_flt 0x000002a0 +#define AOFF_task_dec_flt 0x00000290 #define ASIZ_task_dec_flt 0x00000008 -#define AOFF_task_swap_cnt 0x000002a8 +#define AOFF_task_swap_cnt 0x00000298 #define ASIZ_task_swap_cnt 0x00000008 -#define AOFF_task_rlim 0x000002b0 +#define AOFF_task_rlim 0x000002a0 #define ASIZ_task_rlim 0x000000a0 -#define AOFF_task_used_math 0x00000350 +#define AOFF_task_used_math 0x00000340 #define ASIZ_task_used_math 0x00000002 -#define AOFF_task_comm 0x00000352 +#define AOFF_task_comm 0x00000342 #define ASIZ_task_comm 0x00000010 -#define AOFF_task_link_count 0x00000364 +#define AOFF_task_link_count 0x00000354 #define ASIZ_task_link_count 0x00000004 -#define AOFF_task_tty 0x00000368 +#define AOFF_task_tty 0x00000358 #define ASIZ_task_tty 0x00000008 -#define AOFF_task_semundo 0x00000370 +#define AOFF_task_semundo 0x00000360 #define ASIZ_task_semundo 0x00000008 -#define AOFF_task_semsleeping 0x00000378 +#define AOFF_task_semsleeping 0x00000368 #define ASIZ_task_semsleeping 0x00000008 -#define AOFF_task_ldt 0x00000380 +#define AOFF_task_ldt 0x00000370 #define ASIZ_task_ldt 0x00000008 -#define AOFF_task_tss 0x000003c0 +#define AOFF_task_tss 0x00000380 #define ASIZ_task_tss 0x00000600 -#define AOFF_task_fs 0x000009c0 +#define AOFF_task_fs 0x00000980 #define ASIZ_task_fs 0x00000008 -#define AOFF_task_files 0x000009c8 +#define AOFF_task_files 0x00000988 #define ASIZ_task_files 0x00000008 -#define AOFF_task_mm 0x000009d0 +#define AOFF_task_mm 0x00000990 #define ASIZ_task_mm 0x00000008 -#define AOFF_task_sig 0x000009d8 +#define AOFF_task_sig 0x00000998 #define ASIZ_task_sig 0x00000008 -#define AOFF_task_processor 0x000009e0 +#define AOFF_task_has_cpu 0x000009a0 +#define ASIZ_task_has_cpu 0x00000004 +#define AOFF_task_processor 0x000009a4 #define ASIZ_task_processor 0x00000004 -#define AOFF_task_last_processor 0x000009e4 +#define AOFF_task_last_processor 0x000009a8 #define ASIZ_task_last_processor 0x00000004 -#define AOFF_task_lock_depth 0x000009e8 +#define AOFF_task_lock_depth 0x000009ac #define ASIZ_task_lock_depth 0x00000004 -#define AOFF_task_sigmask_lock 0x000009ec +#define AOFF_task_sigmask_lock 0x000009b0 #define ASIZ_task_sigmask_lock 0x00000000 #define AOFF_mm_mmap 0x00000000 #define ASIZ_mm_mmap 0x00000008 @@ -216,6 +214,8 @@ #define ASIZ_mm_locked_vm 0x00000008 #define AOFF_mm_def_flags 0x000000b0 #define ASIZ_mm_def_flags 0x00000008 +#define AOFF_mm_cpu_vm_mask 0x000000b8 +#define ASIZ_mm_cpu_vm_mask 0x00000008 #define AOFF_thread_float_regs 0x00000000 #define ASIZ_thread_float_regs 0x00000100 #define AOFF_thread_fsr 0x00000100 diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc64/bitops.h linux/include/asm-sparc64/bitops.h --- v2.1.37/linux/include/asm-sparc64/bitops.h Mon Apr 14 16:28:22 1997 +++ linux/include/asm-sparc64/bitops.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: bitops.h,v 1.11 1997/04/10 23:32:42 davem Exp $ +/* $Id: bitops.h,v 1.12 
1997/05/14 20:48:04 davem Exp $ * bitops.h: Bit string operations on the V9. * * Copyright 1996 David S. Miller (davem@caip.rutgers.edu) @@ -19,7 +19,7 @@ * all bit-ops return 0 if bit was previously clear and != 0 otherwise. */ -extern __inline__ unsigned long set_bit(unsigned long nr, void *addr) +extern __inline__ unsigned long test_and_set_bit(unsigned long nr, void *addr) { unsigned long oldbit; unsigned long temp0, temp1; @@ -42,7 +42,12 @@ return oldbit != 0; } -extern __inline__ unsigned long clear_bit(unsigned long nr, void *addr) +extern __inline__ void set_bit(unsigned long nr, void *addr) +{ + (void) test_and_set_bit(nr, addr); +} + +extern __inline__ unsigned long test_and_clear_bit(unsigned long nr, void *addr) { unsigned long oldbit; unsigned long temp0, temp1; @@ -65,7 +70,12 @@ return oldbit != 0; } -extern __inline__ unsigned long change_bit(unsigned long nr, void *addr) +extern __inline__ void clear_bit(unsigned long nr, void *addr) +{ + (void) test_and_clear_bit(nr, addr); +} + +extern __inline__ unsigned long test_and_change_bit(unsigned long nr, void *addr) { unsigned long oldbit; unsigned long temp0, temp1; @@ -86,6 +96,11 @@ return oldbit != 0; } +extern __inline__ void change_bit(unsigned long nr, void *addr) +{ + (void) test_and_change_bit(nr, addr); +} + extern __inline__ unsigned long test_bit(int nr, __const__ void *addr) { return 1UL & (((__const__ int *) addr)[nr >> 5] >> (nr & 31)); @@ -266,8 +281,8 @@ #define ext2_find_next_zero_bit find_next_zero_le_bit /* Bitmap functions for the minix filesystem. */ -#define minix_set_bit(nr,addr) set_bit(nr,addr) -#define minix_clear_bit(nr,addr) clear_bit(nr,addr) +#define minix_set_bit(nr,addr) test_and_set_bit(nr,addr) +#define minix_clear_bit(nr,addr) test_and_clear_bit(nr,addr) #define minix_test_bit(nr,addr) test_bit(nr,addr) #define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc64/checksum.h linux/include/asm-sparc64/checksum.h --- v2.1.37/linux/include/asm-sparc64/checksum.h Mon Apr 14 16:28:22 1997 +++ linux/include/asm-sparc64/checksum.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: checksum.h,v 1.6 1997/04/10 23:32:43 davem Exp $ */ +/* $Id: checksum.h,v 1.7 1997/05/14 07:02:44 davem Exp $ */ #ifndef __SPARC64_CHECKSUM_H #define __SPARC64_CHECKSUM_H @@ -54,6 +54,7 @@ __asm__ __volatile__ (" call __csum_partial_copy_sparc_generic mov %4, %%g7 + srl %%o0, 0, %%o0 " : "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (sum) : "o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g5", "g7"); return (unsigned int)ret; @@ -81,6 +82,7 @@ 1: call __csum_partial_copy_sparc_generic stx %5, [%%sp + 0x7ff + 128] + srl %%o0, 0, %%o0 " : "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (s), "r" (err) : "o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g5", "g7"); return (unsigned int)ret; @@ -108,6 +110,7 @@ 1: call __csum_partial_copy_sparc_generic stx %5, [%%sp + 0x7ff + 128] + srl %%o0, 0, %%o0 " : "=r" (ret) : "0" (ret), "r" (d), "r" (l), "r" (s), "r" (err) : "o1", "o2", "o3", "o4", "o5", "o7", "g1", "g2", "g3", "g5", "g7"); return (unsigned int)ret; @@ -151,6 +154,7 @@ srl %%g2, 16, %0 addc %0, %%g0, %0 xnor %%g0, %0, %0 + srl %0, 0, %0 " : "=r" (sum), "=&r" (iph) : "r" (ihl), "1" (iph) : "g2", "g3", "g7", "cc"); @@ -179,11 +183,11 @@ " : "=r" (sum), "=r" (saddr) : "r" (daddr), "r" ((proto<<16)+len), "0" (sum), "1" (saddr) : "cc"); - return sum; + return (sum & 0xffff); } /* Fold a partial checksum without adding pseudo headers. 
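
The sparc64 checksum fixes above zero-extend the 64-bit result registers (the added srl %%o0, 0, %%o0) and mask folded sums down to 16 bits (the new & 0xffff), so callers never see stale high bits. For reference, csum_fold() below computes, in portable C, roughly:

    static unsigned short csum_fold_c(unsigned int sum)
    {
            sum  = (sum & 0xffff) + (sum >> 16); /* add the two 16-bit halves */
            sum += sum >> 16;                    /* fold the carry back in */
            return (unsigned short) ~sum;        /* one's complement result */
    }

csum_fold_c is just an illustrative name; the real implementation stays in inline assembly.
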
*/ -extern __inline__ unsigned int csum_fold(unsigned int sum) +extern __inline__ unsigned short csum_fold(unsigned int sum) { unsigned int tmp; @@ -195,7 +199,7 @@ " : "=&r" (sum), "=r" (tmp) : "0" (sum), "1" (sum<<16) : "cc"); - return sum; + return (sum & 0xffff); } #define _HAVE_ARCH_IPV6_CSUM diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc64/current.h linux/include/asm-sparc64/current.h --- v2.1.37/linux/include/asm-sparc64/current.h Mon Dec 30 02:00:02 1996 +++ linux/include/asm-sparc64/current.h Wed May 14 15:01:21 1997 @@ -1,12 +1,6 @@ #ifndef _SPARC64_CURRENT_H #define _SPARC64_CURRENT_H -/* Some architectures may want to do something "clever" here since - * this is the most frequently accessed piece of data in the entire - * kernel. - */ -extern struct task_struct *current_set[NR_CPUS]; - /* Sparc rules... */ register struct task_struct *current asm("g6"); diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc64/elf.h linux/include/asm-sparc64/elf.h --- v2.1.37/linux/include/asm-sparc64/elf.h Mon Apr 14 16:28:22 1997 +++ linux/include/asm-sparc64/elf.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: elf.h,v 1.3 1997/04/04 00:50:12 davem Exp $ */ +/* $Id: elf.h,v 1.4 1997/05/04 07:21:21 davem Exp $ */ #ifndef __ASM_SPARC64_ELF_H #define __ASM_SPARC64_ELF_H @@ -16,11 +16,6 @@ typedef unsigned long elf_fpregset_t; /* - * This is used to ensure we don't load something for the wrong architecture. - */ -#define elf_check_arch(x) ((x) == EM_SPARC) - -/* * These are used to set parameters in the core dumps. */ #ifndef ELF_ARCH @@ -28,6 +23,11 @@ #define ELF_CLASS ELFCLASS64 #define ELF_DATA ELFDATA2MSB; #endif + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ((x) == ELF_ARCH) /* Might be EM_SPARC64 or EM_SPARC */ #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc64/processor.h linux/include/asm-sparc64/processor.h --- v2.1.37/linux/include/asm-sparc64/processor.h Tue May 13 22:41:18 1997 +++ linux/include/asm-sparc64/processor.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: processor.h,v 1.23 1997/04/26 22:52:34 davem Exp $ +/* $Id: processor.h,v 1.24 1997/05/04 07:21:21 davem Exp $ * include/asm-sparc64/processor.h * * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) @@ -179,16 +179,13 @@ #define release_thread(tsk) do { } while(0) #ifdef __KERNEL__ -/* Allocation and freeing of basic task resources. */ +/* Allocation and freeing of task_struct and kernel stack. */ +#define alloc_task_struct() ((struct task_struct *)__get_free_pages(GFP_KERNEL, 1, 0)) +#define free_task_struct(tsk) free_pages((unsigned long)(tsk),1) + +#define init_task (init_task_union.task) +#define init_stack (init_task_union.stack) -/* XXX FIXME For task_struct must use SLAB or something other than - * XXX kmalloc() as FPU registers in TSS require that entire structure - * XXX be 64-byte aligned as well. 
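
The removed XXX comment above worried about 64-byte alignment of the FPU area in the TSS; allocating the task_struct as a whole page-pair via __get_free_pages(GFP_KERNEL, 1, 0) settles that, since the structure now always starts on a page boundary. The init_task_union referenced above presumably has the usual task-plus-stack shape, something like this (sizes are illustrative assumptions, 8 KB pages and 8-byte longs):

    union task_union {
            struct task_struct task;   /* at the bottom of the block */
            unsigned long stack[2048]; /* 16 KB total: task + kernel stack */
    };
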
- */ -#define alloc_kernel_stack(tsk) __get_free_page(GFP_KERNEL) -#define free_kernel_stack(stack) free_page(stack) -#define alloc_task_struct() kmalloc(sizeof(struct task_struct), GFP_KERNEL) -#define free_task_struct(tsk) kfree(tsk) #endif /* __KERNEL__ */ #endif /* !(__ASSEMBLY__) */ diff -u --recursive --new-file v2.1.37/linux/include/asm-sparc64/system.h linux/include/asm-sparc64/system.h --- v2.1.37/linux/include/asm-sparc64/system.h Mon Apr 14 16:28:24 1997 +++ linux/include/asm-sparc64/system.h Wed May 14 15:01:21 1997 @@ -1,4 +1,4 @@ -/* $Id: system.h,v 1.15 1997/04/10 23:32:49 davem Exp $ */ +/* $Id: system.h,v 1.16 1997/05/14 20:48:07 davem Exp $ */ #ifndef __SPARC64_SYSTEM_H #define __SPARC64_SYSTEM_H @@ -132,6 +132,7 @@ do { \ __label__ switch_continue; \ register unsigned long task_pc asm("o7"); \ + extern struct task_struct *current_set[NR_CPUS]; \ SWITCH_DO_LAZY_FPU(next); \ task_pc = ((unsigned long) &&switch_continue) - 0x8; \ __asm__ __volatile__( \ diff -u --recursive --new-file v2.1.37/linux/include/linux/fs.h linux/include/linux/fs.h --- v2.1.37/linux/include/linux/fs.h Tue May 13 22:41:19 1997 +++ linux/include/linux/fs.h Thu May 15 15:52:10 1997 @@ -611,9 +611,8 @@ #define BUF_CLEAN 0 #define BUF_LOCKED 1 /* Buffers scheduled for write */ -#define BUF_LOCKED1 2 /* Supers, inodes */ -#define BUF_DIRTY 3 /* Dirty buffers, not yet scheduled for write */ -#define NR_LIST 4 +#define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */ +#define NR_LIST 3 void mark_buffer_uptodate(struct buffer_head * bh, int on); diff -u --recursive --new-file v2.1.37/linux/include/linux/in6.h linux/include/linux/in6.h --- v2.1.37/linux/include/linux/in6.h Thu Mar 27 14:40:10 1997 +++ linux/include/linux/in6.h Thu May 15 14:43:52 1997 @@ -104,7 +104,7 @@ */ #define IPV6_ADDRFORM 1 -#define IPV6_RXINFO 2 +#define IPV6_PKTINFO 2 #define IPV6_RXHOPOPTS 3 #define IPV6_RXDSTOPTS 4 #define IPV6_RXSRCRT 5 @@ -115,8 +115,6 @@ /* * Alternative names */ -#define IPV6_TXINFO IPV6_RXINFO -#define SCM_SRCINFO IPV6_TXINFO #define SCM_SRCRT IPV6_RXSRCRT #define IPV6_UNICAST_HOPS 16 diff -u --recursive --new-file v2.1.37/linux/include/linux/inet.h linux/include/linux/inet.h --- v2.1.37/linux/include/linux/inet.h Tue Aug 1 00:02:46 1995 +++ linux/include/linux/inet.h Wed May 14 15:01:21 1997 @@ -45,8 +45,8 @@ #ifdef __KERNEL__ extern void inet_proto_init(struct net_proto *pro); -extern char *in_ntoa(unsigned long in); -extern unsigned long in_aton(const char *str); +extern char *in_ntoa(__u32 in); +extern __u32 in_aton(const char *str); #endif #endif /* _LINUX_INET_H */ diff -u --recursive --new-file v2.1.37/linux/include/linux/malloc.h linux/include/linux/malloc.h --- v2.1.37/linux/include/linux/malloc.h Thu Feb 6 02:53:43 1997 +++ linux/include/linux/malloc.h Thu May 15 15:52:10 1997 @@ -1,11 +1,5 @@ #ifndef _LINUX_MALLOC_H #define _LINUX_MALLOC_H -#include - -void * kmalloc(unsigned int size, int priority); -void kfree(void * obj); - -#define kfree_s(a,b) kfree(a) - +#include #endif /* _LINUX_MALLOC_H */ diff -u --recursive --new-file v2.1.37/linux/include/linux/mm.h linux/include/linux/mm.h --- v2.1.37/linux/include/linux/mm.h Tue May 13 22:41:19 1997 +++ linux/include/linux/mm.h Thu May 15 15:52:10 1997 @@ -137,6 +137,7 @@ #define PG_decr_after 5 #define PG_swap_unlock_after 6 #define PG_DMA 7 +#define PG_Slab 8 #define PG_reserved 31 /* Make it prettier to test the above... 
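
PG_Slab above gives the slab allocator a way to tag its pages in mem_map, with matching helpers defined just below; the intended pattern is presumably:

    /* slab.c, on taking pages from the page allocator */
    PageSetSlab(page);

    /* ... and on handing them back */
    PageClearSlab(page);

    /* anyone else can then recognise slab-owned pages */
    if (PageSlab(page))
            /* don't treat it as an ordinary free page */ ;

This mirrors how the other PG_* bits are wrapped in test_bit()/set_bit()/clear_bit() calls.
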
*/ @@ -149,8 +150,12 @@ #define PageDecrAfter(page) (test_bit(PG_decr_after, &(page)->flags)) #define PageSwapUnlockAfter(page) (test_bit(PG_swap_unlock_after, &(page)->flags)) #define PageDMA(page) (test_bit(PG_DMA, &(page)->flags)) +#define PageSlab(page) (test_bit(PG_Slab, &(page)->flags)) #define PageReserved(page) (test_bit(PG_reserved, &(page)->flags)) +#define PageSetSlab(page) (set_bit(PG_Slab, &(page)->flags)) +#define PageClearSlab(page) (clear_bit(PG_Slab, &(page)->flags)) + /* * page->reserved denotes a page which must never be accessed (which * may not even be present). @@ -260,7 +265,7 @@ extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot); extern void vmtruncate(struct inode * inode, unsigned long offset); -extern void handle_mm_fault(struct vm_area_struct *vma, unsigned long address, int write_access); +extern void handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access); extern unsigned long paging_init(unsigned long start_mem, unsigned long end_mem); extern void mem_init(unsigned long start_mem, unsigned long end_mem); diff -u --recursive --new-file v2.1.37/linux/include/linux/sched.h linux/include/linux/sched.h --- v2.1.37/linux/include/linux/sched.h Tue May 13 22:41:19 1997 +++ linux/include/linux/sched.h Thu May 15 15:52:10 1997 @@ -151,6 +151,7 @@ unsigned long arg_start, arg_end, env_start, env_end; unsigned long rss, total_vm, locked_vm; unsigned long def_flags; + unsigned long cpu_vm_mask; }; #define INIT_MM { \ @@ -161,7 +162,7 @@ 0, 0, 0, 0, \ 0, 0, 0, 0, \ 0, 0, 0, \ - 0 } + 0, 0 } struct signal_struct { atomic_t count; diff -u --recursive --new-file v2.1.37/linux/include/linux/skbuff.h linux/include/linux/skbuff.h --- v2.1.37/linux/include/linux/skbuff.h Tue May 13 22:41:19 1997 +++ linux/include/linux/skbuff.h Thu May 15 15:52:10 1997 @@ -413,13 +413,15 @@ restore_flags(flags); } +extern const char skb_put_errstr[]; +extern const char skb_push_errstr[]; + /* * Add data to an sk_buff */ extern __inline__ unsigned char *skb_put(struct sk_buff *skb, unsigned int len) { - extern char *skb_put_errstr; unsigned char *tmp=skb->tail; skb->tail+=len; skb->len+=len; @@ -434,7 +436,6 @@ extern __inline__ unsigned char *skb_push(struct sk_buff *skb, unsigned int len) { - extern char *skb_push_errstr; skb->data-=len; skb->len+=len; if(skb->datahead) diff -u --recursive --new-file v2.1.37/linux/include/linux/slab.h linux/include/linux/slab.h --- v2.1.37/linux/include/linux/slab.h Tue May 13 22:41:19 1997 +++ linux/include/linux/slab.h Thu May 15 15:52:10 1997 @@ -12,6 +12,7 @@ typedef struct kmem_cache_s kmem_cache_t; #include +#include /* flags for kmem_cache_alloc() */ #define SLAB_BUFFER GFP_BUFFER /* 0x00 */ @@ -22,39 +23,48 @@ #define SLAB_NFS GFP_NFS /* 0x05 */ #define SLAB_DMA GFP_DMA /* 0x08 */ #define SLAB_LEVEL_MASK GFP_LEVEL_MASK /* 0x0f */ -#define SLAB_NO_GROW 0x00001000UL /* don't add another slab during an alloc */ +#define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */ /* flags to pass to kmem_cache_create(). - * The first 3 are only valid when the allocator has been build + * The first 3 are only valid when the allocator as been build * SLAB_DEBUG_SUPPORT. 
*/ -#define SLAB_DEBUG_FREE 0x00000100UL /* Peform time consuming ptr checks on free */ -#define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor, on release, to conform state */ +#define SLAB_DEBUG_FREE 0x00000100UL /* Peform (expensive) checks on free */ +#define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */ #define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */ -#define SLAB_HWCACHE_ALIGN 0x00000800UL /* align objs on an hw cache line */ +#define SLAB_POISION 0x00000800UL /* Poision objects */ +#define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */ +#define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ +#if 0 +#define SLAB_HIGH_PACK 0x00004000UL /* XXX */ +#endif /* flags passed to a constructor func */ #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ #define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */ -#define SLAB_DTOR_ATOMIC 0x002UL /* tell deconstructor it can't sleep */ #define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */ /* prototypes */ extern long kmem_cache_init(long, long); extern void kmem_cache_sizes_init(void); -extern struct kmem_cache_s *kmem_cache_create(const char *, unsigned long, unsigned long, unsigned long, void (*)(void *, int, unsigned long), void (*)(void *, int, unsigned long)); -extern int kmem_cache_destroy(struct kmem_cache_s *); -extern int kmem_cache_shrink(struct kmem_cache_s *, int); -extern void *kmem_cache_alloc(struct kmem_cache_s *, unsigned long); -extern void kmem_cache_free(struct kmem_cache_s *, void *); -extern void *kmem_alloc(unsigned long, unsigned long); -extern void kmem_free(void *, unsigned long); +extern kmem_cache_t *kmem_find_general_cachep(size_t); +extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long, + void (*)(void *, kmem_cache_t *, unsigned long), + void (*)(void *, kmem_cache_t *, unsigned long)); +extern int kmem_cache_shrink(kmem_cache_t *); +extern void *kmem_cache_alloc(kmem_cache_t *, int); +extern void kmem_cache_free(kmem_cache_t *, void *); + +extern void *kmalloc(size_t, int); +extern void kfree(void *); +extern void kfree_s(void *, size_t); + extern int kmem_cache_reap(int, int, int); extern int get_slabinfo(char *); -/* System wide slabs. */ -extern kmem_cache_t *vm_area_cachep; -extern kmem_cache_t *mm_cachep; +/* System wide caches */ +extern kmem_cache_t *vm_area_cachep; +extern kmem_cache_t *mm_cachep; #endif /* __KERNEL__ */ diff -u --recursive --new-file v2.1.37/linux/include/linux/socket.h linux/include/linux/socket.h --- v2.1.37/linux/include/linux/socket.h Tue May 13 22:41:19 1997 +++ linux/include/linux/socket.h Thu May 15 14:43:52 1997 @@ -3,8 +3,8 @@ #include /* arch-dependent defines */ #include /* the SIOCxxx I/O controls */ -#include /* pid_t */ #include /* iovec support */ +#include /* pid_t */ typedef unsigned short sa_family_t; @@ -50,7 +50,6 @@ __kernel_size_t cmsg_len; /* data byte count, including hdr */ int cmsg_level; /* originating protocol */ int cmsg_type; /* protocol-specific type */ - unsigned char cmsg_data[0]; }; /* @@ -58,17 +57,13 @@ * Table 5-14 of POSIX 1003.1g */ -#define CMSG_DATA(cmsg) (cmsg)->cmsg_data #define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr(mhdr, cmsg) #define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) ) -/* Stevens's Adv. API specifies CMSG_SPACE & CMSG_LENGTH, - * I cannot understand, what the differenece? 
--ANK - */ - -#define CMSG_SPACE(len) CMSG_ALIGN((len)+sizeof(struct cmsghdr)) -#define CMSG_LENGTH(len) CMSG_ALIGN((len)+sizeof(struct cmsghdr)) +#define CMSG_DATA(cmsg) ((void *)(cmsg) + CMSG_ALIGN(sizeof(struct cmsghdr))) +#define CMSG_SPACE(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + CMSG_ALIGN(len)) +#define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + (len)) #define CMSG_FIRSTHDR(msg) ((msg)->msg_controllen >= sizeof(struct cmsghdr) ? \ (struct cmsghdr *)(msg)->msg_control : \ @@ -148,6 +143,7 @@ #define AF_DECNET 12 /* Reserved for DECnet project */ #define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/ #define AF_SECURITY 14 /* Security callback pseudo AF */ +#define pseudo_AF_KEY 15 /* PF_KEY key management API */ #define AF_MAX 32 /* For now.. */ /* Protocol families, same as address families. */ @@ -167,6 +163,7 @@ #define PF_DECNET AF_DECNET #define PF_NETBEUI AF_NETBEUI #define PF_SECURITY AF_SECURITY +#define PF_KEY pseudo_AF_KEY #define PF_MAX AF_MAX diff -u --recursive --new-file v2.1.37/linux/include/linux/sysctl.h linux/include/linux/sysctl.h --- v2.1.37/linux/include/linux/sysctl.h Tue May 13 22:41:19 1997 +++ linux/include/linux/sysctl.h Wed May 14 15:01:21 1997 @@ -147,6 +147,7 @@ NET_IPV4_ACCEPT_REDIRECTS, NET_IPV4_SECURE_REDIRECTS, NET_IPV4_RFC1620_REDIRECTS, + NET_TCP_SYN_RETRIES, }; diff -u --recursive --new-file v2.1.37/linux/include/net/ip.h linux/include/net/ip.h --- v2.1.37/linux/include/net/ip.h Thu Feb 6 02:59:02 1997 +++ linux/include/net/ip.h Thu May 15 15:54:29 1997 @@ -65,36 +65,6 @@ extern void ip_mc_dropdevice(struct device *dev); extern int ip_mc_procinfo(char *, char **, off_t, int, int); -/* Describe an IP fragment. */ -struct ipfrag -{ - int offset; /* offset of fragment in IP datagram */ - int end; /* last byte of data in datagram */ - int len; /* length of this fragment */ - struct sk_buff *skb; /* complete received fragment */ - unsigned char *ptr; /* pointer into real fragment data */ - struct ipfrag *next; /* linked list pointers */ - struct ipfrag *prev; -}; - -/* - * Describe an entry in the "incomplete datagrams" queue. - */ - -struct ipq -{ - unsigned char *mac; /* pointer to MAC header */ - struct iphdr *iph; /* pointer to IP header */ - int len; /* total length of original datagram */ - short ihlen; /* length of the IP header */ - short maclen; /* length of the MAC header */ - struct timer_list timer; /* when will this queue expire? 
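
Worked numbers for the corrected cmsg macros above, assuming 8-byte longs and a 16-byte struct cmsghdr: CMSG_ALIGN rounds up to a multiple of 8, so CMSG_LEN(sizeof(int)) is 16 + 4 = 20 while CMSG_SPACE(sizeof(int)) is 16 + 8 = 24, and CMSG_DATA now points just past the aligned header instead of at a removed struct member. The classic descriptor-passing use looks like this (fd is a hypothetical descriptor):

    char buf[CMSG_SPACE(sizeof(int))];           /* 24 bytes of control space */
    struct cmsghdr *cmsg = (struct cmsghdr *) buf;

    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type  = SCM_RIGHTS;
    cmsg->cmsg_len   = CMSG_LEN(sizeof(int));    /* 20: header + payload */
    *(int *) CMSG_DATA(cmsg) = fd;               /* payload starts at +16 */
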
*/ - struct ipfrag *fragments; /* linked list of received fragments */ - struct ipq *next; /* linked list pointers */ - struct ipq *prev; - struct device *dev; /* Device - for icmp replies */ -}; - /* * Functions provided by ip.c */ diff -u --recursive --new-file v2.1.37/linux/init/main.c linux/init/main.c --- v2.1.37/linux/init/main.c Tue May 13 22:41:19 1997 +++ linux/init/main.c Wed May 14 15:01:21 1997 @@ -66,7 +66,6 @@ extern void init_IRQ(void); extern void init_modules(void); extern long console_init(long, long); -extern long kmalloc_init(long,long); extern void sock_init(void); extern void uidcache_init(void); extern unsigned long pci_init(unsigned long, unsigned long); @@ -872,7 +871,6 @@ #ifdef CONFIG_MCA memory_start = mca_init(memory_start,memory_end); #endif - memory_start = kmalloc_init(memory_start,memory_end); memory_start = kmem_cache_init(memory_start, memory_end); sti(); calibrate_delay(); @@ -885,10 +883,10 @@ } #endif mem_init(memory_start,memory_end); + kmem_cache_sizes_init(); #ifdef CONFIG_PROC_FS proc_root_init(); #endif - kmem_cache_sizes_init(); uidcache_init(); vma_init(); buffer_init(); diff -u --recursive --new-file v2.1.37/linux/kernel/fork.c linux/kernel/fork.c --- v2.1.37/linux/kernel/fork.c Tue May 13 22:41:20 1997 +++ linux/kernel/fork.c Wed May 14 15:01:21 1997 @@ -121,7 +121,7 @@ int i; uid_cachep = kmem_cache_create("uid_cache", sizeof(struct uid_taskcount), - sizeof(unsigned long) * 2, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if(!uid_cachep) panic("Cannot create uid taskcount SLAB cache\n"); @@ -257,6 +257,12 @@ init_new_context(mm); mm->count = 1; mm->def_flags = 0; + + /* It has not run yet, so cannot be present in anyone's + * cache or tlb. + */ + mm->cpu_vm_mask = 0; + tsk->mm = mm; tsk->min_flt = tsk->maj_flt = 0; tsk->cmin_flt = tsk->cmaj_flt = 0; diff -u --recursive --new-file v2.1.37/linux/kernel/ksyms.c linux/kernel/ksyms.c --- v2.1.37/linux/kernel/ksyms.c Tue May 13 22:41:20 1997 +++ linux/kernel/ksyms.c Wed May 14 15:01:21 1997 @@ -124,8 +124,14 @@ /* internal kernel memory management */ EXPORT_SYMBOL(__get_free_pages); EXPORT_SYMBOL(free_pages); +EXPORT_SYMBOL(kmem_find_general_cachep); +EXPORT_SYMBOL(kmem_cache_create); +EXPORT_SYMBOL(kmem_cache_shrink); +EXPORT_SYMBOL(kmem_cache_alloc); +EXPORT_SYMBOL(kmem_cache_free); EXPORT_SYMBOL(kmalloc); EXPORT_SYMBOL(kfree); +EXPORT_SYMBOL(kfree_s); EXPORT_SYMBOL(vmalloc); EXPORT_SYMBOL(vfree); EXPORT_SYMBOL(mem_map); @@ -134,10 +140,6 @@ EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(update_vm_cache); -EXPORT_SYMBOL(kmem_cache_create); -EXPORT_SYMBOL(kmem_cache_destroy); -EXPORT_SYMBOL(kmem_cache_alloc); -EXPORT_SYMBOL(kmem_cache_free); /* filesystem internal functions */ EXPORT_SYMBOL(getname); diff -u --recursive --new-file v2.1.37/linux/mm/Makefile linux/mm/Makefile --- v2.1.37/linux/mm/Makefile Sun Jan 26 02:07:49 1997 +++ linux/mm/Makefile Wed May 14 15:01:21 1997 @@ -9,7 +9,7 @@ O_TARGET := mm.o O_OBJS := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ - kmalloc.o vmalloc.o slab.o \ + vmalloc.o slab.o \ swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.1.37/linux/mm/kmalloc.c linux/mm/kmalloc.c --- v2.1.37/linux/mm/kmalloc.c Tue May 13 22:41:20 1997 +++ linux/mm/kmalloc.c Wed Dec 31 16:00:00 1969 @@ -1,453 +0,0 @@ -/* - * linux/mm/kmalloc.c - * - * Copyright (C) 1991, 1992 Linus Torvalds & Roger Wolff. - * - * Written by R.E. Wolff Sept/Oct '93. 
- * - */ - -/* - * Modified by Alex Bligh (alex@cconcepts.co.uk) 4 Apr 1994 to use multiple - * pages. So for 'page' throughout, read 'area'. - * - * Largely rewritten.. Linus - */ - -#include -#include -#include -#include - -#include -#include - -/* Define this if you want slow routines that try to trip errors */ -#undef SADISTIC_KMALLOC - -/* Private flags. */ - -#define MF_USED 0xffaa0055 -#define MF_DMA 0xff00aa55 -#define MF_FREE 0x0055ffaa - - -/* - * Much care has gone into making these routines in this file reentrant. - * - * The fancy bookkeeping of nbytesmalloced and the like are only used to - * report them to the user (oooohhhhh, aaaaahhhhh....) are not - * protected by cli(). (If that goes wrong. So what?) - * - * These routines restore the interrupt status to allow calling with ints - * off. - */ - -/* - * A block header. This is in front of every malloc-block, whether free or not. - */ -struct block_header { - unsigned long bh_flags; - union { - unsigned long ubh_length; - struct block_header *fbh_next; - } vp; -}; - - -#define bh_length vp.ubh_length -#define bh_next vp.fbh_next -#define BH(p) ((struct block_header *)(p)) - - -/* - * The page descriptor is at the front of every page that malloc has in use. - */ -struct page_descriptor { - struct page_descriptor *next; - struct block_header *firstfree; - int order; - int nfree; -}; - - -#define PAGE_DESC(p) ((struct page_descriptor *)(((unsigned long)(p)) & PAGE_MASK)) - - -/* - * A size descriptor describes a specific class of malloc sizes. - * Each class of sizes has its own freelist. - */ -struct size_descriptor { - struct page_descriptor *firstfree; - struct page_descriptor *dmafree; /* DMA-able memory */ - int nblocks; - - int nmallocs; - int nfrees; - int nbytesmalloced; - int npages; - unsigned long gfporder; /* number of pages in the area required */ -}; - -/* - * For now it is unsafe to allocate bucket sizes between n and - * n-sizeof(page_descriptor) where n is PAGE_SIZE * any power of two - * - * The blocksize and sizes arrays _must_ match! 
- */ -#if PAGE_SIZE == 4096 -static const unsigned int blocksize[] = { - 32, - 64, - 128, - 252, - 508, - 1020, - 2040, - 4096 - 16, - 8192 - 16, - 16384 - 16, - 32768 - 16, - 65536 - 16, - 131072 - 16, - 0 -}; - -static struct size_descriptor sizes[] = -{ - {NULL, NULL, 127, 0, 0, 0, 0, 0}, - {NULL, NULL, 63, 0, 0, 0, 0, 0}, - {NULL, NULL, 31, 0, 0, 0, 0, 0}, - {NULL, NULL, 16, 0, 0, 0, 0, 0}, - {NULL, NULL, 8, 0, 0, 0, 0, 0}, - {NULL, NULL, 4, 0, 0, 0, 0, 0}, - {NULL, NULL, 2, 0, 0, 0, 0, 0}, - {NULL, NULL, 1, 0, 0, 0, 0, 0}, - {NULL, NULL, 1, 0, 0, 0, 0, 1}, - {NULL, NULL, 1, 0, 0, 0, 0, 2}, - {NULL, NULL, 1, 0, 0, 0, 0, 3}, - {NULL, NULL, 1, 0, 0, 0, 0, 4}, - {NULL, NULL, 1, 0, 0, 0, 0, 5}, - {NULL, NULL, 0, 0, 0, 0, 0, 0} -}; -#elif PAGE_SIZE == 8192 -static const unsigned int blocksize[] = { - 64, - 128, - 248, - 504, - 1016, - 2040, - 4080, - 8192 - 32, - 16384 - 32, - 32768 - 32, - 65536 - 32, - 131072 - 32, - 262144 - 32, - 0 -}; - -struct size_descriptor sizes[] = -{ - {NULL, NULL, 127, 0, 0, 0, 0, 0}, - {NULL, NULL, 63, 0, 0, 0, 0, 0}, - {NULL, NULL, 31, 0, 0, 0, 0, 0}, - {NULL, NULL, 16, 0, 0, 0, 0, 0}, - {NULL, NULL, 8, 0, 0, 0, 0, 0}, - {NULL, NULL, 4, 0, 0, 0, 0, 0}, - {NULL, NULL, 2, 0, 0, 0, 0, 0}, - {NULL, NULL, 1, 0, 0, 0, 0, 0}, - {NULL, NULL, 1, 0, 0, 0, 0, 1}, - {NULL, NULL, 1, 0, 0, 0, 0, 2}, - {NULL, NULL, 1, 0, 0, 0, 0, 3}, - {NULL, NULL, 1, 0, 0, 0, 0, 4}, - {NULL, NULL, 1, 0, 0, 0, 0, 5}, - {NULL, NULL, 0, 0, 0, 0, 0, 0} -}; -#else -#error you need to make a version for your pagesize -#endif - -#define NBLOCKS(order) (sizes[order].nblocks) -#define BLOCKSIZE(order) (blocksize[order]) -#define AREASIZE(order) (PAGE_SIZE<<(sizes[order].gfporder)) - -/* - * Create a small cache of page allocations: this helps a bit with - * those pesky 8kB+ allocations for NFS when we're temporarily - * out of memory.. - * - * This is a _truly_ small cache, we just cache one single page - * order (for orders 0, 1 and 2, that is 4, 8 and 16kB on x86). - */ -#define MAX_CACHE_ORDER 3 -struct page_descriptor * kmalloc_cache[MAX_CACHE_ORDER]; - -static inline struct page_descriptor * get_kmalloc_pages(unsigned long priority, - unsigned long order, int dma) -{ - return (struct page_descriptor *) __get_free_pages(priority, order, dma); -} - -static inline void free_kmalloc_pages(struct page_descriptor * page, - unsigned long order, int dma) -{ - if (!dma && order < MAX_CACHE_ORDER) { - page = xchg(kmalloc_cache+order, page); - if (!page) - return; - } - free_pages((unsigned long) page, order); -} - -__initfunc(long kmalloc_init(long start_mem, long end_mem)) -{ - int order; - -/* - * Check the static info array. Things will blow up terribly if it's - * incorrect. This is a late "compile time" check..... 
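The size classes above stop just short of a power of two (4096 - 16 and so on) to leave room for the page_descriptor at the head of each area plus the per-block header. The order search removed further below walks this table until the request plus its header fits; a standalone sketch, where the 8-byte header assumes a 32-bit build:

#include <stdio.h>

static const unsigned int bucket_size[] = {
	32, 64, 128, 252, 508, 1020, 2040,
	4096 - 16, 8192 - 16, 16384 - 16,
	32768 - 16, 65536 - 16, 131072 - 16, 0
};

/* First size class that holds the request plus its block_header. */
static int kmalloc_order(unsigned int size)
{
	unsigned int realsize = size + 8;  /* sizeof(struct block_header) */
	int order;

	for (order = 0; bucket_size[order]; order++)
		if (realsize <= bucket_size[order])
			return order;
	return -1;                         /* too large for any bucket */
}

int main(void)
{
	printf("%d %d\n", kmalloc_order(100), kmalloc_order(3000)); /* 2 7 */
	return 0;
}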
- */ - for (order = 0; BLOCKSIZE(order); order++) { - if ((NBLOCKS(order) * BLOCKSIZE(order) + sizeof(struct page_descriptor)) > - AREASIZE(order)) { - printk("Cannot use %d bytes out of %d in order = %d block mallocs\n", - (int) (NBLOCKS(order) * BLOCKSIZE(order) + - sizeof(struct page_descriptor)), - (int) AREASIZE(order), - BLOCKSIZE(order)); - panic("This only happens if someone messes with kmalloc"); - } - } - return start_mem; -} - -static spinlock_t kmalloc_lock; - -/* - * Ugh, this is ugly, but we want the default case to run - * straight through, which is why we have the ugly goto's - */ -void *kmalloc(size_t size, int priority) -{ - unsigned long flags; - unsigned long type; - int order, dma; - struct block_header *p; - struct page_descriptor *page, **pg; - struct size_descriptor *bucket = sizes; - - /* Get order */ - order = 0; - { - unsigned int realsize = size + sizeof(struct block_header); - for (;;) { - int ordersize = BLOCKSIZE(order); - if (realsize <= ordersize) - break; - order++; - bucket++; - if (ordersize) - continue; - printk("kmalloc of too large a block (%d bytes).\n", (int) size); - return NULL; - } - } - - dma = 0; - type = MF_USED; - pg = &bucket->firstfree; - if (priority & GFP_DMA) { - dma = 1; - type = MF_DMA; - pg = &bucket->dmafree; - } - - priority &= GFP_LEVEL_MASK; - -/* Sanity check... */ - - if (in_interrupt() && priority != GFP_ATOMIC) { - static int count = 0; - if (++count < 5) { - printk("kmalloc called nonatomically from interrupt %p\n", - __builtin_return_address(0)); - priority = GFP_ATOMIC; - } - } - - spin_lock_irqsave(&kmalloc_lock, flags); - page = *pg; - if (!page) - goto no_bucket_page; - - p = page->firstfree; - if (p->bh_flags != MF_FREE) - goto not_free_on_freelist; - -found_it: - page->firstfree = p->bh_next; - page->nfree--; - if (!page->nfree) - *pg = page->next; - spin_unlock_irqrestore(&kmalloc_lock, flags); - bucket->nmallocs++; - bucket->nbytesmalloced += size; - p->bh_flags = type; /* As of now this block is officially in use */ - p->bh_length = size; -#ifdef SADISTIC_KMALLOC - memset(p+1, 0xf0, size); -#endif - return p + 1; /* Pointer arithmetic: increments past header */ - - -no_bucket_page: - /* - * If we didn't find a page already allocated for this - * bucket size, we need to get one.. 
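One detail worth pulling out of the free path below: the allocator finds its metadata by masking the object pointer down to a page boundary (the PAGE_DESC() macro), which works because every area starts with its page_descriptor. The same trick in isolation, with user-space stand-ins for the kernel constants:

#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <stdlib.h>

#define PG_SIZE 4096UL
#define PG_MASK (~(PG_SIZE - 1))

struct page_descriptor { int order; int nfree; };

int main(void)
{
	void *area;
	char *obj;
	struct page_descriptor *desc;

	if (posix_memalign(&area, PG_SIZE, PG_SIZE))
		return 1;
	((struct page_descriptor *)area)->order = 3;

	obj = (char *)area + 200;          /* any pointer into the page */
	desc = (struct page_descriptor *)((unsigned long)obj & PG_MASK);
	printf("order = %d\n", desc->order);   /* prints 3 */

	free(area);
	return 0;
}

The masking assumes the freed pointer lies in the same page as the descriptor; that holds here because multi-page areas carry a single block starting right after the descriptor in the first page.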
- * - * This can be done without locks: it is private to this invocation - */ - spin_unlock_irqrestore(&kmalloc_lock, flags); - - { - int i, sz; - - /* sz is the size of the blocks we're dealing with */ - sz = BLOCKSIZE(order); - - page = get_kmalloc_pages(priority, bucket->gfporder, dma); - if (!page) - goto no_free_page; -found_cached_page: - - bucket->npages++; - - page->order = order; - /* Loop for all but last block: */ - i = (page->nfree = bucket->nblocks) - 1; - p = BH(page + 1); - while (i > 0) { - i--; - p->bh_flags = MF_FREE; - p->bh_next = BH(((long) p) + sz); - p = p->bh_next; - } - /* Last block: */ - p->bh_flags = MF_FREE; - p->bh_next = NULL; - - p = BH(page+1); - } - - /* - * Now we're going to muck with the "global" freelist - * for this size: this should be uninterruptible - */ - spin_lock_irq(&kmalloc_lock); - page->next = *pg; - *pg = page; - goto found_it; - - -no_free_page: - /* - * No free pages, check the kmalloc cache of - * pages to see if maybe we have something available - */ - if (!dma && order < MAX_CACHE_ORDER) { - page = xchg(kmalloc_cache+order, page); - if (page) - goto found_cached_page; - } - { - static unsigned long last = 0; - if (priority != GFP_BUFFER && (last + 10 * HZ < jiffies)) { - last = jiffies; - printk("Couldn't get a free page.....\n"); - } - return NULL; - } - -not_free_on_freelist: - spin_unlock_irqrestore(&kmalloc_lock, flags); - printk("Problem: block on freelist at %08lx isn't free.\n", (long) p); - return NULL; -} - -void kfree(void *__ptr) -{ - int dma; - unsigned long flags; - unsigned int order; - struct page_descriptor *page, **pg; - struct size_descriptor *bucket; - - if (!__ptr) - goto null_kfree; -#define ptr ((struct block_header *) __ptr) - page = PAGE_DESC(ptr); - __ptr = ptr - 1; - if (~PAGE_MASK & (unsigned long)page->next) - goto bad_order; - order = page->order; - if (order >= sizeof(sizes) / sizeof(sizes[0])) - goto bad_order; - bucket = sizes + order; - dma = 0; - pg = &bucket->firstfree; - if (ptr->bh_flags == MF_DMA) { - dma = 1; - ptr->bh_flags = MF_USED; - pg = &bucket->dmafree; - } - if (ptr->bh_flags != MF_USED) - goto bad_order; - ptr->bh_flags = MF_FREE; /* As of now this block is officially free */ -#ifdef SADISTIC_KMALLOC - memset(ptr+1, 0x0e, ptr->bh_length); -#endif - spin_lock_irqsave(&kmalloc_lock, flags); - - bucket->nfrees++; - bucket->nbytesmalloced -= ptr->bh_length; - - ptr->bh_next = page->firstfree; - page->firstfree = ptr; - if (!page->nfree++) { -/* Page went from full to one free block: put it on the freelist. */ - if (bucket->nblocks == 1) - goto free_page; - page->next = *pg; - *pg = page; - } -/* If page is completely free, free it */ - if (page->nfree == bucket->nblocks) { - for (;;) { - struct page_descriptor *tmp = *pg; - if (!tmp) - goto not_on_freelist; - if (tmp == page) - break; - pg = &tmp->next; - } - *pg = page->next; -free_page: - bucket->npages--; - free_kmalloc_pages(page, bucket->gfporder, dma); - } - spin_unlock_irqrestore(&kmalloc_lock, flags); -null_kfree: - return; - -bad_order: - printk("kfree of non-kmalloced memory: %p, next= %p, order=%d\n", - ptr+1, page->next, page->order); - return; - -not_on_freelist: - printk("Ooops. page %p doesn't show on freelist.\n", page); - spin_unlock_irqrestore(&kmalloc_lock, flags); -} diff -u --recursive --new-file v2.1.37/linux/mm/memory.c linux/mm/memory.c --- v2.1.37/linux/mm/memory.c Tue May 13 22:41:20 1997 +++ linux/mm/memory.c Thu May 15 14:43:52 1997 @@ -590,25 +590,12 @@ * and potentially makes it more efficient. 
*/ static void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, - unsigned long address, int write_access) + unsigned long address, int write_access, pte_t *page_table) { - pgd_t *page_dir; - pmd_t *page_middle; - pte_t *page_table, pte; + pte_t pte; unsigned long old_page, new_page; new_page = __get_free_page(GFP_KERNEL); - page_dir = pgd_offset(vma->vm_mm, address); - if (pgd_none(*page_dir)) - goto end_wp_page; - if (pgd_bad(*page_dir)) - goto bad_wp_pagedir; - page_middle = pmd_offset(page_dir, address); - if (pmd_none(*page_middle)) - goto end_wp_page; - if (pmd_bad(*page_middle)) - goto bad_wp_pagemiddle; - page_table = pte_offset(page_middle, address); pte = *page_table; if (!pte_present(pte)) goto end_wp_page; @@ -650,14 +637,6 @@ bad_wp_page: printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page); send_sig(SIGKILL, tsk, 1); - goto end_wp_page; -bad_wp_pagemiddle: - printk("do_wp_page: bogus page-middle at address %08lx (%08lx)\n", address, pmd_val(*page_middle)); - send_sig(SIGKILL, tsk, 1); - goto end_wp_page; -bad_wp_pagedir: - printk("do_wp_page: bogus page-dir entry at address %08lx (%08lx)\n", address, pgd_val(*page_dir)); - send_sig(SIGKILL, tsk, 1); end_wp_page: if (new_page) free_page(new_page); @@ -786,24 +765,10 @@ * do not need to flush old virtual caches or the TLB. */ static void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, - unsigned long address, int write_access) + unsigned long address, int write_access, pte_t *page_table, pte_t entry) { - pgd_t * pgd; - pmd_t * pmd; - pte_t * page_table; - pte_t entry; unsigned long page; - pgd = pgd_offset(tsk->mm, address); - pmd = pmd_alloc(pgd, address); - if (!pmd) - goto no_memory; - page_table = pte_alloc(pmd, address); - if (!page_table) - goto no_memory; - entry = *page_table; - if (pte_present(entry)) - goto is_present; if (!pte_none(entry)) goto swap_page; address &= PAGE_MASK; @@ -865,11 +830,6 @@ swap_page: do_swap_page(tsk, vma, address, page_table, entry, write_access); return; - -no_memory: - oom(tsk); -is_present: - return; } /* @@ -885,27 +845,30 @@ * with external mmu caches can use to update those (ie the Sparc or * PowerPC hashed page tables that act as extended TLBs). 
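The shape of this change, continued in handle_mm_fault() below, is to do the pgd/pmd/pte walk exactly once and hand the resulting pte pointer plus a snapshot of its value down to do_no_page() and do_wp_page(), where the removed code re-walked the tables inside each helper. A toy model of that resolve-once, pass-down structure, using two flat levels; the names are illustrative, not the kernel's:

#include <stdio.h>

#define ENTRIES 1024

typedef unsigned long toy_pte;

/* One walk, as in the new handle_mm_fault(). */
static toy_pte *walk(toy_pte **dir, unsigned long addr)
{
	toy_pte *table = dir[(addr >> 22) & (ENTRIES - 1)];
	return table ? &table[(addr >> 12) & (ENTRIES - 1)] : NULL;
}

static void handle_fault(toy_pte **dir, unsigned long addr)
{
	toy_pte *pte = walk(dir, addr);    /* walked exactly once */
	if (!pte)
		return;                    /* kernel would pmd_alloc()/pte_alloc() */
	/* helpers receive (pte, *pte) instead of repeating the walk */
	printf("entry at %p = %lx\n", (void *)pte, *pte);
}

int main(void)
{
	static toy_pte table[ENTRIES];
	static toy_pte *dir[ENTRIES];

	dir[1] = table;
	table[2] = 0x1007;
	handle_fault(dir, (1UL << 22) | (2UL << 12));
	return 0;
}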
*/ -static inline void handle_pte_fault(struct vm_area_struct * vma, unsigned long address, +static inline void handle_pte_fault(struct task_struct *tsk, + struct vm_area_struct * vma, unsigned long address, int write_access, pte_t * pte) { - if (!pte_present(*pte)) { - do_no_page(current, vma, address, write_access); + pte_t entry = *pte; + + if (!pte_present(entry)) { + do_no_page(tsk, vma, address, write_access, pte, entry); return; } - set_pte(pte, pte_mkyoung(*pte)); + set_pte(pte, pte_mkyoung(entry)); flush_tlb_page(vma, address); if (!write_access) return; - if (pte_write(*pte)) { - set_pte(pte, pte_mkdirty(*pte)); + if (pte_write(entry)) { + set_pte(pte, pte_mkdirty(entry)); flush_tlb_page(vma, address); return; } - do_wp_page(current, vma, address, write_access); + do_wp_page(tsk, vma, address, write_access, pte); } -void handle_mm_fault(struct vm_area_struct * vma, unsigned long address, - int write_access) +void handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma, + unsigned long address, int write_access) { pgd_t *pgd; pmd_t *pmd; @@ -918,9 +881,9 @@ pte = pte_alloc(pmd, address); if (!pte) goto no_memory; - handle_pte_fault(vma, address, write_access, pte); + handle_pte_fault(tsk, vma, address, write_access, pte); update_mmu_cache(vma, address, *pte); return; no_memory: - oom(current); + oom(tsk); } diff -u --recursive --new-file v2.1.37/linux/mm/mmap.c linux/mm/mmap.c --- v2.1.37/linux/mm/mmap.c Tue May 13 22:41:20 1997 +++ linux/mm/mmap.c Wed May 14 15:01:21 1997 @@ -659,14 +659,14 @@ { vm_area_cachep = kmem_cache_create("vm_area_struct", sizeof(struct vm_area_struct), - sizeof(long)*8, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if(!vm_area_cachep) panic("vma_init: Cannot alloc vm_area_struct cache."); mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), - sizeof(long) * 4, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if(!mm_cachep) panic("vma_init: Cannot alloc mm_struct cache."); diff -u --recursive --new-file v2.1.37/linux/mm/slab.c linux/mm/slab.c --- v2.1.37/linux/mm/slab.c Tue May 13 22:41:20 1997 +++ linux/mm/slab.c Wed May 14 15:01:21 1997 @@ -1,8 +1,81 @@ /* * linux/mm/slab.c - * Written by Mark Hemment, 1996. + * Written by Mark Hemment, 1996/97. * (markhe@nextd.demon.co.uk) + * + * 11 April '97. Started multi-threading - markhe + * The global cache-chain is protected by the semaphore 'cache_chain_sem'. + * The sem is only needed when accessing/extending the cache-chain, which + * can never happen inside an interrupt (kmem_cache_create(), + * kmem_cache_shrink() and kmem_cache_reap()). + * This is a medium-term exclusion lock. + * + * Each cache has its own lock; 'c_spinlock'. This lock is needed only + * when accessing non-constant members of a cache-struct. + * Note: 'constant members' are assigned a value in kmem_cache_create() before + * the cache is linked into the cache-chain. The values never change, so not + * even a multi-reader lock is needed for these members. + * The c_spinlock is only ever held for a few cycles. + * + * To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which + * maybe be sleeping and therefore not holding the semaphore/lock), the + * c_growing field is used. This also prevents reaping from a cache. + * + * Note, caches can _never_ be destroyed. When a sub-system (eg module) has + * finished with a cache, it can only be shrunk. This leaves the cache empty, + * but already enabled for re-use, eg. during a module re-load. 
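The scheme described here pairs a sleeping semaphore for chain access with a per-cache spinlock held only a few cycles. In outline, a chain walk under this tree's primitives would look like the sketch below; the body is illustrative, the real walks appear later in kmem_cache_shrink() and kmem_cache_reap():

static void walk_chain_example(void)
{
	kmem_cache_t *searchp = &cache_cache;
	unsigned long flags;

	down(&cache_chain_sem);    /* may sleep, so never from an interrupt */
	do {
		spin_lock_irqsave(&searchp->c_spinlock, flags);
		/* ... examine non-constant members, briefly ... */
		spin_unlock_irqrestore(&searchp->c_spinlock, flags);
		searchp = searchp->c_nextp;
	} while (searchp != &cache_cache);
	up(&cache_chain_sem);
}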
+ * + * Notes: + * o Constructors/deconstructors are called while the cache-lock + * is _not_ held. Therefore they _must_ be threaded. + * o Constructors must not attempt to allocate memory from the + * same cache that they are a constructor for - infinite loop! + * (There is no easy way to trap this.) + * o The per-cache locks must be obtained with local-interrupts disabled. + * o When compiled with debug support, and an object-verify (upon release) + * is request for a cache, the verify-function is called with the cache + * lock held. This helps debugging. + * o The functions called from try_to_free_page() must not attempt + * to allocate memory from a cache which is being grown. + * The buffer sub-system might try to allocate memory, via buffer_cachep. + * As this pri is passed to the SLAB, and then (if necessary) onto the + * gfp() funcs (which avoid calling try_to_free_page()), no deadlock + * should happen. + * + * The positioning of the per-cache lock is tricky. If the lock is + * placed on the same h/w cache line as commonly accessed members + * the number of L1 cache-line faults is reduced. However, this can + * lead to the cache-line ping-ponging between processors when the + * lock is in contention (and the common members are being accessed). + * Decided to keep it away from common members. + * + * More fine-graining is possible, with per-slab locks...but this might be + * taking fine graining too far, but would have the advantage; + * During most allocs/frees no writes occur to the cache-struct. + * Therefore a multi-reader/one writer lock could be used (the writer + * needed when the slab chain is being link/unlinked). + * As we would not have an exclusion lock for the cache-structure, one + * would be needed per-slab (for updating s_free ptr, and/or the contents + * of s_index). + * The above locking would allow parallel operations to different slabs within + * the same cache with reduced spinning. + * + * Per-engine slab caches, backed by a global cache (as in Mach's Zone allocator), + * would allow most allocations from the same cache to execute in parallel. + * + * At present, each engine can be growing a cache. This should be blocked. + * + * It is not currently 100% safe to examine the page_struct outside of a kernel + * or global cli lock. The risk is v. small, and non-fatal. + * + * Calls to printk() are not 100% safe (the function is not threaded). However, + * printk() is only used under an error condition, and the risk is v. small (not + * sure if the console write functions 'enjoy' executing multiple contextes in + * parallel. I guess they don't...). + * Note, for most calls to printk() any held cache-lock is dropped. This is not + * always done for text size reasons - having *_unlock() everywhere is bloat. */ + /* * An implementation of the Slab Allocator as described in outline in; * UNIX Internals: The New Frontiers by Uresh Vahalia @@ -10,157 +83,251 @@ * or with a little more detail in; * The Slab Allocator: An Object-Caching Kernel Memory Allocator * Jeff Bonwick (Sun Microsystems). - * Presented at: USENIX Summer 1994 Technical Conference + * Presented at: USENIX Summer 1994 Technical Conference + */ + +/* + * This implementation deviates from Bonwick's paper as it + * does not use a hash-table for large objects, but rather a per slab + * index to hold the bufctls. This allows the bufctl structure to + * be small (one word), but limits the number of objects a slab (not + * a cache) can contain when off-slab bufctls are used. 
The limit is the + * size of the largest general-cache that does not use off-slab bufctls, + * divided by the size of a bufctl. For 32bit archs, is this 256/4 = 64. + * This is not serious, as it is only for large objects, when it is unwise + * to have too many per slab. + * Note: This limit can be raised by introducing a general-cache whose size + * is less than 512 (PAGE_SIZE<<3), but greater than 256. */ -#include #include +#include #include +#include #include -#include -#include -/* SLAB_MGMT_CHECKS - define to enable extra checks in - * kmem_cache_[create|destroy|shrink]. - * If you're not messing around with these funcs, then undef this. - * SLAB_HIGH_PACK - define to allow 'bufctl's to be stored within objs that do not - * have a state. This allows more objs per slab, but removes the - * ability to sanity check an addr on release (if the addr is - * within any slab, anywhere, kmem_cache_free() will accept it!). - * SLAB_DEBUG_SUPPORT - when defined, kmem_cache_create() will honour; SLAB_DEBUG_FREE, - * SLAB_DEBUG_INITIAL and SLAB_RED_ZONE. - */ -#define SLAB_MGMT_CHECKS -#undef SLAB_HIGH_PACK -#define SLAB_DEBUG_SUPPORT /* undef this when your cache is stable */ - -#define BYTES_PER_WORD sizeof(void *) - -/* legal flag mask for kmem_cache_create() */ -#if defined(SLAB_DEBUG_SUPPORT) -#define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_HWCACHE_ALIGN|SLAB_RED_ZONE) -#else -#define SLAB_C_MASK (SLAB_HWCACHE_ALIGN) -#endif /* SLAB_DEBUG_SUPPORT */ +#include +#include +#include +#include +#include -/* Magic num for red zoning. - * Placed in the first word after the end of an obj +/* If there is a different PAGE_SIZE around, and it works with this allocator, + * then change the following. */ -#define SLAB_RED_MAGIC1 0x5A2CF071UL /* when obj is active */ -#define SLAB_RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */ +#if (PAGE_SIZE != 8192 && PAGE_SIZE != 4096) +#error Your page size is probably not correctly supported - please check +#endif -/* Used for linking objs within a slab. How much of the struct is - * used, and where its placed, depends on the packing used in a cache. - * Don't mess with the order! - */ -typedef struct kmem_bufctl_s { - struct kmem_bufctl_s *buf_nextp; - struct kmem_slab_s *buf_slabp; - void *buf_objp; /* start of obj */ - struct kmem_bufctl_s *buf_hnextp; - struct kmem_bufctl_s **buf_hashp; -} kmem_bufctl_t; +/* SLAB_MGMT_CHECKS - 1 to enable extra checks in kmem_cache_create(). + * 0 if you wish to reduce memory usage. + * + * SLAB_DEBUG_SUPPORT - 1 for kmem_cache_create() to honour; SLAB_DEBUG_FREE, + * SLAB_DEBUG_INITIAL, SLAB_RED_ZONE & SLAB_POISION. + * 0 for faster, smaller, code (espically in the critical paths). + * + * SLAB_STATS - 1 to collect stats for /proc/slabinfo. + * 0 for faster, smaller, code (espically in the critical paths). + * + * SLAB_SELFTEST - 1 to perform a few tests, mainly for developement. + */ +#define SLAB_MGMT_CHECKS 1 +#define SLAB_DEBUG_SUPPORT 0 +#define SLAB_STATS 0 +#define SLAB_SELFTEST 0 -/* different portions of the bufctl are used - so need some macros */ -#define kmem_bufctl_offset(x) ((unsigned long)&((kmem_bufctl_t *)0)->x) -#define kmem_bufctl_short_size (kmem_bufctl_offset(buf_objp)) -#define kmem_bufctl_very_short_size (kmem_bufctl_offset(buf_slabp)) +/* Shouldn't this be in a header file somewhere? */ +#define BYTES_PER_WORD sizeof(void *) + +/* Legal flag mask for kmem_cache_create(). 
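Completing the off-slab bufctl arithmetic from the comment above: slab management moves off-slab once the object size reaches PAGE_SIZE>>3 (512 with 4 KB pages; the comment's "PAGE_SIZE<<3" reads like a slip for PAGE_SIZE>>3), so the largest general cache whose own bufctls stay on-slab is size-256, and an off-slab s_index drawn from it holds at most 256/sizeof(kmem_bufctl_t) entries. As a worked check with 32-bit sizes:

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;
	unsigned long threshold = page_size >> 3;  /* 512: off-slab above this */
	unsigned long largest_on_slab = 256;       /* biggest general cache < 512 */
	unsigned long bufctl_size = 4;             /* sizeof(kmem_bufctl_t), 32-bit */

	printf("threshold %lu, limit %lu objs per slab\n",
	       threshold, largest_on_slab / bufctl_size);
	return 0;
}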
*/ +#if SLAB_DEBUG_SUPPORT +#if 0 +#define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \ + SLAB_POISION|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP| \ + SLAB_HIGH_PACK) +#endif +#define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \ + SLAB_POISION|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP) +#else +#if 0 +#define SLAB_C_MASK (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK) +#endif +#define SLAB_C_MASK (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP) +#endif /* SLAB_DEBUG_SUPPORT */ /* Slab management struct. * Manages the objs in a slab. Placed either at the end of mem allocated - * for the slab, or from an internal obj cache (SLAB_CFLGS_OFF_SLAB). - * Slabs are chain into a partially ordered list. The linking ptrs must - * be first in the struct! - * The size of the struct is important(ish); it should align well on - * cache line(s) - */ + * for a slab, or from an internal obj cache (cache_slabp). + * Slabs are chained into a partially ordered list; fully used first, partial + * next, and then fully free slabs. + * The first 4 members are referenced during an alloc/free operation, and + * should always appear on the same cache line. + * Note: The offset between some members _must_ match offsets within + * the kmem_cache_t - see kmem_cache_init() for the checks. */ + +#define SLAB_OFFSET_BITS 16 /* could make this larger for 64bit archs */ + typedef struct kmem_slab_s { - struct kmem_slab_s *s_nextp; - struct kmem_slab_s *s_prevp; - void *s_mem; /* addr of mem allocated for slab */ - unsigned long s_jiffies; - kmem_bufctl_t *s_freep; /* ptr to first inactive obj in slab */ - unsigned long s_flags; - unsigned long s_magic; - unsigned long s_inuse; /* num of objs active in slab */ + struct kmem_bufctl_s *s_freep; /* ptr to first inactive obj in slab */ + struct kmem_bufctl_s *s_index; + unsigned long s_magic; + unsigned long s_inuse; /* num of objs active in slab */ + + struct kmem_slab_s *s_nextp; + struct kmem_slab_s *s_prevp; + void *s_mem; /* addr of first obj in slab */ + unsigned long s_offset:SLAB_OFFSET_BITS, + s_dma:1; } kmem_slab_t; -/* to test for end of slab chain */ -#define kmem_slab_end(x) ((kmem_slab_t*)&((x)->c_firstp)) +/* When the slab mgmt is on-slab, this gives the size to use. */ +#define slab_align_size (L1_CACHE_ALIGN(sizeof(kmem_slab_t))) + +/* Test for end of slab chain. */ +#define kmem_slab_end(x) ((kmem_slab_t*)&((x)->c_offset)) /* s_magic */ -#define SLAB_MAGIC_ALLOC 0xA5C32F2BUL -#define SLAB_MAGIC_UNALLOC 0xB2F23C5AUL +#define SLAB_MAGIC_ALLOC 0xA5C32F2BUL /* slab is alive */ +#define SLAB_MAGIC_DESTROYED 0xB2F23C5AUL /* slab has been destoryed */ -/* s_flags */ -#define SLAB_SFLGS_DMA 0x000001UL /* slab's mem can do DMA */ +/* Bufctl's are used for linking objs within a slab, identifying what slab an obj + * is in, and the address of the associated obj (for sanity checking with off-slab + * bufctls). What a bufctl contains depends upon the state of the obj and + * the organisation of the cache. + */ +typedef struct kmem_bufctl_s { + union { + struct kmem_bufctl_s *buf_nextp; + kmem_slab_t *buf_slabp; /* slab for obj */ + void * buf_objp; + } u; +} kmem_bufctl_t; -/* cache struct - manages a cache. - * c_lastp must appear immediately after c_firstp! +/* ...shorthand... */ +#define buf_nextp u.buf_nextp +#define buf_slabp u.buf_slabp +#define buf_objp u.buf_objp + +#if SLAB_DEBUG_SUPPORT +/* Magic nums for obj red zoning. + * Placed in the first word before and the first word after an obj. 
+ */ +#define SLAB_RED_MAGIC1 0x5A2CF071UL /* when obj is active */ +#define SLAB_RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */ + +/* ...and for poisioning */ +#define SLAB_POISION_BYTE 0x5a /* byte value for poisioning */ +#define SLAB_POISION_END 0xa5 /* end-byte of poisioning */ + +#endif /* SLAB_DEBUG_SUPPORT */ + +/* Cache struct - manages a cache. + * First four members are commonly referenced during an alloc/free operation. */ struct kmem_cache_s { kmem_slab_t *c_freep; /* first slab w. free objs */ - unsigned long c_flags; + unsigned long c_flags; /* constant flags */ unsigned long c_offset; - struct kmem_bufctl_s **c_hashp; /* ptr for off-slab bufctls */ - kmem_slab_t *c_firstp; /* first slab in chain */ - kmem_slab_t *c_lastp; /* last slab in chain */ - unsigned long c_hashbits; unsigned long c_num; /* # of objs per slab */ - unsigned long c_gfporder; /* order of pgs per slab (2^n) */ - unsigned long c_org_size; + unsigned long c_magic; unsigned long c_inuse; /* kept at zero */ - void (*c_ctor)(void *, int, unsigned long); /* constructor func */ - void (*c_dtor)(void *, int, unsigned long); /* de-constructor func */ + kmem_slab_t *c_firstp; /* first slab in chain */ + kmem_slab_t *c_lastp; /* last slab in chain */ + + spinlock_t c_spinlock; + unsigned long c_growing; + unsigned long c_dflags; /* dynamic flags */ + size_t c_org_size; + unsigned long c_gfporder; /* order of pgs per slab (2^n) */ + void (*c_ctor)(void *, kmem_cache_t *, unsigned long); /* constructor func */ + void (*c_dtor)(void *, kmem_cache_t *, unsigned long); /* de-constructor func */ unsigned long c_align; /* alignment of objs */ - unsigned long c_colour; /* cache colouring range */ - unsigned long c_colour_next;/* cache colouring */ + size_t c_colour; /* cache colouring range */ + size_t c_colour_next;/* cache colouring */ + unsigned long c_failures; const char *c_name; struct kmem_cache_s *c_nextp; + kmem_cache_t *c_index_cachep; +#if SLAB_STATS + unsigned long c_num_active; + unsigned long c_num_allocations; + unsigned long c_high_mark; + unsigned long c_grown; + unsigned long c_reaped; + atomic_t c_errors; +#endif /* SLAB_STATS */ }; -/* magic # for c_magic - used to detect out-of-slabs in __kmem_cache_alloc() */ -#define SLAB_C_MAGIC 0x4F17A36DUL - /* internal c_flags */ #define SLAB_CFLGS_OFF_SLAB 0x010000UL /* slab mgmt in own cache */ #define SLAB_CFLGS_BUFCTL 0x020000UL /* bufctls in own cache */ -#define SLAB_CFLGS_RELEASED 0x040000UL /* cache is/being destroyed */ +#define SLAB_CFLGS_GENERAL 0x080000UL /* a general-cache */ -#if defined(SLAB_HIGH_PACK) -#define SLAB_CFLGS_PTR_IN_OBJ 0x080000UL /* free ptr in obj */ -#endif +/* c_dflags (dynamic flags). 
Need to hold the spinlock to access this member */ +#define SLAB_CFLGS_GROWN 0x000002UL /* don't reap a recently grown */ #define SLAB_OFF_SLAB(x) ((x) & SLAB_CFLGS_OFF_SLAB) #define SLAB_BUFCTL(x) ((x) & SLAB_CFLGS_BUFCTL) -#define SLAB_RELEASED(x) ((x) & SLAB_CFLGS_RELEASED) -#if defined(SLAB_HIGH_PACK) -#define SLAB_PTR_IN_OBJ(x) ((x) & SLAB_CFLGS_PTR_IN_OBJ) +#define SLAB_GROWN(x) ((x) & SLAB_CFLGS_GROWN) + +#if SLAB_STATS +#define SLAB_STATS_INC_ACTIVE(x) ((x)->c_num_active++) +#define SLAB_STATS_DEC_ACTIVE(x) ((x)->c_num_active--) +#define SLAB_STATS_INC_ALLOCED(x) ((x)->c_num_allocations++) +#define SLAB_STATS_INC_GROWN(x) ((x)->c_grown++) +#define SLAB_STATS_INC_REAPED(x) ((x)->c_reaped++) +#define SLAB_STATS_SET_HIGH(x) do { if ((x)->c_num_active > (x)->c_high_mark) \ + (x)->c_high_mark = (x)->c_num_active; \ + } while (0) +#define SLAB_STATS_INC_ERR(x) (atomic_inc(&(x)->c_errors)) #else -#define SLAB_PTR_IN_OBJ(x) (0) +#define SLAB_STATS_INC_ACTIVE(x) +#define SLAB_STATS_DEC_ACTIVE(x) +#define SLAB_STATS_INC_ALLOCED(x) +#define SLAB_STATS_INC_GROWN(x) +#define SLAB_STATS_INC_REAPED(x) +#define SLAB_STATS_SET_HIGH(x) +#define SLAB_STATS_INC_ERR(x) +#endif /* SLAB_STATS */ + +#if SLAB_SELFTEST +#if !SLAB_DEBUG_SUPPORT +#error Debug support needed for self-test #endif +static void kmem_self_test(void); +#endif /* SLAB_SELFTEST */ + +/* c_magic - used to detect 'out of slabs' in __kmem_cache_alloc() */ +#define SLAB_C_MAGIC 0x4F17A36DUL /* maximum size of an obj (in 2^order pages) */ #define SLAB_OBJ_MAX_ORDER 5 /* 32 pages */ -/* maximum num of pages for a slab (avoids trying to ask for too may contigious pages) */ +/* maximum num of pages for a slab (prevents large requests to the VM layer) */ #define SLAB_MAX_GFP_ORDER 5 /* 32 pages */ /* the 'prefered' minimum num of objs per slab - maybe less for large objs */ #define SLAB_MIN_OBJS_PER_SLAB 4 -/* if the num of objs per slab is <= SLAB_MIN_OBJS_PER_SLAB, - * then the page order must be less than this before trying the next order +/* If the num of objs per slab is <= SLAB_MIN_OBJS_PER_SLAB, + * then the page order must be less than this before trying the next order. */ #define SLAB_BREAK_GFP_ORDER 2 -/* size of hash tables for caches which use off-slab bufctls (SLAB_CFLGS_BUFCTL) */ -#define KMEM_HASH_SIZE 128 +/* Macros for storing/retrieving the cachep and or slab from the + * global 'mem_map'. With off-slab bufctls, these are used to find the + * slab an obj belongs to. With kmalloc(), and kfree(), these are used + * to find the cache which an obj belongs to. + */ +#define SLAB_SET_PAGE_CACHE(pg, x) ((pg)->next = (struct page *)(x)) +#define SLAB_GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->next) +#define SLAB_SET_PAGE_SLAB(pg, x) ((pg)->prev = (struct page *)(x)) +#define SLAB_GET_PAGE_SLAB(pg) ((kmem_slab_t *)(pg)->prev) -/* size description struct for general-caches */ +/* Size description struct for general-caches. */ typedef struct cache_sizes { - unsigned long cs_size; + size_t cs_size; kmem_cache_t *cs_cachep; } cache_sizes_t; @@ -176,177 +343,177 @@ {2048, NULL}, {4096, NULL}, {8192, NULL}, -#if PAGE_SIZE == 8192 {16384, NULL}, -#endif + {32768, NULL}, + {65536, NULL}, + {131072, NULL}, {0, NULL} }; -/* Names for the general-caches. - * Not placed into the sizes struct for a good reason; the - * string ptr is not needed while searching in kmem_alloc()/ - * kmem_free(), and would 'get-in-the-way' - think about it. +/* Names for the general-caches. 
Not placed into the sizes struct for + * a good reason; the string ptr is not needed while searching in kmalloc(), + * and would 'get-in-the-way' in the h/w cache. */ static char *cache_sizes_name[] = { #if PAGE_SIZE == 4096 - "cache-32", + "size-32", #endif - "cache-64", - "cache-128", - "cache-256", - "cache-512", - "cache-1024", - "cache-2048", - "cache-4096", -#if PAGE_SIZE == 4096 - "cache-8192" -#elif PAGE_SIZE == 8192 - "cache-8192", - "cache-16384" -#else -#error Your page size is not supported for the general-caches - please fix -#endif -}; - -static void kmem_hash_ctor(void *ptr, int , unsigned long); /* fwd ref */ -extern kmem_cache_t cache_cache; /* fwd ref */ - -/* internal cache of hash objs, only used when bufctls are off-slab */ -static kmem_cache_t cache_hash = { -/* freep, flags */ kmem_slab_end(&cache_hash), 0, -/* offset, hashp */ sizeof(kmem_bufctl_t*)*KMEM_HASH_SIZE, NULL, -/* firstp, lastp */ kmem_slab_end(&cache_hash), kmem_slab_end(&cache_hash), -/* hashbits, num, gfporder */ 0, 0, 0, -/* org_size, magic */ sizeof(kmem_bufctl_t*)*KMEM_HASH_SIZE, SLAB_C_MAGIC, -/* inuse, ctor, dtor, align */ 0, kmem_hash_ctor, NULL, L1_CACHE_BYTES, -/* colour, colour_next */ 0, 0, -/* name, nextp */ "hash_cache", &cache_cache -}; - -/* internal cache of freelist mgmnt objs, only use when bufctls are off-slab */ -static kmem_cache_t cache_bufctl = { -/* freep, flags */ kmem_slab_end(&cache_bufctl), 0, -/* offset, hashp */ sizeof(kmem_bufctl_t), NULL, -/* firstp, lastp */ kmem_slab_end(&cache_bufctl), kmem_slab_end(&cache_bufctl), -/* hashbits, num, gfporder */ 0, 0, 0, -/* org_size, magic */ sizeof(kmem_bufctl_t), SLAB_C_MAGIC, -/* inuse, ctor, dtor, align */ 0, NULL, NULL, BYTES_PER_WORD*2, -/* colour, colour_next */ 0, 0, -/* name, nextp */ "bufctl_cache", &cache_hash -}; - -/* internal cache of slab mngmnt objs, only used when slab mgmt is off-slab */ -static kmem_cache_t cache_slab = { -/* freep, flags */ kmem_slab_end(&cache_slab), 0, -/* offset, hashp */ sizeof(kmem_slab_t), NULL, -/* firstp, lastp */ kmem_slab_end(&cache_slab), kmem_slab_end(&cache_slab), -/* hashbits, num, gfporder */ 0, 0, 0, -/* org_size, magic */ sizeof(kmem_slab_t), SLAB_C_MAGIC, -/* inuse, ctor, dtor, align */ 0, NULL, NULL, L1_CACHE_BYTES, -/* colour, colour_next */ 0, 0, -/* name, nextp */ "slab_cache", &cache_bufctl + "size-64", + "size-128", + "size-256", + "size-512", + "size-1024", + "size-2048", + "size-4096", + "size-8192", + "size-16384", + "size-32768", + "size-65536", + "size-131072" }; /* internal cache of cache description objs */ static kmem_cache_t cache_cache = { -/* freep, flags */ kmem_slab_end(&cache_cache), 0, -/* offset, hashp */ sizeof(kmem_cache_t), NULL, +/* freep, flags */ kmem_slab_end(&cache_cache), SLAB_NO_REAP, +/* offset, num */ sizeof(kmem_cache_t), 0, +/* c_magic, c_inuse */ SLAB_C_MAGIC, 0, /* firstp, lastp */ kmem_slab_end(&cache_cache), kmem_slab_end(&cache_cache), -/* hashbits, num, gfporder */ 0, 0, 0, -/* org_size, magic */ sizeof(kmem_cache_t), SLAB_C_MAGIC, -/* inuse, ctor, dtor, align */ 0, NULL, NULL, L1_CACHE_BYTES, +/* spinlock */ SPIN_LOCK_UNLOCKED, +/* growing */ 0, +/* dflags */ 0, +/* org_size, gfp */ 0, 0, +/* ctor, dtor, align */ NULL, NULL, L1_CACHE_BYTES, /* colour, colour_next */ 0, 0, +/* failures */ 0, /* name */ "kmem_cache", -/* nextp */ &cache_slab +/* nextp */ &cache_cache, +/* index */ NULL, }; -/* constructor for hash tables */ -static void kmem_hash_ctor(void *ptr, int size, unsigned long flags) -{ - memset(ptr, 0, 
sizeof(kmem_bufctl_t*)*KMEM_HASH_SIZE); -} +/* Guard access to the cache-chain. */ +static struct semaphore cache_chain_sem; -/* place maintainer for reaping */ +/* Place maintainer for reaping. */ static kmem_cache_t *clock_searchp = &cache_cache; -/* Init an internal cache */ -__initfunc(static void -kmem_own_cache_init(kmem_cache_t *cachep)) -{ - unsigned long size, i; - - if (cachep->c_inuse || cachep->c_magic != SLAB_C_MAGIC) { - panic("Bad init of internal cache %s", cachep->c_name); - /* NOTREACHED */ - } - size = cachep->c_offset + kmem_bufctl_short_size; - i = size % cachep->c_align; - if (i) - size += (cachep->c_align-i); - cachep->c_offset = size-kmem_bufctl_short_size; - - i = ((PAGE_SIZE<c_gfporder)-sizeof(kmem_slab_t)); - cachep->c_num = i / size; /* num of objs per slab */ +/* Internal slab mgmt cache, for when slab mgmt is off-slab. */ +static kmem_cache_t *cache_slabp = NULL; - /* cache colouring */ - cachep->c_colour = 1 + (i-(cachep->c_num*size))/cachep->c_align; - cachep->c_colour_next = cachep->c_colour; -} +/* Max number of objs-per-slab for caches which use bufctl's. + * Needed to avoid a possible looping condition in kmem_cache_grow(). + */ +static unsigned long bufctl_limit = 0; -/* Initialisation - setup all internal caches */ -__initfunc(long -kmem_cache_init(long start, long end)) +/* Initialisation - setup the `cache' cache. */ +__initfunc(long kmem_cache_init(long start, long end)) { - /* sanity */ + size_t size, i; + +#define kmem_slab_offset(x) ((unsigned long)&((kmem_slab_t *)0)->x) +#define kmem_slab_diff(a,b) (kmem_slab_offset(a) - kmem_slab_offset(b)) #define kmem_cache_offset(x) ((unsigned long)&((kmem_cache_t *)0)->x) -#define kmem_slab_offset(x) ((unsigned long)&((kmem_slab_t *)0)->x) - if (((kmem_cache_offset(c_magic)-kmem_cache_offset(c_firstp)) != kmem_slab_offset(s_magic)) || - ((kmem_cache_offset(c_inuse)-kmem_cache_offset(c_firstp)) != kmem_slab_offset(s_inuse))) { +#define kmem_cache_diff(a,b) (kmem_cache_offset(a) - kmem_cache_offset(b)) + + /* Sanity checks... */ + if (kmem_cache_diff(c_firstp, c_magic) != kmem_slab_diff(s_nextp, s_magic) || + kmem_cache_diff(c_firstp, c_inuse) != kmem_slab_diff(s_nextp, s_inuse) || + ((kmem_cache_offset(c_lastp) - + ((unsigned long) kmem_slab_end((kmem_cache_t*)NULL))) != + kmem_slab_offset(s_prevp)) || + kmem_cache_diff(c_lastp, c_firstp) != kmem_slab_diff(s_prevp, s_nextp)) { /* Offsets to the magic are incorrect, either the structures have * been incorrectly changed, or adjustments are needed for your * architecture. */ - panic("kmem_cache_init(): Offsets are different - been messed with!\n"); + panic("kmem_cache_init(): Offsets are wrong - I've been messed with!"); /* NOTREACHED */ } #undef kmem_cache_offset +#undef kmem_cache_diff #undef kmem_slab_offset +#undef kmem_slab_diff + + cache_chain_sem = MUTEX; + + size = cache_cache.c_offset + sizeof(kmem_bufctl_t); + size += (L1_CACHE_BYTES-1); + size &= ~(L1_CACHE_BYTES-1); + cache_cache.c_offset = size-sizeof(kmem_bufctl_t); + + i = (PAGE_SIZE<cs_cachep = + kmem_cache_create(*names++, sizes->cs_size, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) + goto panic_time; + if (!found) { + /* Inc off-slab bufctl limit until the ceiling is hit. 
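The offset checks at the top of kmem_cache_init() above are the offsetof() idiom spelled by hand, ((unsigned long)&((kmem_slab_t *)0)->x). They exist because kmem_slab_end() hands out a pointer into the middle of the cache struct as a sentinel slab, which is only sound while the relative member layouts agree. The same assertion with standard offsetof(), on simplified stand-ins that assume long and pointer share a size:

#include <stddef.h>
#include <stdio.h>

struct slab_  { void *s_freep; void *s_index; long s_magic; long s_inuse;
                struct slab_ *s_nextp; struct slab_ *s_prevp; };
struct cache_ { struct slab_ *c_freep; long c_flags; long c_offset; long c_num;
                long c_magic; long c_inuse;
                struct slab_ *c_firstp; struct slab_ *c_lastp; };

int main(void)
{
	/* The sentinel's s_nextp/s_magic overlay c_firstp/c_magic, so the
	 * distances between them must be identical in both structs. */
	int ok = (offsetof(struct cache_, c_firstp) - offsetof(struct cache_, c_magic))
	      == (offsetof(struct slab_, s_nextp) - offsetof(struct slab_, s_magic));
	printf(ok ? "layouts match\n" : "layouts diverged: sentinel would be corrupt\n");
	return 0;
}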
*/ + if (SLAB_BUFCTL(sizes->cs_cachep->c_flags)) + found++; + else + bufctl_limit = + (sizes->cs_size/sizeof(kmem_bufctl_t)); + } + sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL; + sizes++; + } while (sizes->cs_size); +#if SLAB_SELFTEST + kmem_self_test(); +#endif /* SLAB_SELFTEST */ + return; + } +panic_time: + panic("kmem_cache_sizes_init: Error creating caches"); + /* NOTREACHED */ } -/* Interface to system's page allocator. - * dma pts to non-zero if all of the mem is suitable for DMA +/* Interface to system's page allocator. Dma pts to non-zero if all + * of memory is DMAable. No need to hold the cache-lock. */ static inline void * -kmem_getpages(const kmem_cache_t *cachep, unsigned long flags, unsigned int *dma) +kmem_getpages(kmem_cache_t *cachep, unsigned long flags, unsigned int *dma) { - struct page *page; void *addr; - addr = (void*) __get_free_pages(flags & SLAB_LEVEL_MASK, \ - cachep->c_gfporder, flags & SLAB_DMA); - *dma = 1<c_gfporder; - if (!(flags & SLAB_DMA) && addr) { - /* need to check if can dma */ - page = mem_map + MAP_NR(addr); + *dma = flags & SLAB_DMA; + addr = (void*) __get_free_pages(flags & SLAB_LEVEL_MASK, + cachep->c_gfporder, *dma); + /* Assume that now we have the pages no one else can legally + * messes with the 'struct page's. + * However vm_scan() might try to test the structure to see if + * it is a named-page or buffer-page. The members it tests are + * of no interest here..... + */ + if (!*dma && addr) { + /* Need to check if can dma. */ + struct page *page = mem_map + MAP_NR(addr); + *dma = 1<c_gfporder; while ((*dma)--) { if (!PageDMA(page)) { *dma = 0; @@ -358,58 +525,52 @@ return addr; } -/* Interface to system's page release */ +/* Interface to system's page release. */ static inline void kmem_freepages(kmem_cache_t *cachep, void *addr) { + unsigned long i = (1<c_gfporder); + struct page *page = &mem_map[MAP_NR(addr)]; + + /* free_pages() does not clear the type bit - we do that. + * The pages have been unlinked from their cache-slab, + * but their 'struct page's might be accessed in + * vm_scan(). Shouldn't be a worry. + */ + while (i--) { + PageClearSlab(page); + page++; + } free_pages((unsigned long)addr, cachep->c_gfporder); } -/* Hashing function - used for caches with off-slab bufctls */ -static inline int -kmem_hash(const kmem_cache_t *cachep, const void *objp) +#if SLAB_DEBUG_SUPPORT +static inline void +kmem_poision_obj(kmem_cache_t *cachep, void *addr) { - return (((unsigned long)objp >> cachep->c_hashbits) & (KMEM_HASH_SIZE-1)); + memset(addr, SLAB_POISION_BYTE, cachep->c_org_size); + *(unsigned char *)(addr+cachep->c_org_size-1) = SLAB_POISION_END; } -/* Link bufctl into a hash table - used for caches with off-slab bufctls - * - called with ints disabled - */ -static inline void * -kmem_add_to_hash(kmem_cache_t *cachep, kmem_bufctl_t *bufp) +static inline int +kmem_check_poision_obj(kmem_cache_t *cachep, void *addr) { - kmem_bufctl_t **bufpp = bufp->buf_hashp; - - bufp->buf_hnextp = *bufpp; - return (*bufpp = bufp)->buf_objp; + void *end; + end = memchr(addr, SLAB_POISION_END, cachep->c_org_size); + if (end != (addr+cachep->c_org_size-1)) + return 1; + return 0; } +#endif /* SLAB_DEBUG_SUPPORT */ -/* Find bufcntl for given obj addr, and unlink. - * - called with ints disabled +/* Three slab chain funcs - all called with ints disabled and the appropiate + * cache-lock held. 
*/ -static inline kmem_bufctl_t * -kmem_remove_from_hash(kmem_cache_t *cachep, const void *objp) -{ - kmem_bufctl_t *bufp; - kmem_bufctl_t **bufpp = &cachep->c_hashp[kmem_hash(cachep, objp)]; - - for (;*bufpp; bufpp = &(*bufpp)->buf_hnextp) { - if ((*bufpp)->buf_objp != objp) - continue; - bufp = *bufpp; - *bufpp = bufp->buf_hnextp; - return bufp; - } - return NULL; -} - -/* Three slab chain funcs - all called with ints disabled */ static inline void kmem_slab_unlink(kmem_slab_t *slabp) { kmem_slab_t *prevp = slabp->s_prevp; kmem_slab_t *nextp = slabp->s_nextp; - prevp->s_nextp = nextp; nextp->s_prevp = prevp; } @@ -417,781 +578,881 @@ static inline void kmem_slab_link_end(kmem_cache_t *cachep, kmem_slab_t *slabp) { + kmem_slab_t *lastp = cachep->c_lastp; slabp->s_nextp = kmem_slab_end(cachep); - slabp->s_prevp = cachep->c_lastp; - kmem_slab_end(cachep)->s_prevp = slabp; - slabp->s_prevp->s_nextp = slabp; + slabp->s_prevp = lastp; + cachep->c_lastp = slabp; + lastp->s_nextp = slabp; } static inline void kmem_slab_link_free(kmem_cache_t *cachep, kmem_slab_t *slabp) { kmem_slab_t *nextp = cachep->c_freep; - + kmem_slab_t *prevp = nextp->s_prevp; slabp->s_nextp = nextp; - cachep->c_freep = slabp; - slabp->s_prevp = nextp->s_prevp; + slabp->s_prevp = prevp; nextp->s_prevp = slabp; slabp->s_prevp->s_nextp = slabp; } -/* Cal the num objs, wastage, and bytes left over for a given slab size */ -static int -kmem_cache_cal_waste(unsigned long gfporder, unsigned long size, - unsigned long extra, unsigned long flags, - unsigned long *left_over, unsigned long *num) -{ - unsigned long wastage; - - wastage = PAGE_SIZE << gfporder; - gfporder = 0; - if (!SLAB_OFF_SLAB(flags)) - gfporder = sizeof(kmem_slab_t); +/* Destroy all the objs in a slab, and release the mem back to the system. + * Before calling the slab must have been unlinked from the cache. + * The cache-lock is not held/needed. + */ +static void +kmem_slab_destroy(kmem_cache_t *cachep, kmem_slab_t *slabp) +{ + if (cachep->c_dtor +#if SLAB_DEBUG_SUPPORT + || cachep->c_flags & (SLAB_POISION || SLAB_RED_ZONE) +#endif /*SLAB_DEBUG_SUPPORT*/ + ) { + /* Doesn't use the bufctl ptrs to find objs. */ + unsigned long num = cachep->c_num; + void *objp = slabp->s_mem; + do { +#if SLAB_DEBUG_SUPPORT + if (cachep->c_flags & SLAB_RED_ZONE) { + if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) + printk(KERN_ERR "kmem_slab_destroy: " + "Bad front redzone - %s\n", + cachep->c_name); + objp += BYTES_PER_WORD; + if (*((unsigned long*)(objp+cachep->c_org_size)) != + SLAB_RED_MAGIC1) + printk(KERN_ERR "kmem_slab_destroy: " + "Bad rear redzone - %s\n", + cachep->c_name); + } + if (cachep->c_dtor) +#endif /*SLAB_DEBUG_SUPPORT*/ + (cachep->c_dtor)(objp, cachep, 0); +#if SLAB_DEBUG_SUPPORT + else if (cachep->c_flags & SLAB_POISION) { + if (kmem_check_poision_obj(cachep, objp)) + printk(KERN_ERR "kmem_slab_destory: " + "Bad poision - %s\n", cachep->c_name); + } + if (cachep->c_flags & SLAB_RED_ZONE) + objp -= BYTES_PER_WORD; +#endif /* SLAB_DEBUG_SUPPORT */ + objp += cachep->c_offset; + if (!slabp->s_index) + objp += sizeof(kmem_bufctl_t); + } while (--num); + } + + slabp->s_magic = SLAB_MAGIC_DESTROYED; + kmem_freepages(cachep, slabp->s_mem-slabp->s_offset); + if (slabp->s_index) + kmem_cache_free(cachep->c_index_cachep, slabp->s_index); + if (SLAB_OFF_SLAB(cachep->c_flags)) + kmem_cache_free(cache_slabp, slabp); +} + +/* Cal the num objs, wastage, and bytes left over for a given slab size. 
*/ +static inline size_t +kmem_cache_cal_waste(unsigned long gfporder, size_t size, size_t extra, + unsigned long flags, size_t *left_over, unsigned long *num) +{ + size_t wastage = PAGE_SIZE< ((1<= size) { - printk(KERN_WARNING "%sAlign weired %lu - %s\n", func_nm, align, name); - align = 0; + if (dtor && !ctor) { + /* Decon, but no con - doesn't make sense */ + printk("%sDecon but no con - %s\n", func_nm, name); + goto opps; } - if (dtor && !ctor) { - /* Descon, but no con - doesn't make sense */ - printk(KERN_ERR "%sDecon but no con - %s\n", func_nm, name); - return NULL; + if (offset < 0 || offset > size) { + printk("%sOffset weired %d - %s\n", func_nm, (int) offset, name); + offset = 0; } +#if SLAB_DEBUG_SUPPORT if ((flags & SLAB_DEBUG_INITIAL) && !ctor) { /* No constructor, but inital state check requested */ - printk(KERN_WARNING "%sNo con, but init state check requested - %s\n", - func_nm, name); + printk("%sNo con, but init state check requested - %s\n", func_nm, name); flags &= ~SLAB_DEBUG_INITIAL; } + + if ((flags & SLAB_POISION) && ctor) { + /* request for poisioning, but we can't do that with a constructor */ + printk("%sPoisioning requested, but con given - %s\n", func_nm, name); + flags &= ~SLAB_POISION; + } +#if 0 + if ((flags & SLAB_HIGH_PACK) && ctor) { + printk("%sHigh pack requested, but con given - %s\n", func_nm, name); + flags &= ~SLAB_HIGH_PACK; + } + if ((flags & SLAB_HIGH_PACK) && (flags & (SLAB_POISION|SLAB_RED_ZONE))) { + printk("%sHigh pack requested, but with poisioning/red-zoning - %s\n", + func_nm, name); + flags &= ~SLAB_HIGH_PACK; + } +#endif +#endif /* SLAB_DEBUG_SUPPORT */ #endif /* SLAB_MGMT_CHECKS */ - /* get cache's description obj */ + /* Always checks flags, a caller might be expecting debug + * support which isn't available. + */ + if (flags & ~SLAB_C_MASK) { + printk("%sIllgl flg %lX - %s\n", func_nm, flags, name); + flags &= SLAB_C_MASK; + } + + /* Get cache's description obj. */ cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL); if (!cachep) goto opps; + memset(cachep, 0, sizeof(kmem_cache_t)); - /* remember original size, so can be passed to a constructor or decon. - * Allows the same con/decon to be used for caches of similar objs - * that have a different size data buffer assoicated with them + /* Check that size is in terms of words. This is needed to avoid + * unaligned accesses for some archs when redzoning is used, and makes + * sure any on-slab bufctl's are also correctly aligned. */ - cachep->c_org_size = size; + if (size & (BYTES_PER_WORD-1)) { + size += (BYTES_PER_WORD-1); + size &= ~(BYTES_PER_WORD-1); + printk("%sForcing size word alignment - %s\n", func_nm, name); + } -#if defined(SLAB_DEBUG_SUPPORT) - if (flags & SLAB_RED_ZONE) - size += BYTES_PER_WORD; /* word for redzone */ +#if SLAB_DEBUG_SUPPORT + if (flags & SLAB_RED_ZONE) { + /* There is no point trying to honour cache alignment when redzoning. */ + flags &= ~SLAB_HWCACHE_ALIGN; + size += 2*BYTES_PER_WORD; /* words for redzone */ + } #endif /* SLAB_DEBUG_SUPPORT */ + cachep->c_org_size = size; - /* Make a guess if slab mngmnt obj and/or bufctls are 'on' or 'off' slab */ - i = kmem_bufctl_short_size; + align = BYTES_PER_WORD; + if (flags & SLAB_HWCACHE_ALIGN) + align = L1_CACHE_BYTES; + + /* Determine if the slab mgmt and/or bufclts are 'on' or 'off' slab. */ + extra = sizeof(kmem_bufctl_t); if (size < (PAGE_SIZE>>3)) { - /* Size is small(ish). Use format where bufctl size per - * obj is low, and slab mngmnt is on-slab + /* Size is small(ish). 
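A worked instance of the calculation above, with hypothetical but representative numbers: 4 KB pages at order 0, on-slab management costing 32 bytes (slab_align_size), and a 104-byte stride for each padded object plus its on-slab bufctl:

#include <stdio.h>

int main(void)
{
	unsigned long wastage = 4096UL << 0;   /* PAGE_SIZE << gfporder */
	unsigned long mgmt = 32;               /* slab_align_size, on-slab case */
	unsigned long size = 104;              /* padded obj + on-slab bufctl */
	unsigned long num, left_over;

	wastage -= mgmt;
	num = wastage / size;                  /* 39 objects per slab */
	left_over = wastage - num * size;      /* 8 bytes, left for colouring */
	printf("num %lu, left_over %lu\n", num, left_over);
	return 0;
}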
Use packing where bufctl size per + * obj is low, and slab mngmnt is on-slab. */ - if (!ctor && !dtor && !(flags & SLAB_RED_ZONE)) { - /* the objs in this cache have no state - can store - * store freelist ptr within obj. (redzoning is a state) +#if 0 + if ((flags & SLAB_HIGH_PACK)) { + /* Special high packing for small objects + * (mainly for vm_mapping structs, but + * others can use it). */ -#if defined(SLAB_HIGH_PACK) - i=0; - flags |= SLAB_CFLGS_PTR_IN_OBJ; -#else - i = kmem_bufctl_very_short_size; -#endif + if (size == (L1_CACHE_BYTES/4) || size == (L1_CACHE_BYTES/2) || + size == L1_CACHE_BYTES) { + /* The bufctl is stored with the object. */ + extra = 0; + } else + flags &= ~SLAB_HIGH_PACK; } +#endif } else { /* Size is large, assume best to place the slab mngmnt obj - * off-slab (should allow better packing of objs) + * off-slab (should allow better packing of objs). */ flags |= SLAB_CFLGS_OFF_SLAB; - if (!(size & ~PAGE_MASK) || - size == (PAGE_SIZE+PAGE_SIZE/2) || - size == (PAGE_SIZE/2) || - size == (PAGE_SIZE/4) || - size == (PAGE_SIZE/8)) { - /* to avoid waste the bufctls are off-slab */ + if (!(size & ~PAGE_MASK) || size == (PAGE_SIZE/2) + || size == (PAGE_SIZE/4) || size == (PAGE_SIZE/8)) { + /* To avoid waste the bufctls are off-slab... */ flags |= SLAB_CFLGS_BUFCTL; - /* get hash table for cache */ - cachep->c_hashp = kmem_cache_alloc(&cache_hash, SLAB_KERNEL); - if (cachep->c_hashp == NULL) { - kmem_cache_free(&cache_cache, cachep); - goto opps; - } - i = 0; - cachep->c_hashbits = PAGE_SHIFT; - if (size <= (PAGE_SIZE/2)) { - cachep->c_hashbits--; - if (size <= (PAGE_SIZE/4)) cachep->c_hashbits--; - if (size <= (PAGE_SIZE/8)) cachep->c_hashbits -= 2; - } - } /* else slab mngmnt is off-slab, but freelist ptrs are on */ + extra = 0; + } /* else slab mngmnt is off-slab, but freelist ptrs are on. */ } - size += i; - - /* Adjust the mem used for objs so they will align correctly. - * Force objs to start on word boundaries, but caller may specify - * h/w cache line boundaries. This 'alignment' is slightly different - * to the 'align' argument. Objs may be requested to start on h/w - * lines (as that is how the members of the obj have been organised), - * but the 'align' may be quite high (say 64) as the first 64 bytes - * are commonly accessed/modified within a loop (stops h/w line - * thrashing). The 'align' is the slab colouring. - */ - words = BYTES_PER_WORD; - if (flags & SLAB_HWCACHE_ALIGN) - words = L1_CACHE_BYTES; - words--; - size += words; - size = size & ~words; - /* alignment might not be a factor of the boundary alignment - fix-up */ - align += words; - align = align & ~words; + size += extra; + if (flags & SLAB_HWCACHE_ALIGN) { + /* Need to adjust size so that objs are cache aligned. */ + if (size > (L1_CACHE_BYTES/2)) { + size_t words = size % L1_CACHE_BYTES; + if (words) + size += (L1_CACHE_BYTES-words); + } else { + /* Small obj size, can get at least two per cache line. */ + int num_per_line = L1_CACHE_BYTES/size; + left_over = L1_CACHE_BYTES - (num_per_line*size); + if (left_over) { + /* Need to adjust size so objs cache align. */ + if (left_over%num_per_line) { + /* Odd num of objs per line - fixup. */ + num_per_line--; + left_over += size; + } + size += (left_over/num_per_line); + } + } + } else if (!(size%L1_CACHE_BYTES)) { + /* Size happens to cache align... */ + flags |= SLAB_HWCACHE_ALIGN; + align = L1_CACHE_BYTES; + } /* Cal size (in pages) of slabs, and the num of objs per slab. - * This could be made much more intelligent. 
*/ - cachep->c_gfporder=0; + * This could be made much more intelligent. For now, try to avoid + * using high page-orders for slabs. When the gfp() funcs are more + * friendly towards high-order requests, this should be changed. + */ do { - unsigned long wastage; - wastage = kmem_cache_cal_waste(cachep->c_gfporder, size, i, - flags, &left_over, &num); - if (!num) + size_t wastage; + unsigned int break_flag = 0; +cal_wastage: + wastage = kmem_cache_cal_waste(cachep->c_gfporder, size, extra, + flags, &left_over, &cachep->c_num); + if (!cachep->c_num) goto next; - if (SLAB_PTR_IN_OBJ(flags)) + if (break_flag) break; + if (SLAB_BUFCTL(flags) && cachep->c_num > bufctl_limit) { + /* Oops, this num of objs will cause problems. */ + cachep->c_gfporder--; + break_flag++; + goto cal_wastage; + } if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER) break; - /* large num of objs is good, but v. large slabs are bad for the - * VM sub-system + + /* Large num of objs is good, but v. large slabs are currently + * bad for the gfp()s. */ - if (num <= SLAB_MIN_OBJS_PER_SLAB) { + if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) { if (cachep->c_gfporder < SLAB_BREAK_GFP_ORDER) goto next; } - /* stop caches with small objs having a large num of pages */ - if (left_over <= sizeof(kmem_slab_t)) + + /* Stop caches with small objs having a large num of pages. */ + if (left_over <= slab_align_size) break; if ((wastage*8) <= (PAGE_SIZE<c_gfporder)) - break; /* acceptable wastage */ + break; /* Acceptable internal fragmentation. */ next: cachep->c_gfporder++; } while (1); - cachep->c_num = num; - /* try with requested alignment, but reduce it if that will - * allow at least some alignment words + /* If the slab has been placed off-slab, and we have enough space then + * move it on-slab. This is at the expense of any extra colouring. */ - words++; - if (left_over < align) - align = (left_over / words) * words; - else if (!align && words <= left_over) { - /* no alignment given, but space enough - give one */ - align = words; - if (words == BYTES_PER_WORD) { - if (BYTES_PER_WORD*4 <= left_over) - align += align; - if (BYTES_PER_WORD*8 <= left_over) - align += align; + if ((flags & SLAB_CFLGS_OFF_SLAB) && !SLAB_BUFCTL(flags) && + left_over >= slab_align_size) { + flags &= ~SLAB_CFLGS_OFF_SLAB; + left_over -= slab_align_size; + } + + /* Offset must be a factor of the alignment. */ + offset += (align-1); + offset &= ~(align-1); + + /* Mess around with the offset alignment. */ + if (!left_over) { + offset = 0; + } else if (left_over < offset) { + offset = align; + if (flags & SLAB_HWCACHE_ALIGN) { + if (left_over < offset) + offset = 0; + } else { + /* Offset is BYTES_PER_WORD, and left_over is at + * least BYTES_PER_WORD. + */ + if (left_over >= (BYTES_PER_WORD*2)) { + offset >>= 1; + if (left_over >= (BYTES_PER_WORD*4)) + offset >>= 1; + } + } + } else if (!offset) { + /* No offset requested, but space enough - give one. */ + offset = left_over/align; + if (flags & SLAB_HWCACHE_ALIGN) { + if (offset >= 8) { + /* A large number of colours - use a larger alignment. 
*/ + align <<= 1; + } + } else { + if (offset >= 10) { + align <<= 1; + if (offset >= 16) + align <<= 1; + } } + offset = align; } - cachep->c_align = align; #if 0 - printk("Size:%lu Orig:%lu Left:%lu Align %lu Pages:%d - %s\n", - size, cachep->c_org_size, left_over, align, 1<c_gfporder, name); - if (SLAB_OFF_SLAB(flags)) printk("OFF SLAB\n"); - if (SLAB_BUFCTL(flags)) printk("BUFCTL PTRS\n"); +printk("%s: Left_over:%d Align:%d Size:%d\n", name, left_over, offset, size); #endif - /* if the bufctl's are on-slab, c_offset does not inc the size of the bufctl */ + if ((cachep->c_align = (unsigned long) offset)) + cachep->c_colour = (left_over/offset); + cachep->c_colour_next = cachep->c_colour; + + /* If the bufctl's are on-slab, c_offset does not include the size of bufctl. */ if (!SLAB_BUFCTL(flags)) - size -= kmem_bufctl_short_size; + size -= sizeof(kmem_bufctl_t); + else + cachep->c_index_cachep = + kmem_find_general_cachep(cachep->c_num*sizeof(kmem_bufctl_t)); + cachep->c_offset = (unsigned long) size; cachep->c_freep = kmem_slab_end(cachep); - cachep->c_flags = flags; - cachep->c_offset = size; cachep->c_firstp = kmem_slab_end(cachep); cachep->c_lastp = kmem_slab_end(cachep); + cachep->c_flags = flags; cachep->c_ctor = ctor; cachep->c_dtor = dtor; cachep->c_magic = SLAB_C_MAGIC; - cachep->c_inuse = 0; /* always zero */ - cachep->c_name = name; /* simply point to the name */ - - cachep->c_colour = 1; - if (align) - cachep->c_colour += (left_over/align); - cachep->c_colour_next = cachep->c_colour; + cachep->c_name = name; /* Simply point to the name. */ + spin_lock_init(&cachep->c_spinlock); - /* warn on dup cache names */ + /* Need the semaphore to access the chain. */ + down(&cache_chain_sem); searchp = &cache_cache; do { + /* The name field is constant - no lock needed. */ if (!strcmp(searchp->c_name, name)) { - printk(KERN_WARNING "%sDup name - %s\n", func_nm, name); + printk("%sDup name - %s\n", func_nm, name); break; } searchp = searchp->c_nextp; } while (searchp != &cache_cache); + + /* There is no reason to lock our new cache before we + * link it in - no one knows about it yet... + */ cachep->c_nextp = cache_cache.c_nextp; cache_cache.c_nextp = cachep; - return cachep; + up(&cache_chain_sem); opps: - printk(KERN_WARNING "%sOut of mem creating cache %s\n", func_nm, name); - return NULL; -} - -/* Destroy all the objs in a slab, and release the mem back to the system. - * Before calling the slab must have been unlinked - */ -static void -kmem_slab_destroy(kmem_cache_t *cachep, kmem_slab_t *slabp, unsigned long flags) -{ - if (cachep->c_dtor || SLAB_BUFCTL(cachep->c_flags)) { - kmem_bufctl_t *bufp = slabp->s_freep; - - /* for each obj in slab... */ - while (bufp) { - kmem_bufctl_t *freep; - if (cachep->c_dtor) { - void *objp = ((void*)bufp)-cachep->c_offset; - if (SLAB_BUFCTL(cachep->c_flags)) - objp = bufp->buf_objp; - (cachep->c_dtor)(objp, cachep->c_org_size, flags); - } - freep = bufp; - bufp = bufp->buf_nextp; - if (SLAB_BUFCTL(cachep->c_flags)) - kmem_cache_free(&cache_bufctl, freep); - } - } - - slabp->s_magic = SLAB_MAGIC_UNALLOC; - kmem_freepages(cachep, slabp->s_mem); - if (SLAB_OFF_SLAB(cachep->c_flags)) - kmem_cache_free(&cache_slab, slabp); -} - -/* Destroy (remove) a cache. 
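The colouring figures just computed stagger where successive slabs place their first object: c_colour is the number of distinct offsets that fit in the left-over space, and the grow path (outside this excerpt) cycles c_colour_next through them so that hot members of objects in different slabs land on different cache lines. A small illustration with hypothetical values, 96 bytes left over at 32-byte alignment:

#include <stdio.h>

int main(void)
{
	unsigned long align = 32, colours = 96 / 32;   /* c_colour = 3 */
	unsigned long next = colours, slab;

	for (slab = 0; slab < 6; slab++) {
		if (!next)
			next = colours;                /* wrap around */
		next--;
		printf("slab %lu: first obj at offset %lu\n", slab, next * align);
	}
	return 0;
}

The cycling direction here is illustrative; the point is that the six slabs start at offsets 64, 32, 0, 64, 32, 0 instead of all at 0.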
- * All objs in the cache should be inactive - */ -int -kmem_cache_destroy(kmem_cache_t *cachep) -{ - kmem_cache_t **searchp; - kmem_slab_t *slabp; - unsigned long save_flags; - -#if defined(SLAB_MGMT_CHECKS) - if (!cachep) { - printk(KERN_ERR "kmem_dest: NULL ptr\n"); - goto err_end; - } - - if (in_interrupt()) { - printk(KERN_ERR "kmem_dest: Called during int - %s\n", cachep->c_name); -err_end: - return 1; - } -#endif /* SLAB_MGMT_CHECKS */ - - /* unlink the cache from the chain of active caches. - * Note: the chain is never modified during an int - */ - searchp = &(cache_cache.c_nextp); - for (;*searchp != &cache_cache; searchp = &((*searchp)->c_nextp)) { - if (*searchp != cachep) - continue; - goto good_cache; - } - printk(KERN_ERR "kmem_dest: Invalid cache addr %p\n", cachep); - return 1; -good_cache: - /* disable cache so attempts to allocated from an int can - * be caught. - */ - save_flags(save_flags); - cli(); - if (cachep->c_freep != kmem_slab_end(cachep)) { - restore_flags(save_flags); - printk(KERN_ERR "kmem_dest: active cache - %s\n", cachep->c_name); - return 2; - } - *searchp = cachep->c_nextp; /* remove from cache chain */ - cachep->c_flags |= SLAB_CFLGS_RELEASED; - cachep->c_freep = kmem_slab_end(cachep); - if (cachep == clock_searchp) - clock_searchp = cachep->c_nextp; - restore_flags(save_flags); - - while ((slabp = cachep->c_firstp) != kmem_slab_end(cachep)) { - kmem_slab_unlink(slabp); - kmem_slab_destroy(cachep, slabp, 0); - } - - if (SLAB_BUFCTL(cachep->c_flags)) - kmem_cache_free(&cache_hash, cachep->c_hashp); - kmem_cache_free(&cache_cache, cachep); - return 0; + return cachep; } -/* Shrink a cache, ie. remove _all_ inactive slabs. - * Can be called when a user of a cache knows they are not going to be - * needing any new objs for a while. - * NOTE: This func is probably going to disappear - let me know if you - * are using it! +/* Shrink a cache. Releases as many slabs as possible for a cache. + * It is expected this function will be called by a module when it is + * unloaded. The cache is _not_ removed, this creates too many problems and + * the cache-structure does not take up much room. A module should keep its + * cache pointer(s) in unloaded memory, so when reloaded it knows the cache + * is available. To help debugging, a zero exit status indicates all slabs + * were released. */ int -kmem_cache_shrink(kmem_cache_t *cachep, int wait) +kmem_cache_shrink(kmem_cache_t *cachep) { + kmem_cache_t *searchp; kmem_slab_t *slabp; - unsigned long dtor_flags; - unsigned long save_flags, num_freed=0; + int ret; -#if defined(SLAB_MGMT_CHECKS) if (!cachep) { printk(KERN_ERR "kmem_shrink: NULL ptr\n"); - goto end; + return 2; } - if (in_interrupt()) { printk(KERN_ERR "kmem_shrink: Called during int - %s\n", cachep->c_name); - goto end; + return 2; } -#endif /* SLAB_MGMT_CHECKS */ - dtor_flags = 0; - if (!wait) /* not allowed to wait */ - dtor_flags = SLAB_DTOR_ATOMIC; - - save_flags(save_flags); - while (0) { - cli(); - slabp = cachep->c_lastp; - if (slabp == kmem_slab_end(cachep) || slabp->s_inuse) { - restore_flags(save_flags); - goto end; - } - kmem_slab_unlink(slabp); - if (cachep->c_freep == slabp) - cachep->c_freep = kmem_slab_end(cachep); - restore_flags(save_flags); - num_freed++; - kmem_slab_destroy(cachep, slabp, dtor_flags); - } -end: - return num_freed; -} + /* Find the cache in the chain of caches. */ + down(&cache_chain_sem); /* Semaphore is needed. 
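+	 * The chain is modified by kmem_cache_create() and walked here and
+	 * in kmem_cache_reap(), never from interrupts, so a sleeping
+	 * semaphore (rather than a spinlock) is the right guard.  A typical
+	 * caller, per the header comment above, is a module's unload path
+	 * (cache name hypothetical):
+	 *
+	 *	if (kmem_cache_shrink(foo_cachep))
+	 *		printk("foo: cache still has active slabs\n");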
 */
+	searchp = &cache_cache;
+	for (;searchp->c_nextp != &cache_cache; searchp = searchp->c_nextp) {
+		if (searchp->c_nextp != cachep)
+			continue;
 
-/* Search for a slab whose objs are suitable for DMA.
- * Note: since testing the first free slab (in __kmem_cache_alloc()),
- * ints must not have been enabled!
- */
-static inline kmem_slab_t *
-kmem_cache_search_dma(kmem_cache_t *cachep)
-{
-	kmem_slab_t	*slabp = cachep->c_freep->s_nextp;
+		/* Accessing clock_searchp is safe - we hold the mutex. */
+		if (cachep == clock_searchp)
+			clock_searchp = cachep->c_nextp;
+		goto found;
+	}
+	up(&cache_chain_sem);
+	printk(KERN_ERR "kmem_shrink: Invalid cache addr %p\n", cachep);
+	return 2;
+found:
+	/* Release the semaphore before getting the cache-lock.  This could
+	 * mean multiple engines are shrinking the cache, but so what...
+	 */
+	up(&cache_chain_sem);
+	spin_lock_irq(&cachep->c_spinlock);
 
-	for (; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
-		if (!(slabp->s_flags & SLAB_SFLGS_DMA))
-			continue;
+	/* If the cache is growing, stop shrinking. */
+	while (!cachep->c_growing) {
+		slabp = cachep->c_lastp;
+		if (slabp->s_inuse || slabp == kmem_slab_end(cachep))
+			break;
 		kmem_slab_unlink(slabp);
-		kmem_slab_link_free(cachep, slabp);
-		return slabp;
-	}
-	return NULL;
+		spin_unlock_irq(&cachep->c_spinlock);
+		kmem_slab_destroy(cachep, slabp);
+		spin_lock_irq(&cachep->c_spinlock);
+	}
+	ret = 1;
+	if (cachep->c_lastp == kmem_slab_end(cachep))
+		ret--;		/* Cache is empty. */
+	spin_unlock_irq(&cachep->c_spinlock);
+	return ret;
 }
 
-/* get the mem for a slab mgmt obj */
+/* Get the mem for a slab mgmt obj. */
 static inline kmem_slab_t *
-kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, unsigned long local_flags, unsigned long offset)
+kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, int local_flags)
 {
 	kmem_slab_t	*slabp;
 
 	if (SLAB_OFF_SLAB(cachep->c_flags)) {
-		/* slab mngmnt obj is off-slab */
-		if (!(slabp = kmem_cache_alloc(&cache_slab, local_flags)))
-			return NULL;
+		/* Slab mgmt obj is off-slab. */
+		slabp = kmem_cache_alloc(cache_slabp, local_flags);
 	} else {
-		/* slab mngmnt at end of slab mem */
-		slabp = objp + (PAGE_SIZE << cachep->c_gfporder);
-		slabp--;
-		if (!SLAB_PTR_IN_OBJ(cachep->c_flags)) {
-			/* A bit of extra help for the L1 cache; try to position the slab
-			 * mgmnt struct at different offsets within the gap at the end
-			 * of a slab.  This helps avoid thrashing the h/w cache lines,
-			 * that map to the end of a page, too much...
-			 */
-			unsigned long gap = cachep->c_offset;
-			if (!SLAB_BUFCTL(cachep->c_flags))
-				gap += kmem_bufctl_short_size;
-			gap = (PAGE_SIZE << cachep->c_gfporder)-((gap*cachep->c_num)+offset+sizeof(*slabp));
-			gap /= (sizeof(*slabp)/2);
-			gap *= (sizeof(*slabp)/2);
-			slabp = (((void*)slabp)-gap);
-		}
+		/* Slab mgmt at end of slab mem, placed so that
+		 * the position is 'coloured'.
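+		 * For on-slab bufctls one slab then looks roughly like:
+		 *
+		 *  |colour pad|obj 0|bufctl 0|...|obj n-1|bufctl n-1|slab_t|
+		 *
+		 * with the kmem_slab_t L1-aligned, so it too lands on a
+		 * different cache line from one slab to the next.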
+		 */
+		void *end;
+		end = objp + (cachep->c_num * cachep->c_offset);
+		if (!SLAB_BUFCTL(cachep->c_flags))
+			end += (cachep->c_num * sizeof(kmem_bufctl_t));
+		slabp = (kmem_slab_t *) L1_CACHE_ALIGN((unsigned long)end);
 	}
-	slabp->s_flags = slabp->s_inuse = slabp->s_jiffies = 0;
+	if (slabp) {
+		slabp->s_inuse = 0;
+		slabp->s_dma = 0;
+		slabp->s_index = NULL;
+	}
 
 	return slabp;
 }
 
-static inline int
-kmem_cache_init_objs(kmem_cache_t *cachep, kmem_slab_t *slabp, void *objp,
-			unsigned long local_flags, unsigned long ctor_flags)
+static inline void
+kmem_cache_init_objs(kmem_cache_t * cachep, kmem_slab_t * slabp, void *objp,
+		     unsigned long ctor_flags)
 {
 	kmem_bufctl_t	**bufpp = &slabp->s_freep;
-	unsigned long	num = cachep->c_num;
+	unsigned long	num = cachep->c_num-1;
 
 	do {
-		if (SLAB_BUFCTL(cachep->c_flags)) {
-			if (!(*bufpp = kmem_cache_alloc(&cache_bufctl, local_flags))) {
-				kmem_slab_destroy(cachep, slabp, 0);
-				return 1;
-			}
-			(*bufpp)->buf_objp = objp;
-			(*bufpp)->buf_hashp = &cachep->c_hashp[kmem_hash(cachep, objp)];
+#if	SLAB_DEBUG_SUPPORT
+		if (cachep->c_flags & SLAB_RED_ZONE) {
+			*((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
+			objp += BYTES_PER_WORD;
+			*((unsigned long*)(objp+cachep->c_org_size)) = SLAB_RED_MAGIC1;
 		}
+#endif	/* SLAB_DEBUG_SUPPORT */
 
+		/* Constructors are not allowed to allocate memory from the same cache
+		 * which they are a constructor for.  Otherwise, deadlock.
+		 * They must also be threaded.
+		 */
 		if (cachep->c_ctor)
-			cachep->c_ctor(objp, cachep->c_org_size, ctor_flags);
+			cachep->c_ctor(objp, cachep, ctor_flags);
+#if	SLAB_DEBUG_SUPPORT
+		else if (cachep->c_flags & SLAB_POISION) {
+			/* need to poision the objs */
+			kmem_poision_obj(cachep, objp);
+		}
 
-#if	defined(SLAB_DEBUG_SUPPORT)
-		if (cachep->c_flags & SLAB_RED_ZONE)
-			*((unsigned long*)(objp+cachep->c_org_size)) = SLAB_RED_MAGIC1;
+		if (cachep->c_flags & SLAB_RED_ZONE) {
+			if (*((unsigned long*)(objp+cachep->c_org_size)) !=
+			    SLAB_RED_MAGIC1) {
+				*((unsigned long*)(objp+cachep->c_org_size)) =
+					SLAB_RED_MAGIC1;
+				printk(KERN_ERR "kmem_init_obj: Bad rear redzone "
+				       "after constructor - %s\n", cachep->c_name);
+			}
+			objp -= BYTES_PER_WORD;
+			if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) {
+				*((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
+				printk(KERN_ERR "kmem_init_obj: Bad front redzone "
+				       "after constructor - %s\n", cachep->c_name);
+			}
+		}
 #endif	/* SLAB_DEBUG_SUPPORT */
 
 		objp += cachep->c_offset;
-		if (!SLAB_BUFCTL(cachep->c_flags)) {
+		if (!slabp->s_index) {
 			*bufpp = objp;
-			objp += kmem_bufctl_short_size;
-		}
-		if (!SLAB_PTR_IN_OBJ(cachep->c_flags))
-			(*bufpp)->buf_slabp = slabp;
+			objp += sizeof(kmem_bufctl_t);
+		} else
+			*bufpp = &slabp->s_index[num];
 		bufpp = &(*bufpp)->buf_nextp;
-	} while (--num);
+	} while (num--);
+
 	*bufpp = NULL;
-	return 0;
 }
 
-/* Grow (by 1) the number of slabs within a cache.
- * This is called by kmem_cache_alloc() when there are no
- * inactive objs left in a cache
+/* Grow (by 1) the number of slabs within a cache.  This is called by
+ * kmem_cache_alloc() when there are no free objs left in a cache.
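+ * In outline: pick the next colour offset and bump c_growing, drop the
+ * lock, kmem_getpages() for the slab memory, kmem_cache_slabmgmt() for
+ * the slab_t (plus the bufctl index for SLAB_BUFCTL caches), tag each
+ * struct page with cache/slab back-pointers, run the constructors via
+ * kmem_cache_init_objs(), then retake the lock and link the slab in.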
 */
-static void
-kmem_cache_grow(kmem_cache_t *cachep, unsigned long flags)
+static int
+kmem_cache_grow(kmem_cache_t * cachep, int flags)
 {
 	kmem_slab_t	*slabp;
+	struct page	*page;
 	void		*objp;
-	unsigned int	offset, dma;
-	unsigned long	ctor_flags, local_flags, save_flags;
+	size_t		 offset;
+	unsigned int	 dma, local_flags;
+	unsigned long	 ctor_flags;
+	unsigned long	 save_flags;
+
+	/* Be lazy and only check for valid flags here,
+	 * keeping it out of the critical path in kmem_cache_alloc().
+	 */
+	if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
+		printk(KERN_WARNING "kmem_grow: Illegal flgs %X (correcting) - %s\n",
+		       flags, cachep->c_name);
+		flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
+	}
 
 	if (flags & SLAB_NO_GROW)
-		return;	/* caller doesn't want us to grow */
+		return 0;
 
-	save_flags(save_flags);
 	/* The test for missing atomic flag is performed here, rather than
 	 * the more obvious place, simply to reduce the critical path length
-	 * in kmem_cache_alloc().  If a caller is slightly mis-behaving,
-	 * will eventually be caught here (where it matters)
+	 * in kmem_cache_alloc().  If a caller is slightly mis-behaving they
+	 * will eventually be caught here (where it matters).
 	 */
 	if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) {
-		static int count = 0;
-		if (count < 8) {
-			printk(KERN_ERR "kmem_grow: Called nonatomically from "
-			       "int - %s\n", cachep->c_name);
-			count++;
-		}
+		printk(KERN_ERR "kmem_grow: Called nonatomically from int - %s\n",
+		       cachep->c_name);
 		flags &= ~SLAB_LEVEL_MASK;
 		flags |= SLAB_ATOMIC;
 	}
-	local_flags = (flags & SLAB_LEVEL_MASK);
 	ctor_flags = SLAB_CTOR_CONSTRUCTOR;
-	if ((flags & SLAB_LEVEL_MASK) == SLAB_ATOMIC) {
-		/* Not allowed to sleep.
-		 * Need to tell a constructor about this - it
-		 * might need to know....
+	local_flags = (flags & SLAB_LEVEL_MASK);
+	if (local_flags == SLAB_ATOMIC) {
+		/* Not allowed to sleep.  Need to tell a constructor about
+		 * this - it might need to know...
 		 */
 		ctor_flags |= SLAB_CTOR_ATOMIC;
 	}
 
-	slabp = NULL;
-	/* get mem for the objs */
-	if (!(objp = kmem_getpages(cachep, flags, &dma)))
-		goto opps1;
+	/* About to mess with non-constant members - lock. */
+	spin_lock_irqsave(&cachep->c_spinlock, save_flags);
 
-	/* get colour for the slab, and cal the next value */
-	cli();
-	if (!(offset = --(cachep->c_colour_next)))
+	/* Get colour for the slab, and calculate the next value. */
+	if (!(offset = cachep->c_colour_next--))
 		cachep->c_colour_next = cachep->c_colour;
-	restore_flags(save_flags);
 	offset *= cachep->c_align;
+	cachep->c_dflags = SLAB_CFLGS_GROWN;
+
+	cachep->c_growing++;
+re_try:
+	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+
+	/* A series of memory allocations for a new slab.
+	 * Neither the cache-chain semaphore nor the cache-lock is
+	 * held, but the incrementing c_growing prevents this
+	 * cache from being reaped or shrunk.
+	 * Note: The cache could be selected for reaping in
+	 * kmem_cache_reap(), but when the final test is made the
+	 * growing value will be seen.
+	 */
+
+	/* Get mem for the objs. */
+	if (!(objp = kmem_getpages(cachep, flags, &dma)))
+		goto failed;
 
-	/* get slab mgmt */
-	if (!(slabp = kmem_cache_slabmgmt(cachep, objp, local_flags, offset)))
-		goto opps2;
+	/* Get slab mgmt.
*/ + if (!(slabp = kmem_cache_slabmgmt(cachep, objp+offset, local_flags))) + goto opps1; if (dma) - slabp->s_flags = SLAB_SFLGS_DMA; - + slabp->s_dma = 1; + if (SLAB_BUFCTL(cachep->c_flags)) { + slabp->s_index = kmem_cache_alloc(cachep->c_index_cachep, local_flags); + if (!slabp->s_index) + goto opps2; + } + + /* Nasty!!!!!! I hope this is OK. */ + dma = 1 << cachep->c_gfporder; + page = &mem_map[MAP_NR(objp)]; + do { + SLAB_SET_PAGE_CACHE(page, cachep); + SLAB_SET_PAGE_SLAB(page, slabp); + PageSetSlab(page); + page++; + } while (--dma); + + slabp->s_offset = offset; /* It will fit... */ + objp += offset; /* Address of first object. */ slabp->s_mem = objp; - objp += offset; /* address of first object */ /* For on-slab bufctls, c_offset is the distance between the start of * an obj and its related bufctl. For off-slab bufctls, c_offset is * the distance between objs in the slab. - * Reason for bufctl at end of obj (when on slab), as opposed to the front; - * if stored within the obj (has no state), and the obj is 'used' after being - * freed then (normally) most activity occurs at the beginning of the obj. - * By keeping the bufctl ptr away from the front, should reduce the chance of - * corruption. Also, allows easier alignment of objs onto cache lines when - * bufctl is not stored with the objs. - * Downsize; if, while an obj is active, a write is made past its end, then the - * bufctl will be corrupted :( */ - if (kmem_cache_init_objs(cachep, slabp, objp, local_flags, ctor_flags)) - goto no_objs; + kmem_cache_init_objs(cachep, slabp, objp, ctor_flags); + + spin_lock_irq(&cachep->c_spinlock); - cli(); - /* make slab active */ + /* Make slab active. */ slabp->s_magic = SLAB_MAGIC_ALLOC; kmem_slab_link_end(cachep, slabp); if (cachep->c_freep == kmem_slab_end(cachep)) cachep->c_freep = slabp; - restore_flags(save_flags); - return; -no_objs: - kmem_freepages(cachep, slabp->s_mem); + SLAB_STATS_INC_GROWN(cachep); + cachep->c_failures = 0; + cachep->c_growing--; + + spin_unlock_irqrestore(&cachep->c_spinlock, save_flags); + return 1; opps2: - kmem_freepages(cachep, objp); + if (SLAB_OFF_SLAB(cachep->c_flags)) + kmem_cache_free(cache_slabp, slabp); opps1: - if (slabp && SLAB_OFF_SLAB(cachep->c_flags)) - kmem_cache_free(&cache_slab, slabp); - /* printk("kmem_alloc: Out of mem - %s\n", cachep->c_name); */ - return; + kmem_freepages(cachep, objp); +failed: + if (local_flags != SLAB_ATOMIC && cachep->c_gfporder) { + /* For large order (>0) slabs, we try again. + * Needed because the gfp() functions are not good at giving + * out contigious pages unless pushed (but do not push too hard). + */ + spin_lock_irq(&cachep->c_spinlock); + if (cachep->c_failures++ < 4 && cachep->c_freep == kmem_slab_end(cachep)) + goto re_try; + cachep->c_failures = 1; /* Memory is low, don't try as hard next time. */ + cachep->c_growing--; + spin_unlock_irqrestore(&cachep->c_spinlock, save_flags); + } + return 0; +} + +static void +kmem_report_alloc_err(const char *str, kmem_cache_t * cachep) +{ + if (cachep) + SLAB_STATS_INC_ERR(cachep); /* this is atomic */ + printk(KERN_ERR "kmem_alloc: %s (name=%s)\n", + str, cachep ? cachep->c_name : "unknown"); } -#if defined(SLAB_DEBUG_SUPPORT) -/* Perform extra freeing checks. - * Currently, this check is only for caches that use bufctl structures - * within the slab. Those which use bufctl's from the internal cache - * have a reasonable check when the address is searched for. 
+static void +kmem_report_free_err(const char *str, void *objp, kmem_cache_t * cachep) +{ + if (cachep) + SLAB_STATS_INC_ERR(cachep); + printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n", + str, objp, cachep ? cachep->c_name : "unknown"); +} + +/* Search for a slab whose objs are suitable for DMA. + * Note: since testing the first free slab (in __kmem_cache_alloc()), + * ints must not have been enabled, or the cache-lock released! + */ +static inline kmem_slab_t * +kmem_cache_search_dma(kmem_cache_t * cachep) +{ + kmem_slab_t *slabp = cachep->c_freep->s_nextp; + + for (; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) { + if (!(slabp->s_dma)) + continue; + kmem_slab_unlink(slabp); + kmem_slab_link_free(cachep, slabp); + cachep->c_freep = slabp; + break; + } + return slabp; +} + +#if SLAB_DEBUG_SUPPORT +/* Perform extra freeing checks. Currently, this check is only for caches + * that use bufctl structures within the slab. Those which use bufctl's + * from the internal cache have a reasonable check when the address is + * searched for. Called with the cache-lock held. */ static void * -kmem_extra_free_checks(const kmem_cache_t *cachep, kmem_bufctl_t *search_bufp, - const kmem_bufctl_t *bufp, void * objp) +kmem_extra_free_checks(kmem_cache_t * cachep, kmem_bufctl_t *search_bufp, + kmem_bufctl_t *bufp, void * objp) { if (SLAB_BUFCTL(cachep->c_flags)) - goto end; + return objp; - /* check slab's freelist to see if this obj is there */ + /* Check slab's freelist to see if this obj is there. */ for (; search_bufp; search_bufp = search_bufp->buf_nextp) { if (search_bufp != bufp) continue; - printk(KERN_ERR "kmem_free: Double free detected during checking " - "%p - %s\n", objp, cachep->c_name); return NULL; } -end: return objp; } #endif /* SLAB_DEBUG_SUPPORT */ +/* Called with cache lock held. */ static inline void kmem_cache_full_free(kmem_cache_t *cachep, kmem_slab_t *slabp) { - if (!slabp->s_nextp->s_inuse) - return; /* at correct position */ - slabp->s_jiffies = jiffies; /* set release time */ - if (cachep->c_freep == slabp) - cachep->c_freep = slabp->s_nextp; - kmem_slab_unlink(slabp); - kmem_slab_link_end(cachep, slabp); - - return; + if (slabp->s_nextp->s_inuse) { + /* Not at correct position. */ + if (cachep->c_freep == slabp) + cachep->c_freep = slabp->s_nextp; + kmem_slab_unlink(slabp); + kmem_slab_link_end(cachep, slabp); + } } +/* Called with cache lock held. */ static inline void kmem_cache_one_free(kmem_cache_t *cachep, kmem_slab_t *slabp) { - if (slabp->s_nextp->s_inuse != cachep->c_num) { - cachep->c_freep = slabp; - return; + if (slabp->s_nextp->s_inuse == cachep->c_num) { + kmem_slab_unlink(slabp); + kmem_slab_link_free(cachep, slabp); } - kmem_slab_unlink(slabp); - kmem_slab_link_free(cachep, slabp); - return; + cachep->c_freep = slabp; } -/* Returns a ptr to an obj in the given cache. - * The obj is in the initial state (if there is one) - */ +/* Returns a ptr to an obj in the given cache. */ static inline void * -__kmem_cache_alloc(kmem_cache_t *cachep, unsigned long flags) +__kmem_cache_alloc(kmem_cache_t *cachep, int flags) { kmem_slab_t *slabp; kmem_bufctl_t *bufp; void *objp; unsigned long save_flags; - /* sanity check */ + /* Sanity check. */ if (!cachep) goto nul_ptr; - save_flags(save_flags); - cli(); - /* get slab alloc is to come from */ + spin_lock_irqsave(&cachep->c_spinlock, save_flags); +try_again: + /* Get slab alloc is to come from. 
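+	 * c_freep always names the first slab with a free obj (slabs with
+	 * free objs are kept together, fully-free ones at the tail where
+	 * kmem_cache_reap() harvests them), so the common case is just:
+	 *
+	 *	bufp = slabp->s_freep;
+	 *	slabp->s_freep = bufp->buf_nextp;
+	 *	objp = (void *)bufp - cachep->c_offset;
+	 *
+	 * and we only leave this path for DMA requests, an exhausted slab,
+	 * or a bad magic number.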
*/ slabp = cachep->c_freep; - /* magic is a sanity check _and_ says if we need a new slab */ + /* Magic is a sanity check _and_ says if we need a new slab. */ if (slabp->s_magic != SLAB_MAGIC_ALLOC) goto alloc_new_slab; -try_again: - /* DMA allocations are 'rare' - keep out of critical path */ + /* DMA requests are 'rare' - keep out of the critical path. */ if (flags & SLAB_DMA) goto search_dma; try_again_dma: + SLAB_STATS_INC_ALLOCED(cachep); + SLAB_STATS_INC_ACTIVE(cachep); + SLAB_STATS_SET_HIGH(cachep); slabp->s_inuse++; bufp = slabp->s_freep; slabp->s_freep = bufp->buf_nextp; - if (!SLAB_BUFCTL(cachep->c_flags)) { - /* Nasty - we want the 'if' to be taken in the common case */ - if (slabp->s_freep) { -short_finished: + if (slabp->s_freep) { +ret_obj: + if (!slabp->s_index) { + bufp->buf_slabp = slabp; objp = ((void*)bufp) - cachep->c_offset; - restore_flags(save_flags); -#if defined(SLAB_DEBUG_SUPPORT) +finished: + /* The lock is not needed by the red-zone or poision ops, and the + * obj has been removed from the slab. Should be safe to drop + * the lock here. + */ + spin_unlock_irqrestore(&cachep->c_spinlock, save_flags); +#if SLAB_DEBUG_SUPPORT if (cachep->c_flags & SLAB_RED_ZONE) goto red_zone; +ret_red: + if ((cachep->c_flags & SLAB_POISION) && kmem_check_poision_obj(cachep, objp)) + kmem_report_alloc_err("Bad poision", cachep); #endif /* SLAB_DEBUG_SUPPORT */ return objp; - } else { - cachep->c_freep = slabp->s_nextp; - goto short_finished; } + /* Update index ptr. */ + objp = ((bufp-slabp->s_index)*cachep->c_offset) + slabp->s_mem; + bufp->buf_objp = objp; + goto finished; } + cachep->c_freep = slabp->s_nextp; + goto ret_obj; - if (!slabp->s_freep) - cachep->c_freep = slabp->s_nextp; - - /* link into hash chain */ - objp = kmem_add_to_hash(cachep, bufp); - restore_flags(save_flags); -#if defined(SLAB_DEBUG_SUPPORT) - if (!(cachep->c_flags & SLAB_RED_ZONE)) -#endif /* SLAB_DEBUG_SUPPORT */ - return objp; - -#if defined(SLAB_DEBUG_SUPPORT) +#if SLAB_DEBUG_SUPPORT red_zone: - /* set alloc red-zone, and check old one */ + /* Set alloc red-zone, and check old one. */ + if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1) + kmem_report_alloc_err("Bad front redzone", cachep); + objp += BYTES_PER_WORD; if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1) - printk(KERN_ERR "kmem_alloc: Bad redzone %p - %s\n", - objp, cachep->c_name); - return objp; + kmem_report_alloc_err("Bad rear redzone", cachep); + goto ret_red; #endif /* SLAB_DEBUG_SUPPORT */ search_dma: - if (slabp->s_flags & SLAB_SFLGS_DMA) - goto try_again_dma; - /* need to search... */ - if ((slabp = kmem_cache_search_dma(cachep))) + if (slabp->s_dma || (slabp = kmem_cache_search_dma(cachep))!=kmem_slab_end(cachep)) goto try_again_dma; alloc_new_slab: - /* Either out of slabs, or magic number corruption */ - if (slabp != kmem_slab_end(cachep)) - goto bad_slab; - /* need a new slab */ - restore_flags(save_flags); - if (SLAB_RELEASED(cachep->c_flags)) { - printk(KERN_ERR "kmem_alloc: destroyed cache\n"); - goto end; - } - - /* Be lazy and only check for valid flags - * here (keeping it out of the critical path above) - */ - if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) { - printk(KERN_ERR "kmem_alloc: Illegal flgs %lX (correcting) - %s\n", - flags, cachep->c_name); - flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW); + /* Either out of slabs, or magic number corruption. */ + if (slabp == kmem_slab_end(cachep)) { + /* Need a new slab. 
Release the lock before calling kmem_cache_grow(). + * This allows objs to be released back into the cache while growing. + */ + spin_unlock_irqrestore(&cachep->c_spinlock, save_flags); + if (kmem_cache_grow(cachep, flags)) { + /* Someone may have stolen our objs. Doesn't matter, we'll + * just come back here again. + */ + goto try_again; + } + /* Couldn't grow, but some objs may have been freed. */ + spin_lock_irq(&cachep->c_spinlock); + if (cachep->c_freep != kmem_slab_end(cachep)) + goto try_again; + } else { + /* Very serious error - maybe panic() here? */ + kmem_report_alloc_err("Bad slab magic (corrupt)", cachep); } - - kmem_cache_grow(cachep, flags); - cli(); - if ((slabp=cachep->c_freep) != kmem_slab_end(cachep)) - goto try_again; - restore_flags(save_flags); -end: + spin_unlock_irqrestore(&cachep->c_spinlock, save_flags); +err_exit: return NULL; -bad_slab: - /* v. serious error - maybe panic() here? */ - printk(KERN_ERR "kmem_alloc: Bad slab magic (corruption) - %s\n", - cachep->c_name); - goto end; nul_ptr: - printk(KERN_ERR "kmem_alloc: NULL ptr\n"); - goto end; + kmem_report_alloc_err("NULL ptr", NULL); + goto err_exit; } -/* Release an obj back to its cache. - * If the obj has a constructed state, it should be - * in this state _before_ it is released. +/* Release an obj back to its cache. If the obj has a constructed state, + * it should be in this state _before_ it is released. */ static inline void __kmem_cache_free(kmem_cache_t *cachep, void *objp) @@ -1200,128 +1461,137 @@ kmem_bufctl_t *bufp; unsigned long save_flags; - /* basic sanity checks */ - if (!cachep) - goto nul_cache; - if (!objp) - goto nul_obj; + /* Basic sanity checks. */ + if (!cachep || !objp) + goto null_addr; - save_flags(save_flags); -#if defined(SLAB_DEBUG_SUPPORT) +#if SLAB_DEBUG_SUPPORT + if (cachep->c_flags & SLAB_RED_ZONE) + objp -= BYTES_PER_WORD; +#endif /* SLAB_DEBUG_SUPPORT */ + + +#if SLAB_DEBUG_SUPPORT + /* A verify func is called without the cache-lock held. */ if (cachep->c_flags & SLAB_DEBUG_INITIAL) goto init_state_check; finished_initial: #endif /* SLAB_DEBUG_SUPPORT */ + spin_lock_irqsave(&cachep->c_spinlock, save_flags); + if (SLAB_BUFCTL(cachep->c_flags)) goto bufctl; - bufp = (kmem_bufctl_t *)(objp+cachep->c_offset); - /* get slab for the obj */ - if (SLAB_PTR_IN_OBJ(cachep->c_flags)) { - /* if SLAB_HIGH_PACK is undef, the below is optimised away */ - slabp = (kmem_slab_t *)((((unsigned long)objp)&PAGE_MASK)+PAGE_SIZE); - slabp--; - } else - slabp = (kmem_slab_t *) bufp->buf_slabp; + /* Get slab for the object. */ +#if 0 + /* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref for some objects. + * Is this worth while? XXX + */ + if (cachep->c_flags & SLAB_HIGH_PACK) + slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]); + else +#endif + slabp = bufp->buf_slabp; - if (slabp->s_magic != SLAB_MAGIC_ALLOC) /* sanity check */ - goto bad_obj; - cli(); +check_magic: + if (slabp->s_magic != SLAB_MAGIC_ALLOC) /* Sanity check. */ + goto bad_slab; -#if defined(SLAB_DEBUG_SUPPORT) - if (cachep->c_flags & (SLAB_DEBUG_FREE|SLAB_RED_ZONE)) +#if SLAB_DEBUG_SUPPORT + if (cachep->c_flags & SLAB_DEBUG_FREE) goto extra_checks; +passed_extra: #endif /* SLAB_DEBUG_SUPPORT */ -passed_extra: - if (!slabp->s_inuse) /* sanity check */ - goto too_many; - bufp->buf_nextp = slabp->s_freep; - slabp->s_freep = bufp; - if (--(slabp->s_inuse)) { - if (bufp->buf_nextp) { - restore_flags(save_flags); - return; + if (slabp->s_inuse) { /* Sanity check. 
 */
+		SLAB_STATS_DEC_ACTIVE(cachep);
+		slabp->s_inuse--;
+		bufp->buf_nextp = slabp->s_freep;
+		slabp->s_freep = bufp;
+		if (slabp->s_inuse) {
+			if (bufp->buf_nextp) {
+				/* (hopefully) The most common case. */
+finished:
+#if	SLAB_DEBUG_SUPPORT
+				/* Need to poision the obj while holding the lock. */
+				if (cachep->c_flags & SLAB_POISION)
+					kmem_poision_obj(cachep, objp);
+				if (cachep->c_flags & SLAB_RED_ZONE)
+					goto red_zone;
+return_red:
+#endif	/* SLAB_DEBUG_SUPPORT */
+				spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+				return;
+			}
+			kmem_cache_one_free(cachep, slabp);
+			goto finished;
+		}
+		kmem_cache_full_free(cachep, slabp);
+		goto finished;
 	}
-	kmem_cache_full_free(cachep, slabp);
-	restore_flags(save_flags);
+
+	/* Don't add to freelist. */
+	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+	kmem_report_free_err("free with no active objs", objp, cachep);
 	return;
 bufctl:
-	/* Off-slab bufctls.  Need to search hash for bufctl, and hence the slab.
-	 * No 'extra' checks are performed for objs stored this way, finding
-	 * the obj a check enough
+	/* No 'extra' checks are performed for objs stored this way, finding
+	 * the obj is a sufficient check.
 	 */
-	cli();
-	if ((bufp = kmem_remove_from_hash(cachep, objp))) {
-		slabp = (kmem_slab_t *) bufp->buf_slabp;
-#if	defined(SLAB_DEBUG_SUPPORT)
-		if (cachep->c_flags & SLAB_RED_ZONE)
-			goto red_zone;
-#endif	/* SLAB_DEBUG_SUPPORT */
-		goto passed_extra;
-	}
-	restore_flags(save_flags);
-	printk(KERN_ERR "kmem_free: Either bad obj addr or double free: %p - %s\n",
-	       objp, cachep->c_name);
+	slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]);
+	bufp =	&slabp->s_index[(objp - slabp->s_mem)/cachep->c_offset];
+	if (bufp->buf_objp == objp)
+		goto check_magic;
+	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+	kmem_report_free_err("Either bad obj addr or double free", objp, cachep);
 	return;
-#if	defined(SLAB_DEBUG_SUPPORT)
-red_zone:
-	if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
-		/* Either write past end of the object, or a double free */
-		printk(KERN_ERR "kmem_free: Bad redzone %p - %s\n",
-		       objp, cachep->c_name);
-	}
-	goto passed_extra;
+#if	SLAB_DEBUG_SUPPORT
 init_state_check:
-	/* Need to call the slab's constructor so that
-	 * the caller can perform a verify of its state (debugging)
+	/* Need to call the slab's constructor so the
+	 * caller can perform a verify of its state (debugging).
 	 */
-	cachep->c_ctor(objp, cachep->c_org_size, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
+	cachep->c_ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
 	goto finished_initial;
 extra_checks:
-	if ((cachep->c_flags & SLAB_DEBUG_FREE) &&
-	    (objp != kmem_extra_free_checks(cachep, slabp->s_freep, bufp, objp))) {
-		restore_flags(save_flags);
+	if (!kmem_extra_free_checks(cachep, slabp->s_freep, bufp, objp)) {
+		spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+		kmem_report_free_err("Double free detected during checks", objp, cachep);
 		return;
 	}
-	if (cachep->c_flags & SLAB_RED_ZONE)
-		goto red_zone;
 	goto passed_extra;
-#endif	/* SLAB_DEBUG_SUPPORT */
-bad_obj:
-	/* The addr of the slab doesn't contain the correct
-	 * magic num
+red_zone:
+	/* We hold the cache-lock while checking the red-zone, just in case
+	 * someone tries to take this obj from us...
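+	 * The red-zone words toggle between two magics: SLAB_RED_MAGIC1
+	 * while the obj is free, SLAB_RED_MAGIC2 while it is allocated:
+	 *
+	 *	alloc:	expect MAGIC1, xchg() in MAGIC2
+	 *	free:	expect MAGIC2, xchg() in MAGIC1
+	 *
+	 * so a mismatch below means a write outside the obj or a double
+	 * free (the alloc path makes the mirror check).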
 */
-	if (slabp->s_magic == SLAB_MAGIC_UNALLOC) {
-		/* magic num says this is an unalloc slab */
-		printk(KERN_ERR "kmem_free: obj %p from destroyed slab - %s\n",
-		       objp, cachep->c_name);
-		return;
+	if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
+		/* Either write before start of obj, or a double free. */
+		kmem_report_free_err("Bad front redzone", objp, cachep);
 	}
-	printk(KERN_ERR "kmem_free: Bad obj %p - %s\n", objp, cachep->c_name);
-	return;
-too_many:
-	/* don't add to freelist */
-	restore_flags(save_flags);
-	printk(KERN_ERR "kmem_free: obj free for slab with no active objs - %s\n",
-	       cachep->c_name);
-	return;
-nul_obj:
-	printk(KERN_ERR "kmem_free: NULL obj - %s\n", cachep->c_name);
+	objp += BYTES_PER_WORD;
+	if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
+		/* Either write past end of obj, or a double free. */
+		kmem_report_free_err("Bad rear redzone", objp, cachep);
+	}
+	goto return_red;
+#endif	/* SLAB_DEBUG_SUPPORT */
+bad_slab:
+	/* Slab doesn't contain the correct magic num. */
+	if (slabp->s_magic == SLAB_MAGIC_DESTROYED) {
+		/* Magic num says this is a destroyed slab. */
+		kmem_report_free_err("free from inactive slab", objp, cachep);
+	} else
+		kmem_report_free_err("Bad obj addr", objp, cachep);
+	spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
 	return;
-nul_cache:
-	printk(KERN_ERR "kmem_free: NULL cache ptr\n");
+null_addr:
+	kmem_report_free_err("NULL ptr", objp, cachep);
 	return;
 }
 
 void *
-kmem_cache_alloc(kmem_cache_t *cachep, unsigned long flags)
+kmem_cache_alloc(kmem_cache_t *cachep, int flags)
 {
 	return __kmem_cache_alloc(cachep, flags);
 }
@@ -1333,163 +1603,248 @@
 }
 
 void *
-kmem_alloc(unsigned long size, unsigned long flags)
+kmalloc(size_t size, int flags)
 {
-	cache_sizes_t	*cachep = cache_sizes;
+	cache_sizes_t	*csizep = cache_sizes;
 
-	for (; cachep->cs_size; cachep++) {
-		if (size > cachep->cs_size)
+	for (; csizep->cs_size; csizep++) {
+		if (size > csizep->cs_size)
 			continue;
-		/* should the inline version be used here? */
-		return kmem_cache_alloc(cachep->cs_cachep, flags);
+		return __kmem_cache_alloc(csizep->cs_cachep, flags);
 	}
-	printk(KERN_ERR "kmem_alloc: Size (%lu) too large\n", size);
+	printk(KERN_ERR "kmalloc: Size (%lu) too large\n", (unsigned long) size);
 	return NULL;
 }
 
 void
-kmem_free(void *objp, unsigned long size)
+kfree(void *objp)
 {
-	cache_sizes_t	*cachep = cache_sizes;
+	struct page *page;
+	int	nr;
 
-	for (; cachep->cs_size; cachep++) {
-		if (size > cachep->cs_size)
-			continue;
-		/* should the inline version be used here? */
-		kmem_cache_free(cachep->cs_cachep, objp);
-		return;
+	if (!objp)
+		goto null_ptr;
+	nr = MAP_NR(objp);
+	if (nr >= max_mapnr)
+		goto null_ptr;
+
+	/* Assume we own the page structure - hence no locking.
+	 * If someone is misbehaving (e.g. someone calling us with a bad
+	 * address), then access to the page structure can race with the
+	 * kmem_slab_destroy() code.  Need to add a spin_lock to each page
+	 * structure, which would be useful in threading the gfp() functions....
+	 */
+	page = &mem_map[nr];
+	if (PageSlab(page)) {
+		kmem_cache_t	*cachep;
+
+		/* Here, we (again) assume the obj address is good.
+		 * If it isn't, and happens to map onto another
+		 * general-cache page which has no active objs, then
+		 * we race....
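+		 * The lookup itself is cheap: every page of a slab was
+		 * tagged by kmem_cache_grow(), so
+		 *
+		 *	page   = &mem_map[MAP_NR(objp)];
+		 *	cachep = SLAB_GET_PAGE_CACHE(page);
+		 *
+		 * recovers the owning cache, and only PageSlab pages whose
+		 * cache is marked SLAB_CFLGS_GENERAL are accepted, so objs
+		 * from private caches cannot be freed through kfree().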
+		 */
+		cachep = SLAB_GET_PAGE_CACHE(page);
+		if (cachep && (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
+			__kmem_cache_free(cachep, objp);
+			return;
+		}
+	}
+null_ptr:
+	printk(KERN_ERR "kfree: Bad obj %p\n", objp);
+	return;
+}
+
+void
+kfree_s(void *objp, size_t size)
+{
+	struct page *page;
+	int	nr;
+
+	if (!objp)
+		goto null_ptr;
+	nr = MAP_NR(objp);
+	if (nr >= max_mapnr)
+		goto null_ptr;
+	/* See comment in kfree() */
+	page = &mem_map[nr];
+	if (PageSlab(page)) {
+		kmem_cache_t	*cachep;
+		/* See comment in kfree() */
+		cachep = SLAB_GET_PAGE_CACHE(page);
+		if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
+			if (size <= cachep->c_org_size) {	/* XXX better check */
+				__kmem_cache_free(cachep, objp);
+				return;
+			}
+		}
 	}
-	printk(KERN_ERR "kmem_free: Size (%lu) too large - strange\n", size);
+null_ptr:
+	printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
+	return;
 }
 
+kmem_cache_t *
+kmem_find_general_cachep(size_t size)
+{
+	cache_sizes_t	*csizep = cache_sizes;
+
+	/* This function could be moved to the header-file, and
+	 * made inline so consumers can quickly determine what
+	 * cache-ptr they require.
+	 */
+	for (; csizep->cs_size; csizep++) {
+		if (size > csizep->cs_size)
+			continue;
+		break;
+	}
+	return csizep->cs_cachep;
+}
 
 /* Called from try_to_free_page().
- * Ideal solution would have a weight for each cache, based on;
- *	o num of fully free slabs
- *	o if the objs have a constructor/deconstructor
- *	o length of time slabs have been fully free (ie. ageing)
  * This function _cannot_ be called within a int, but it
  * can be interrupted.
  */
 int
 kmem_cache_reap(int pri, int dma, int wait)
 {
-	unsigned long	 dtor_flags = 0;
-	unsigned long	 best_jiffie;
-	unsigned long	 now;
-	int	count = 8;
-	kmem_slab_t	*best_slabp = NULL;
-	kmem_cache_t	*best_cachep = NULL;
 	kmem_slab_t	*slabp;
 	kmem_cache_t	*searchp;
-	unsigned long	 save_flags;
+	kmem_cache_t	*best_cachep;
+	unsigned long	 scan;
+	unsigned long	 reap_level;
 
-	/* 'pri' maps to the number of caches to examine, not the number of slabs.
-	 * This avoids only checking the jiffies for slabs in one cache at the
-	 * expensive spending more cycles
+	if (in_interrupt()) {
+		printk("kmem_cache_reap() called within int!\n");
+		return 0;
+	}
+	scan = 9-pri;
+	reap_level = pri >> 1;
+
+	/* We really need a test semaphore op so we can avoid sleeping when
+	 * !wait is true.
 	 */
-	pri = (9 - pri);
-	if (!wait)	/* not allowed to wait */
-		dtor_flags = SLAB_DTOR_ATOMIC;
+	down(&cache_chain_sem);
+	best_cachep = NULL;
 	searchp = clock_searchp;
-	save_flags(save_flags);
-	now = jiffies;
-	best_jiffie = now - (2*HZ);	/* 2secs - avoid heavy thrashing */
-	while (pri--) {
-		kmem_slab_t	*local_slabp;
-		unsigned long	 local_jiffie;
-		if (searchp == &cache_cache)
+	do {
+		unsigned long	full_free;
+		/* It's safe to test this without holding the cache-lock. */
+		if (searchp->c_flags & SLAB_NO_REAP)
 			goto next;
-
-		/* sanity check for corruption */
+		spin_lock_irq(&searchp->c_spinlock);
+		if (searchp->c_growing)
+			goto next_unlock;
+		if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
+			searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
+			goto next_unlock;
+		}
+		/* Sanity check for corruption of static values.
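+		 * (c_magic is written once in kmem_cache_create() and
+		 * c_inuse is always zero, so any other value means the
+		 * cache struct itself has been trampled.)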
 */
 		if (searchp->c_inuse || searchp->c_magic != SLAB_C_MAGIC) {
-			printk(KERN_ERR "kmem_reap: Corrupted cache struct for %s\n",
-			       searchp->c_name);
+			spin_unlock_irq(&searchp->c_spinlock);
+			printk(KERN_ERR "kmem_reap: Corrupted cache struct for %s\n", searchp->c_name);
 			goto next;
 		}
+		full_free = 0;
 
-		local_slabp = NULL;
-		local_jiffie = now - (2*HZ);
-		cli();
-		/* As the fully free slabs, within a cache, have no particular
-		 * order, we need to test them all.  Infact, we only check 'count'
-		 * slabs.
+		/* Count num of fully free slabs.  Hopefully there are not many,
+		 * as we are holding the cache lock....
 		 */
 		slabp = searchp->c_lastp;
-		for (;count && slabp != kmem_slab_end(searchp) && !slabp->s_inuse; slabp = slabp->s_prevp, count--) {
-			if (slabp->s_jiffies >= local_jiffie)
-				continue;
-
-			/* weight caches with a con/decon */
-			if ((searchp->c_ctor || searchp->c_dtor) && slabp->s_jiffies >= (local_jiffie - (2*HZ)))
-				continue;
-
-			/* weight caches with high page orders.  Avoids stressing the
-			 * VM sub-system by reducing the frequency requests for a large
-			 * num of contigious pages
-			 */
-			if (searchp->c_gfporder > 1 && slabp->s_jiffies >= (local_jiffie - (4*HZ)))
-				continue;
+		while (!slabp->s_inuse && slabp != kmem_slab_end(searchp)) {
+			slabp = slabp->s_prevp;
+			full_free++;
+		}
+		spin_unlock_irq(&searchp->c_spinlock);
 
-			local_jiffie = slabp->s_jiffies;
-			local_slabp = slabp;
-			if (!searchp->c_gfporder && (now-local_jiffie) >= (300*HZ)) {
-				/* an old, one page slab.  Make a quick get away... */
-				pri = 0;
+		if (full_free) {
+			if (full_free >= 10) {
+				best_cachep = searchp;
 				break;
 			}
-		}
-		if (local_slabp) {
-			if (!count || local_jiffie < best_jiffie) {
-				best_slabp = local_slabp;
-				best_jiffie = local_jiffie;
+
+			/* Try to avoid slabs with constructors and/or
+			 * more than one page per slab (as it can be difficult
+			 * to get high orders from gfp()).
+			 */
+			if (pri == 6) {		/* magic '6' from try_to_free_page() */
+				if (searchp->c_ctor)
+					full_free--;
+				if (full_free && searchp->c_gfporder)
+					full_free--;
+			}
+			if (full_free >= reap_level) {
+				reap_level = full_free;
 				best_cachep = searchp;
-				if (!count)
-					break;
 			}
 		}
-		restore_flags(save_flags);
+		goto next;
+next_unlock:
+		spin_unlock_irq(&searchp->c_spinlock);
 next:
 		searchp = searchp->c_nextp;
-		if (searchp == clock_searchp)
-			break;
-		count = 8;	/* # of slabs at which we force a reap */
-	}
+	} while (--scan && searchp != clock_searchp);
 
-	/* only move along with we didn't find an over allocated cache */
-	if (count)
-		clock_searchp = clock_searchp->c_nextp;
+	clock_searchp = searchp;
+	up(&cache_chain_sem);
 
-	if (!best_slabp)
+	if (!best_cachep) {
+		/* couldn't find anything to reap */
 		return 0;
+	}
 
-	cli();
-	if (best_slabp->s_inuse) {
-		/* an object in our selected slab has been
-		 * allocated.  This souldn't happen v. often, so we
-		 * simply fail - which isn't ideal but will do.
-		 * NOTE: No test for the case where an obj has been
-		 * allocated from the slab, and then freed.  While
-		 * this would change our idea of the best slab to
-		 * reap, it's not worth the re-calculation effort.
+	spin_lock_irq(&best_cachep->c_spinlock);
+	if (!best_cachep->c_growing && !(slabp = best_cachep->c_lastp)->s_inuse && slabp != kmem_slab_end(best_cachep)) {
+		if (slabp == best_cachep->c_freep)
+			best_cachep->c_freep = kmem_slab_end(best_cachep);
+		kmem_slab_unlink(slabp);
+		SLAB_STATS_INC_REAPED(best_cachep);
+
+		/* Safe to drop the lock.  The slab is no longer linked to the
+		 * cache.
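+		 * Nothing else can reach the slab now: reap and shrink walk
+		 * the list it was just unlinked from, and with s_inuse at
+		 * zero there is no live obj through which kmem_cache_free()
+		 * could find it.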
*/ - restore_flags(save_flags); - return 0; + spin_unlock_irq(&best_cachep->c_spinlock); + kmem_slab_destroy(best_cachep, slabp); + return 1; } + spin_unlock_irq(&best_cachep->c_spinlock); + return 0; +} - if (best_cachep->c_freep == best_slabp) - best_cachep->c_freep = best_slabp->s_nextp; - kmem_slab_unlink(best_slabp); - - restore_flags(save_flags); - kmem_slab_destroy(best_cachep, best_slabp, dtor_flags); +#if SLAB_SELFTEST +/* A few v. simple tests */ +static void +kmem_self_test(void) +{ + kmem_cache_t *test_cachep; - return 1; + printk(KERN_INFO "kmem_test() - start\n"); + test_cachep = kmem_cache_create("test-cachep", 16, 0, SLAB_RED_ZONE|SLAB_POISION, NULL, NULL); + if (test_cachep) { + char *objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL); + if (objp) { + /* Write in front and past end, red-zone test. */ + *(objp-1) = 1; + *(objp+16) = 1; + kmem_cache_free(test_cachep, objp); + + /* Mess up poisioning. */ + *objp = 10; + objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL); + kmem_cache_free(test_cachep, objp); + + /* Mess up poisioning (again). */ + *objp = 10; + kmem_cache_shrink(test_cachep); + } + } + printk(KERN_INFO "kmem_test() - finished\n"); } +#endif /* SLAB_SELFTEST */ +#if defined(CONFIG_PROC_FS) /* /proc/slabinfo - * cache-name num-active-objs total-objs num-active-slabs total-slabs num-pages-per-slab + * cache-name num-active-objs total-objs num-active-slabs total-slabs num-pages-per-slab */ int get_slabinfo(char *buf) @@ -1497,31 +1852,62 @@ kmem_cache_t *cachep; kmem_slab_t *slabp; unsigned long active_objs; - unsigned long num_slabs, active_slabs; unsigned long save_flags; + unsigned long num_slabs; + unsigned long num_objs; int len=0; +#if SLAB_STATS + unsigned long active_slabs; +#endif /* SLAB_STATS */ - /* output format version, so at least we can change it without _too_ - * many complaints + __save_flags(save_flags); + + /* Output format version, so at least we can change it without _too_ + * many complaints. 
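+	 * Without SLAB_STATS each per-cache line is just "name active-objs
+	 * total-objs", e.g. (numbers invented):
+	 *
+	 *	slabinfo - version: 1.0
+	 *	kmem_cache            12     12
+	 *	test-cachep            0     21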
 */
+#if	SLAB_STATS
+	len = sprintf(buf, "slabinfo - version: 1.0 (statistics)\n");
+#else
 	len = sprintf(buf, "slabinfo - version: 1.0\n");
-	save_flags(save_flags);
+#endif	/* SLAB_STATS */
+	down(&cache_chain_sem);
 	cachep = &cache_cache;
 	do {
-		active_slabs = num_slabs = active_objs = 0;
-		cli();
-		for (slabp = cachep->c_firstp;
-		     slabp != kmem_slab_end(cachep);
-		     slabp = slabp->s_nextp) {
-			num_slabs++;
+#if	SLAB_STATS
+		active_slabs = 0;
+#endif	/* SLAB_STATS */
+		num_slabs = active_objs = 0;
+		spin_lock_irq(&cachep->c_spinlock);
+		for (slabp = cachep->c_firstp; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
 			active_objs += slabp->s_inuse;
+			num_slabs++;
+#if	SLAB_STATS
 			if (slabp->s_inuse)
 				active_slabs++;
+#endif	/* SLAB_STATS */
 		}
-		restore_flags(save_flags);
-		len += sprintf(buf+len, "%-20s%lu %lu %lu %lu %d\n", cachep->c_name,
-			       active_objs, cachep->c_num*num_slabs,
-			       active_slabs, num_slabs, 1<<cachep->c_gfporder);
+		num_objs = cachep->c_num*num_slabs;
+#if	SLAB_STATS
+		{
+			unsigned long errors;
+			unsigned long high = cachep->c_high_mark;
+			unsigned long grown = cachep->c_grown;
+			unsigned long reaped = cachep->c_reaped;
+			unsigned long allocs = cachep->c_num_allocations;
+			errors = (unsigned long) atomic_read(&cachep->c_errors);
+			spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+			len += sprintf(buf+len, "%-16s %6lu %6lu %4lu %4lu %4lu %6lu %7lu %5lu %4lu %4lu\n",
+				       cachep->c_name, active_objs, num_objs, active_slabs, num_slabs,
+				       (1<<cachep->c_gfporder)*num_slabs,
+				       high, allocs, grown, reaped, errors);
+		}
+#else
+		spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+		len += sprintf(buf+len, "%-17s %6lu %6lu\n", cachep->c_name, active_objs, num_objs);
+#endif	/* SLAB_STATS */
 	} while ((cachep = cachep->c_nextp) != &cache_cache);
+	up(&cache_chain_sem);
+
 	return len;
 }
+#endif	/* CONFIG_PROC_FS */
diff -u --recursive --new-file v2.1.37/linux/net/TUNABLE linux/net/TUNABLE
--- v2.1.37/linux/net/TUNABLE	Mon May  6 02:26:16 1996
+++ linux/net/TUNABLE	Wed May 14 15:01:21 1997
@@ -1,6 +1,5 @@
-The following parameters should be tunable but aren't, until we get sysctl
-or similar schemes. For now you'll have to dig around. Various CONFIG_xxx
-items that should be configurable using sysctl omitted.
+The following parameters should be tunable at compile time. Some of them
+exist as sysctls too.
 
 This is far from complete
 
@@ -54,8 +53,6 @@
 MASQUERADE_EXPIRE_UDP	Time we keep a UDP masquerade for (tunable)
 MAXVIFS			Maximum mrouted vifs (1-32)
 MFC_LINES		Lines in the multicast router cache (tunable)
-SK_RMEM_MAX		Max memory a socket owns for receive (tunable)
-SK_WMEM_MAX		Max memory a socket owns for send (tunable)
 
 NetROM parameters are tunable via an ioctl passing a struct
 
diff -u --recursive --new-file v2.1.37/linux/net/core/scm.c linux/net/core/scm.c
--- v2.1.37/linux/net/core/scm.c	Tue May 13 22:41:21 1997
+++ linux/net/core/scm.c	Thu May 15 14:43:52 1997
@@ -1,6 +1,7 @@
 /* scm.c - Socket level control messages processing.
* * Author: Alexey Kuznetsov, + * Alignment and value checking mods by Craig Metz * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -60,12 +61,12 @@ int num; struct scm_fp_list *fpl = *fplp; struct file **fpp; - int *fdp = (int*)cmsg->cmsg_data; + int *fdp = (int*)CMSG_DATA(cmsg); int i; - num = (cmsg->cmsg_len - sizeof(struct cmsghdr))/sizeof(int); + num = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))/sizeof(int); - if (!num) + if (num <= 0) return 0; if (num > SCM_MAX_FD) @@ -153,9 +154,9 @@ goto error; break; case SCM_CREDENTIALS: - if (cmsg->cmsg_len < sizeof(*cmsg) + sizeof(struct ucred)) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) goto error; - memcpy(&p->creds, cmsg->cmsg_data, sizeof(struct ucred)); + memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred)); err = scm_check_creds(&p->creds); if (err) goto error; @@ -163,9 +164,9 @@ case SCM_CONNECT: if (scm_flags) goto error; - if (cmsg->cmsg_len < sizeof(*cmsg) + sizeof(int)) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) goto error; - memcpy(&acc_fd, cmsg->cmsg_data, sizeof(int)); + memcpy(&acc_fd, CMSG_DATA(cmsg), sizeof(int)); p->sock = NULL; if (acc_fd != -1) { if (acc_fd < 0 || acc_fd >= NR_OPEN || @@ -207,7 +208,7 @@ void put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control; - int cmlen = sizeof(*cm) + len; + int cmlen = CMSG_LEN(len); int err; if (cm==NULL || msg->msg_controllen < sizeof(*cm)) { @@ -224,9 +225,9 @@ if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) - err = copy_to_user(cm->cmsg_data, data, cmlen - sizeof(*cm)); + err = copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)); if (!err) { - cmlen = CMSG_ALIGN(cmlen); + cmlen = CMSG_SPACE(len); msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } @@ -246,7 +247,7 @@ if (fdnum < fdmax) fdmax = fdnum; - for (i=0, cmfptr=(int*)cm->cmsg_data; i 0) { - int cmlen = i*sizeof(int) + sizeof(struct cmsghdr); + int cmlen = CMSG_LEN(i*sizeof(int)); if (!err) err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) @@ -265,7 +266,7 @@ if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { - cmlen = CMSG_ALIGN(cmlen); + cmlen = CMSG_SPACE(i*sizeof(int)); msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } diff -u --recursive --new-file v2.1.37/linux/net/core/skbuff.c linux/net/core/skbuff.c --- v2.1.37/linux/net/core/skbuff.c Tue May 13 22:41:21 1997 +++ linux/net/core/skbuff.c Wed May 14 15:01:21 1997 @@ -71,8 +71,8 @@ * Strings we don't want inline's duplicating */ -char *skb_push_errstr="skpush:under: %p:%d"; -char *skb_put_errstr ="skput:over: %p:%d"; +const char skb_push_errstr[]="skpush:under: %p:%d"; +const char skb_put_errstr[] ="skput:over: %p:%d"; void show_net_buffers(void) { diff -u --recursive --new-file v2.1.37/linux/net/ipv4/arp.c linux/net/ipv4/arp.c --- v2.1.37/linux/net/ipv4/arp.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/arp.c Wed May 14 15:01:21 1997 @@ -379,7 +379,7 @@ extern atomic_t hh_count; atomic_dec(&hh_count); #endif - kfree_s(hh, sizeof(struct(struct hh_cache))); + kfree_s(hh, sizeof(struct hh_cache)); } } } diff -u --recursive --new-file v2.1.37/linux/net/ipv4/icmp.c linux/net/ipv4/icmp.c --- v2.1.37/linux/net/ipv4/icmp.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/icmp.c Wed May 14 15:01:21 1997 @@ -1021,7 +1021,7 @@ { struct tcphdr *th = (struct tcphdr *)(((unsigned char *)iph)+(iph->ihl<<2)); - sk = tcp_v4_lookup(iph->saddr, 
th->source, iph->daddr, th->dest); + sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source); if (!sk) return 0; if (sk->saddr != iph->saddr) return 0; if (sk->daddr != iph->daddr) return 0; @@ -1035,7 +1035,7 @@ { struct udphdr *uh = (struct udphdr *)(((unsigned char *)iph)+(iph->ihl<<2)); - sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest); + sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source); if (!sk) return 0; if (sk->saddr != iph->saddr && __ip_chk_addr(iph->saddr) != IS_MYADDR) return 0; diff -u --recursive --new-file v2.1.37/linux/net/ipv4/ip_fragment.c linux/net/ipv4/ip_fragment.c --- v2.1.37/linux/net/ipv4/ip_fragment.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/ip_fragment.c Wed May 14 15:01:21 1997 @@ -5,11 +5,14 @@ * * The IP fragmentation functionality. * + * Version: $Id: ip_fragment.c,v 1.21 1997/05/13 07:45:08 davem Exp $ + * * Authors: Fred N. van Kempen * Alan Cox * * Fixes: * Alan Cox : Split from ip.c , see ip_input.c for history. + * David S. Miller : Begin massive cleanup... */ #include @@ -29,31 +32,49 @@ #include #include -/* - * Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to measurably - * harm machine performance. +/* Fragment cache limits. We will commit 256K at one time. Should we + * cross that limit we will prune down to 192K. This should cope with + * even the most extreme cases without allowing an attacker to measurably + * harm machine performance. */ - #define IPFRAG_HIGH_THRESH (256*1024) #define IPFRAG_LOW_THRESH (192*1024) -/* - * This fragment handler is a bit of a heap. On the other hand it works quite - * happily and handles things quite well. - */ +/* Describe an IP fragment. */ +struct ipfrag { + int offset; /* offset of fragment in IP datagram */ + int end; /* last byte of data in datagram */ + int len; /* length of this fragment */ + struct sk_buff *skb; /* complete received fragment */ + unsigned char *ptr; /* pointer into real fragment data */ + struct ipfrag *next; /* linked list pointers */ + struct ipfrag *prev; +}; + +/* Describe an entry in the "incomplete datagrams" queue. */ +struct ipq { + struct iphdr *iph; /* pointer to IP header */ + struct ipq *next; /* linked list pointers */ + struct ipfrag *fragments; /* linked list of received fragments */ + int len; /* total length of original datagram */ + short ihlen; /* length of the IP header */ + struct timer_list timer; /* when will this queue expire? */ + struct ipq **pprev; + struct device *dev; /* Device - for icmp replies */ +}; + +#define IPQ_HASHSZ 64 + +struct ipq *ipq_hash[IPQ_HASHSZ]; -static struct ipq *ipqueue = NULL; /* IP fragment queue */ +#define ipqhashfn(id, saddr, daddr, prot) \ + ((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1)) atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ -char *in_ntoa(unsigned long in); +char *in_ntoa(__u32 in); -/* - * Memory Tracking Functions - */ - +/* Memory Tracking Functions. */ extern __inline__ void frag_kfree_skb(struct sk_buff *skb, int type) { atomic_sub(skb->truesize, &ip_frag_mem); @@ -69,28 +90,24 @@ extern __inline__ void *frag_kmalloc(int size, int pri) { void *vp=kmalloc(size,pri); + if(!vp) return NULL; atomic_add(size, &ip_frag_mem); return vp; } -/* - * Create a new fragment entry. 
- */ - -static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr) +/* Create a new fragment entry. */ +static struct ipfrag *ip_frag_create(int offset, int end, + struct sk_buff *skb, unsigned char *ptr) { struct ipfrag *fp; - unsigned long flags; fp = (struct ipfrag *) frag_kmalloc(sizeof(struct ipfrag), GFP_ATOMIC); - if (fp == NULL) - { + if (fp == NULL) { NETDEBUG(printk(KERN_ERR "IP: frag_create: no memory left !\n")); return(NULL); } - memset(fp, 0, sizeof(struct ipfrag)); /* Fill in the structure. */ fp->offset = offset; @@ -98,85 +115,63 @@ fp->len = end - offset; fp->skb = skb; fp->ptr = ptr; + fp->next = fp->prev = NULL; - /* - * Charge for the SKB as well. - */ - - save_flags(flags); - cli(); + /* Charge for the SKB as well. */ atomic_add(skb->truesize, &ip_frag_mem); - restore_flags(flags); return(fp); } - -/* - * Find the correct entry in the "incomplete datagrams" queue for - * this IP datagram, and return the queue entry address if found. +/* Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and return the queue entry address if found. */ - -static struct ipq *ip_find(struct iphdr *iph) +static inline struct ipq *ip_find(struct iphdr *iph) { + __u16 id = iph->id; + __u32 saddr = iph->saddr; + __u32 daddr = iph->daddr; + __u8 protocol = iph->protocol; + unsigned int hash = ipqhashfn(id, saddr, daddr, protocol); struct ipq *qp; - struct ipq *qplast; - cli(); - qplast = NULL; - for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next) - { - if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr && - iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol) - { - del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */ - sti(); - return(qp); + start_bh_atomic(); + for(qp = ipq_hash[hash]; qp; qp = qp->next) { + if(qp->iph->id == id && + qp->iph->saddr == saddr && + qp->iph->daddr == daddr && + qp->iph->protocol == protocol) { + del_timer(&qp->timer); + break; } } - sti(); - return(NULL); + end_bh_atomic(); + return qp; } - -/* - * Remove an entry from the "incomplete datagrams" queue, either - * because we completed, reassembled and processed it, or because - * it timed out. +/* Remove an entry from the "incomplete datagrams" queue, either + * because we completed, reassembled and processed it, or because + * it timed out. */ - static void ip_free(struct ipq *qp) { struct ipfrag *fp; - struct ipfrag *xp; - - /* - * Stop the timer for this entry. - */ + /* Stop the timer for this entry. */ del_timer(&qp->timer); /* Remove this entry from the "incomplete datagrams" queue. */ - cli(); - if (qp->prev == NULL) - { - ipqueue = qp->next; - if (ipqueue != NULL) - ipqueue->prev = NULL; - } - else - { - qp->prev->next = qp->next; - if (qp->next != NULL) - qp->next->prev = qp->prev; - } + start_bh_atomic(); + if(qp->next) + qp->next->pprev = qp->pprev; + *qp->pprev = qp->next; + end_bh_atomic(); /* Release all fragment data. */ - fp = qp->fragments; - while (fp != NULL) - { - xp = fp->next; + while (fp) { + struct ipfrag *xp = fp->next; + frag_kfree_skb(fp->skb,FREE_READ); frag_kfree_s(fp, sizeof(struct ipfrag)); fp = xp; @@ -187,83 +182,65 @@ /* Finally, release the queue descriptor itself. */ frag_kfree_s(qp, sizeof(struct ipq)); - sti(); } - -/* - * Oops- a fragment queue timed out. Kill it and send an ICMP reply. - */ - +/* Oops, a fragment queue timed out. Kill it and send an ICMP reply. 
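+ * Each queue entry arms a timer when it is created, and re-arms it as
+ * fragments arrive, roughly (IP_FRAG_TIME being the reassembly timeout):
+ *
+ *	qp->timer.expires  = jiffies + IP_FRAG_TIME;
+ *	qp->timer.data     = (unsigned long) qp;
+ *	qp->timer.function = ip_expire;
+ *	add_timer(&qp->timer);
+ *
+ * so an incomplete datagram holds fragment memory for at most one
+ * timeout before this function reclaims it.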
*/ static void ip_expire(unsigned long arg) { - struct ipq *qp; - - qp = (struct ipq *)arg; - - /* - * Send an ICMP "Fragment Reassembly Timeout" message. - */ + struct ipq *qp = (struct ipq *) arg; + /* Send an ICMP "Fragment Reassembly Timeout" message. */ ip_statistics.IpReasmTimeout++; ip_statistics.IpReasmFails++; - /* This if is always true... shrug */ - if(qp->fragments!=NULL) - icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED, - ICMP_EXC_FRAGTIME, 0); + icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); - /* - * Nuke the fragment queue. - */ + /* Nuke the fragment queue. */ ip_free(qp); } -/* - * Memory limiting on fragments. Evictor trashes the oldest - * fragment queue until we are back under the low threshold +/* Memory limiting on fragments. Evictor trashes the oldest + * fragment queue until we are back under the low threshold. */ - static void ip_evictor(void) { - while(atomic_read(&ip_frag_mem)>IPFRAG_LOW_THRESH) - { - if(!ipqueue) + while(atomic_read(&ip_frag_mem)>IPFRAG_LOW_THRESH) { + int i; + + /* FIXME: Make LRU queue of frag heads. -DaveM */ + for(i = 0; i < IPQ_HASHSZ; i++) + if(ipq_hash[i]) + break; + if(i >= IPQ_HASHSZ) panic("ip_evictor: memcount"); - ip_free(ipqueue); + ip_free(ipq_hash[i]); } } -/* - * Add an entry to the 'ipq' queue for a newly received IP datagram. - * We will (hopefully :-) receive all other fragments of this datagram - * in time, so we just create a queue for this datagram, in which we - * will insert the received fragments at their respective positions. +/* Add an entry to the 'ipq' queue for a newly received IP datagram. + * We will (hopefully :-) receive all other fragments of this datagram + * in time, so we just create a queue for this datagram, in which we + * will insert the received fragments at their respective positions. */ - static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph) { struct ipq *qp; + unsigned int hash; int ihlen; qp = (struct ipq *) frag_kmalloc(sizeof(struct ipq), GFP_ATOMIC); - if (qp == NULL) - { + if (qp == NULL) { NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n")); return(NULL); } - memset(qp, 0, sizeof(struct ipq)); - - /* - * Allocate memory for the IP header (plus 8 octets for ICMP). - */ + /* Allocate memory for the IP header (plus 8 octets for ICMP). */ ihlen = iph->ihl * 4; + qp->iph = (struct iphdr *) frag_kmalloc(64 + 8, GFP_ATOMIC); - if (qp->iph == NULL) - { + if (qp->iph == NULL) { NETDEBUG(printk(KERN_ERR "IP: create: no memory left !\n")); frag_kfree_s(qp, sizeof(struct ipq)); - return(NULL); + return NULL; } memcpy(qp->iph, iph, ihlen + 8); @@ -279,21 +256,19 @@ add_timer(&qp->timer); /* Add this entry to the queue. */ - qp->prev = NULL; - cli(); - qp->next = ipqueue; - if (qp->next != NULL) - qp->next->prev = qp; - ipqueue = qp; - sti(); - return(qp); -} + hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); + start_bh_atomic(); + if((qp->next = ipq_hash[hash]) != NULL) + qp->next->pprev = &qp->next; + ipq_hash[hash] = qp; + qp->pprev = &ipq_hash[hash]; + end_bh_atomic(); -/* - * See if a fragment queue is complete. - */ + return qp; +} +/* See if a fragment queue is complete. */ static int ip_done(struct ipq *qp) { struct ipfrag *fp; @@ -301,13 +276,12 @@ /* Only possible if we received the final fragment. */ if (qp->len == 0) - return(0); + return 0; /* Check all fragment offsets to see if they connect. 
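 	 * A hole shows up as a fragment whose offset is past the running
 	 * end, e.g.
 	 *
 	 *	0-599, 600-1199, 1800-2399  ->  gap at 1200, not done
 	 *	0-599, 600-1199, 1200-1799  ->  contiguous, done
 	 *
 	 * (qp->len only becomes non-zero once the final fragment, the one
 	 * without IP_MF, has arrived).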
*/ fp = qp->fragments; offset = 0; - while (fp != NULL) - { + while (fp) { if (fp->offset > offset) return(0); /* fragment(s) missing */ offset = fp->end; @@ -315,18 +289,15 @@ } /* All fragments are present. */ - return(1); + return 1; } - -/* - * Build a new IP datagram from all its fragments. +/* Build a new IP datagram from all its fragments. * - * FIXME: We copy here because we lack an effective way of handling lists - * of bits on input. Until the new skb data handling is in I'm not going - * to touch this with a bargepole. + * FIXME: We copy here because we lack an effective way of handling lists + * of bits on input. Until the new skb data handling is in I'm not going + * to touch this with a bargepole. */ - static struct sk_buff *ip_glue(struct ipq *qp) { struct sk_buff *skb; @@ -335,25 +306,23 @@ unsigned char *ptr; int count, len; - /* - * Allocate a new buffer for the datagram. - */ + /* Allocate a new buffer for the datagram. */ len = qp->ihlen + qp->len; - if(len>65535) - { - printk(KERN_INFO "Oversized IP packet from %s.\n", in_ntoa(qp->iph->saddr)); + if(len>65535) { + printk(KERN_INFO "Oversized IP packet from %s.\n", + in_ntoa(qp->iph->saddr)); ip_statistics.IpReasmFails++; ip_free(qp); return NULL; } - if ((skb = dev_alloc_skb(len)) == NULL) - { + if ((skb = dev_alloc_skb(len)) == NULL) { ip_statistics.IpReasmFails++; - NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp)); + NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing " + "queue %p\n", qp)); ip_free(qp); - return(NULL); + return NULL; } /* Fill in the basic details. */ @@ -368,11 +337,10 @@ /* Copy the data portions of all fragments into the new buffer. */ fp = qp->fragments; - while(fp != NULL) - { - if(count+fp->len > skb->len) - { - NETDEBUG(printk(KERN_ERR "Invalid fragment list: Fragment over size.\n")); + while(fp) { + if(count+fp->len > skb->len) { + NETDEBUG(printk(KERN_ERR "Invalid fragment list: " + "Fragment over size.\n")); ip_free(qp); kfree_skb(skb,FREE_WRITE); ip_statistics.IpReasmFails++; @@ -396,14 +364,10 @@ iph->tot_len = htons((iph->ihl * 4) + count); ip_statistics.IpReasmOKs++; - return(skb); + return skb; } - -/* - * Process an incoming IP datagram fragment. - */ - +/* Process an incoming IP datagram fragment. */ struct sk_buff *ip_defrag(struct sk_buff *skb) { struct iphdr *iph = skb->nh.iph; @@ -417,45 +381,37 @@ ip_statistics.IpReasmReqds++; - /* - * Start by cleaning up the memory - */ - + /* Start by cleaning up the memory. */ if(atomic_read(&ip_frag_mem)>IPFRAG_HIGH_THRESH) ip_evictor(); - /* - * Find the entry of this IP datagram in the "incomplete datagrams" queue. - */ - + + /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ qp = ip_find(iph); /* Is this a non-fragmented datagram? */ offset = ntohs(iph->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; - if (((flags & IP_MF) == 0) && (offset == 0)) - { - if (qp != NULL) - ip_free(qp); /* Fragmented frame replaced by full unfragmented copy */ - return(skb); + if (((flags & IP_MF) == 0) && (offset == 0)) { + if (qp != NULL) { + /* Fragmented frame replaced by full unfragmented copy. */ + ip_free(qp); + } + return skb; } offset <<= 3; /* offset is in 8-byte chunks */ ihl = iph->ihl * 4; - /* - * If the queue already existed, keep restarting its timer as long + /* If the queue already existed, keep restarting its timer as long * as we still are receiving fragments. Otherwise, create a fresh * queue entry. */ - - if (qp != NULL) - { + if (qp) { /* ANK. 
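 * (Ed.: ANK = Alexey Kuznetsov, whose note continues:)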
If the first fragment is received, * we should remember the correct IP header (with options) */ - if (offset == 0) - { + if (offset == 0) { qp->ihlen = ihl; memcpy(qp->iph, iph, ihl+8); } @@ -464,84 +420,59 @@ qp->timer.data = (unsigned long) qp; /* pointer to queue */ qp->timer.function = ip_expire; /* expire function */ add_timer(&qp->timer); - } - else - { - /* - * If we failed to create it, then discard the frame - */ - if ((qp = ip_create(skb, iph)) == NULL) - { + } else { + /* If we failed to create it, then discard the frame. */ + if ((qp = ip_create(skb, iph)) == NULL) { kfree_skb(skb, FREE_READ); ip_statistics.IpReasmFails++; return NULL; } } - /* - * Attempt to construct an oversize packet. - */ - - if(ntohs(iph->tot_len)+(int)offset>65535) - { - printk(KERN_INFO "Oversized packet received from %s\n",in_ntoa(iph->saddr)); + /* Attempt to construct an oversize packet. */ + if(ntohs(iph->tot_len)+(int)offset>65535) { + printk(KERN_INFO "Oversized packet received from %s\n", + in_ntoa(iph->saddr)); frag_kfree_skb(skb, FREE_READ); ip_statistics.IpReasmFails++; return NULL; } - /* - * Determine the position of this fragment. - */ - + /* Determine the position of this fragment. */ end = offset + ntohs(iph->tot_len) - ihl; - /* - * Point into the IP datagram 'data' part. - */ - + /* Point into the IP datagram 'data' part. */ ptr = skb->data + ihl; - /* - * Is this the final fragment? - */ - + /* Is this the final fragment? */ if ((flags & IP_MF) == 0) qp->len = end; - /* - * Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? */ - prev = NULL; - for(next = qp->fragments; next != NULL; next = next->next) - { + for(next = qp->fragments; next != NULL; next = next->next) { if (next->offset >= offset) break; /* bingo! */ prev = next; } - /* - * We found where to put this one. - * Check for overlap with preceding fragment, and, if needed, - * align things so that any overlaps are eliminated. + /* We found where to put this one. Check for overlap with + * preceding fragment, and, if needed, align things so that + * any overlaps are eliminated. */ - if (prev != NULL && offset < prev->end) - { + if (prev != NULL && offset < prev->end) { i = prev->end - offset; offset += i; /* ptr into datagram */ ptr += i; /* ptr into fragment data */ } - /* - * Look for overlap with succeeding segments. + /* Look for overlap with succeeding segments. * If we can merge fragments, do it. */ - - for(tmp=next; tmp != NULL; tmp = tfp) - { + for(tmp=next; tmp != NULL; tmp = tfp) { tfp = tmp->next; if (tmp->offset >= end) break; /* no overlaps at all */ @@ -550,12 +481,11 @@ tmp->len -= i; /* so reduce size of */ tmp->offset += i; /* next fragment */ tmp->ptr += i; - /* - * If we get a frag size of <= 0, remove it and the packet - * that it goes with. + + /* If we get a frag size of <= 0, remove it and the packet + * that it goes with. */ - if (tmp->len <= 0) - { + if (tmp->len <= 0) { if (tmp->prev != NULL) tmp->prev->next = tmp->next; else @@ -564,26 +494,20 @@ if (tmp->next != NULL) tmp->next->prev = tmp->prev; - next=tfp; /* We have killed the original next frame */ + /* We have killed the original next frame. */ + next = tfp; frag_kfree_skb(tmp->skb,FREE_READ); frag_kfree_s(tmp, sizeof(struct ipfrag)); } } - /* - * Insert this fragment in the chain of fragments. 
- */ - + /* Insert this fragment in the chain of fragments. */ tfp = NULL; tfp = ip_frag_create(offset, end, skb, ptr); - /* - * No memory to save the fragment - so throw the lot - */ - - if (!tfp) - { + /* No memory to save the fragment - so throw the lot. */ + if (!tfp) { frag_kfree_skb(skb, FREE_READ); return NULL; } @@ -597,16 +521,14 @@ if (next != NULL) next->prev = tfp; - /* - * OK, so we inserted this new fragment into the chain. - * Check if we now have a full IP datagram which we can - * bump up to the IP layer... - */ - - if (ip_done(qp)) - { - skb2 = ip_glue(qp); /* glue together the fragments */ + /* OK, so we inserted this new fragment into the chain. + * Check if we now have a full IP datagram which we can + * bump up to the IP layer... + */ + if (ip_done(qp)) { + /* Glue together the fragments. */ + skb2 = ip_glue(qp); return(skb2); } - return(NULL); + return NULL; } diff -u --recursive --new-file v2.1.37/linux/net/ipv4/ip_options.c linux/net/ipv4/ip_options.c --- v2.1.37/linux/net/ipv4/ip_options.c Sun Jan 19 05:47:28 1997 +++ linux/net/ipv4/ip_options.c Wed May 14 15:01:21 1997 @@ -505,7 +505,7 @@ opt->is_data = 1; opt->is_setbyuser = 1; if (optlen && ip_options_compile(opt, NULL)) { - kfree_s(opt, sizeof(struct options) + optlen); + kfree_s(opt, sizeof(struct ip_options) + optlen); return -EINVAL; } *optp = opt; diff -u --recursive --new-file v2.1.37/linux/net/ipv4/ip_sockglue.c linux/net/ipv4/ip_sockglue.c --- v2.1.37/linux/net/ipv4/ip_sockglue.c Wed Apr 23 19:01:29 1997 +++ linux/net/ipv4/ip_sockglue.c Thu May 15 14:43:52 1997 @@ -126,26 +126,24 @@ for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { if (cmsg->cmsg_level != SOL_IP) continue; - switch (cmsg->cmsg_type) - { + switch (cmsg->cmsg_type) { case IP_LOCALADDR: - if (cmsg->cmsg_len < sizeof(struct in_addr)+sizeof(*cmsg)) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_addr))) return -EINVAL; - memcpy(&ipc->addr, cmsg->cmsg_data, 4); + memcpy(&ipc->addr, CMSG_DATA(cmsg), sizeof(struct in_addr)); break; case IP_RETOPTS: - err = cmsg->cmsg_len - sizeof(*cmsg); - err = ip_options_get(&ipc->opt, cmsg->cmsg_data, - err < 40 ? err : 40, 0); + err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? 
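/* (ed.: IPv4 options occupy at most 40 bytes, hence the clamp) */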
err : 40, 0); if (err) return err; break; case IP_TXINFO: { struct in_pktinfo *info; - if (cmsg->cmsg_len < sizeof(*info)+sizeof(*cmsg)) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) return -EINVAL; - info = (struct in_pktinfo*)cmsg->cmsg_data; + info = (struct in_pktinfo *)CMSG_DATA(cmsg); if (info->ipi_ifindex && !devp) return -EINVAL; if ((*devp = dev_get_by_index(info->ipi_ifindex)) == NULL) @@ -212,7 +210,7 @@ sk->opt = opt; sti(); if (old_opt) - kfree_s(old_opt, sizeof(struct optlen) + old_opt->optlen); + kfree_s(old_opt, sizeof(struct ip_options) + old_opt->optlen); return 0; } case IP_RXINFO: diff -u --recursive --new-file v2.1.37/linux/net/ipv4/sysctl_net_ipv4.c linux/net/ipv4/sysctl_net_ipv4.c --- v2.1.37/linux/net/ipv4/sysctl_net_ipv4.c Wed Apr 23 19:01:29 1997 +++ linux/net/ipv4/sysctl_net_ipv4.c Wed May 14 15:01:21 1997 @@ -41,16 +41,17 @@ extern int sysctl_tcp_tsack; extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; +extern int sysctl_syn_retries; extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp, void *buffer, size_t *lenp); -struct ipv4_config ipv4_config = { 1, 1, 1, 1, }; +struct ipv4_config ipv4_config = { 1, 1, 1, 0, }; #ifdef CONFIG_SYSCTL struct ipv4_config ipv4_def_router_config = { 0, 1, 1, 1, 1, 1, 1, }; -struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 1, }; +struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 0, }; int ipv4_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp, void *buffer, size_t *lenp) @@ -144,6 +145,8 @@ {NET_IPV4_RFC1620_REDIRECTS, "ip_rfc1620_redirects", &ipv4_config.rfc1620_redirects, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_TCP_SYN_RETRIES, "tcp_syn_retries", + &sysctl_syn_retries, sizeof(int), 0644, NULL, &proc_dointvec}, {0} }; diff -u --recursive --new-file v2.1.37/linux/net/ipv4/tcp.c linux/net/ipv4/tcp.c --- v2.1.37/linux/net/ipv4/tcp.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/tcp.c Wed May 14 15:01:21 1997 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.63 1997/04/29 09:38:33 mj Exp $ + * Version: $Id: tcp.c,v 1.65 1997/05/06 09:31:43 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -1360,7 +1360,10 @@ case TCP_CLOSE: case TCP_LISTEN: break; - case TCP_LAST_ACK: /* Could have shutdown() then close() */ + case TCP_LAST_ACK: /* Could have shutdown() then close() + * (but don't do send_fin again!) */ + ns=TCP_LAST_ACK; + break; case TCP_CLOSE_WAIT: /* They have FIN'd us. We send our FIN and wait only for the ACK */ ns=TCP_LAST_ACK; @@ -1662,7 +1665,7 @@ { tcp_openreq_cachep = kmem_cache_create("tcp_open_request", sizeof(struct open_request), - sizeof(long)*8, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if(!tcp_openreq_cachep) panic("tcp_init: Cannot alloc open_request cache."); diff -u --recursive --new-file v2.1.37/linux/net/ipv4/tcp_ipv4.c linux/net/ipv4/tcp_ipv4.c --- v2.1.37/linux/net/ipv4/tcp_ipv4.c Tue May 13 22:41:24 1997 +++ linux/net/ipv4/tcp_ipv4.c Wed May 14 15:01:21 1997 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). 
* - * Version: $Id: tcp_ipv4.c,v 1.42 1997/04/29 16:09:46 schenk Exp $ + * Version: $Id: tcp_ipv4.c,v 1.43 1997/05/06 09:31:44 davem Exp $ * * IPv4 specific functions * @@ -888,7 +888,7 @@ { if(!req->sk && req->af.v4_req.opt) kfree_s(req->af.v4_req.opt, - sizeof(struct options) + req->af.v4_req.opt->optlen); + sizeof(struct ip_options) + req->af.v4_req.opt->optlen); } static struct or_calltable or_ipv4 = { diff -u --recursive --new-file v2.1.37/linux/net/ipv4/tcp_timer.c linux/net/ipv4/tcp_timer.c --- v2.1.37/linux/net/ipv4/tcp_timer.c Wed Apr 23 19:01:30 1997 +++ linux/net/ipv4/tcp_timer.c Wed May 14 15:01:21 1997 @@ -22,6 +22,8 @@ #include +int sysctl_syn_retries = TCP_SYN_RETRIES; + static void tcp_sltimer_handler(unsigned long); static void tcp_syn_recv_timer(unsigned long); static void tcp_keepalive(unsigned long data); @@ -178,7 +180,7 @@ } /* Have we tried to SYN too many times (repent repent 8)) */ - if(tp->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT) { + if(tp->retransmits > sysctl_syn_retries && sk->state==TCP_SYN_SENT) { if(sk->err_soft) sk->err=sk->err_soft; else diff -u --recursive --new-file v2.1.37/linux/net/ipv4/utils.c linux/net/ipv4/utils.c --- v2.1.37/linux/net/ipv4/utils.c Mon Oct 28 04:29:31 1996 +++ linux/net/ipv4/utils.c Wed May 14 15:01:21 1997 @@ -46,7 +46,7 @@ * Display an IP address in readable format. */ -char *in_ntoa(unsigned long in) +char *in_ntoa(__u32 in) { static char buff[18]; char *p; @@ -62,7 +62,7 @@ * Convert an ASCII string to binary IP. */ -unsigned long in_aton(const char *str) +__u32 in_aton(const char *str) { unsigned long l; unsigned int val; diff -u --recursive --new-file v2.1.37/linux/net/ipv6/addrconf.c linux/net/ipv6/addrconf.c --- v2.1.37/linux/net/ipv6/addrconf.c Tue May 13 22:41:24 1997 +++ linux/net/ipv6/addrconf.c Wed May 14 15:01:21 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: addrconf.c,v 1.19 1997/04/29 09:38:41 mj Exp $ + * $Id: addrconf.c,v 1.20 1997/05/07 09:40:04 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -1274,6 +1274,7 @@ for (idev = inet6_dev_lst[i]; idev; ) { struct inet6_dev *back; + addrconf_ifdown(idev->dev); back = idev; idev = idev->next; kfree(back); diff -u --recursive --new-file v2.1.37/linux/net/ipv6/af_inet6.c linux/net/ipv6/af_inet6.c --- v2.1.37/linux/net/ipv6/af_inet6.c Tue May 13 22:41:24 1997 +++ linux/net/ipv6/af_inet6.c Wed May 14 15:01:21 1997 @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * $Id: af_inet6.c,v 1.17 1997/04/29 09:38:39 mj Exp $ + * $Id: af_inet6.c,v 1.18 1997/05/07 09:40:12 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -458,12 +458,26 @@ #endif /* CONFIG_PROC_FS */ #ifdef MODULE +int ipv6_unload(void) +{ + return 0; +} +#endif + +#ifdef MODULE int init_module(void) #else __initfunc(void inet6_proto_init(struct net_proto *pro)) #endif { struct sk_buff *dummy_skb; + +#ifdef MODULE + if (!mod_member_present(&__this_module, can_unload)) + return -EINVAL; + + __this_module.can_unload = &ipv6_unload; +#endif printk(KERN_INFO "IPv6 v0.2 for NET3.037\n"); diff -u --recursive --new-file v2.1.37/linux/net/ipv6/datagram.c linux/net/ipv6/datagram.c --- v2.1.37/linux/net/ipv6/datagram.c Tue May 13 22:41:24 1997 +++ linux/net/ipv6/datagram.c Thu May 15 14:43:52 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: datagram.c,v 1.11 1997/05/03 00:58:25 davem Exp $ + * 
$Id: datagram.c,v 1.12 1997/05/15 18:55:09 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -39,7 +39,7 @@ src_info.ipi6_ifindex = skb->dev->ifindex; ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr); - put_cmsg(msg, SOL_IPV6, IPV6_RXINFO, sizeof(src_info), &src_info); + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxhlim) { @@ -67,20 +67,18 @@ for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { if (cmsg->cmsg_level != SOL_IPV6) { - printk(KERN_DEBUG "cmsg_level %d\n", cmsg->cmsg_level); + printk(KERN_DEBUG "invalid cmsg_level %d\n", cmsg->cmsg_level); continue; } switch (cmsg->cmsg_type) { - - case IPV6_TXINFO: - if (cmsg->cmsg_len < (sizeof(struct cmsghdr) + - sizeof(struct in6_pktinfo))) { + case IPV6_PKTINFO: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; } - src_info = (struct in6_pktinfo *) cmsg->cmsg_data; + src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (src_info->ipi6_ifindex) { int index = src_info->ipi6_ifindex; @@ -104,18 +102,13 @@ break; case IPV6_RXSRCRT: - - len = cmsg->cmsg_len; - - len -= sizeof(struct cmsghdr); - - /* validate option length */ - if (len < sizeof(struct ipv6_rt_hdr)) { + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) { err = -EINVAL; goto exit_f; } - rthdr = (struct ipv6_rt_hdr *) cmsg->cmsg_data; + len = cmsg->cmsg_len - sizeof(struct cmsghdr); + rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); /* * TYPE 0 @@ -142,21 +135,16 @@ break; case IPV6_HOPLIMIT: - - len = cmsg->cmsg_len; - len -= sizeof(struct cmsghdr); - - if (len < sizeof(int)) { + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { err = -EINVAL; goto exit_f; } - *hlimit = *((int *) cmsg->cmsg_data); + *hlimit = *(int *)CMSG_DATA(cmsg); break; default: - printk(KERN_DEBUG "invalid cmsg type: %d\n", - cmsg->cmsg_type); + printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); err = -EINVAL; break; }; diff -u --recursive --new-file v2.1.37/linux/net/ipv6/ip6_input.c linux/net/ipv6/ip6_input.c --- v2.1.37/linux/net/ipv6/ip6_input.c Thu Mar 27 14:40:16 1997 +++ linux/net/ipv6/ip6_input.c Wed May 14 15:01:21 1997 @@ -6,7 +6,7 @@ * Pedro Roque * Ian P. 
Morris * - * $Id: ip6_input.c,v 1.4 1997/03/18 18:24:35 davem Exp $ + * $Id: ip6_input.c,v 1.6 1997/05/11 16:06:52 davem Exp $ * * Based in linux/net/ipv4/ip_input.c * @@ -133,7 +133,7 @@ struct tlvtype_proc *curr; while ((hdr=(struct ipv6_tlvtype *)skb->h.raw) != lastopt) { - switch (hdr->type & 0x3F) { + switch (hdr->type) { case 0: /* TLV encoded Pad1 */ skb->h.raw++; break; @@ -144,7 +144,7 @@ default: /* Other TLV code so scan list */ for (curr=procs; curr->type != 255; curr++) { - if (curr->type == (hdr->type & 0x3F)) { + if (curr->type == (hdr->type)) { curr->func(skb, dev, nhptr, opt); skb->h.raw += hdr->len+2; break; @@ -166,10 +166,12 @@ struct sk_buff *skb=*skb_ptr; struct ipv6_destopt_hdr *hdr = (struct ipv6_destopt_hdr *) skb->h.raw; int res = 0; + void *lastopt=skb->h.raw+hdr->hdrlen+sizeof(struct ipv6_destopt_hdr); - if (ip6_parse_tlv(tlvprocdestopt_lst, skb, dev, nhptr, opt, - skb->h.raw+hdr->hdrlen)) + skb->h.raw += sizeof(struct ipv6_destopt_hdr); + if (ip6_parse_tlv(tlvprocdestopt_lst, skb, dev, nhptr, opt, lastopt)) res = hdr->nexthdr; + skb->h.raw+=hdr->hdrlen; return res; } diff -u --recursive --new-file v2.1.37/linux/net/ipv6/ipv6_sockglue.c linux/net/ipv6/ipv6_sockglue.c --- v2.1.37/linux/net/ipv6/ipv6_sockglue.c Tue May 13 22:41:24 1997 +++ linux/net/ipv6/ipv6_sockglue.c Thu May 15 14:43:52 1997 @@ -7,7 +7,7 @@ * * Based on linux/net/ipv4/ip_sockglue.c * - * $Id: ipv6_sockglue.c,v 1.12 1997/04/29 09:38:45 mj Exp $ + * $Id: ipv6_sockglue.c,v 1.13 1997/05/15 18:55:10 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -121,7 +121,7 @@ } break; - case IPV6_RXINFO: + case IPV6_PKTINFO: np->rxinfo = val; retv = 0; break; diff -u --recursive --new-file v2.1.37/linux/net/ipv6/mcast.c linux/net/ipv6/mcast.c --- v2.1.37/linux/net/ipv6/mcast.c Tue May 13 22:41:24 1997 +++ linux/net/ipv6/mcast.c Wed May 14 15:01:21 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: mcast.c,v 1.9 1997/04/29 09:38:46 mj Exp $ + * $Id: mcast.c,v 1.10 1997/05/07 09:40:22 davem Exp $ * * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c * @@ -187,7 +187,8 @@ hash = ipv6_addr_hash(addr); for (mc = inet6_mcast_lst[hash]; mc; mc = mc->next) { - if (ipv6_addr_cmp(&mc->mca_addr, addr) == 0) { + if ((ipv6_addr_cmp(&mc->mca_addr, addr) == 0) && + (mc->dev->ifindex == dev->ifindex)) { atomic_inc(&mc->mca_users); return 0; } diff -u --recursive --new-file v2.1.37/linux/net/netsyms.c linux/net/netsyms.c --- v2.1.37/linux/net/netsyms.c Tue May 13 22:41:24 1997 +++ linux/net/netsyms.c Wed May 14 15:01:21 1997 @@ -73,9 +73,6 @@ #include #endif -extern char *skb_push_errstr; -extern char *skb_put_errstr; - /* Skbuff symbols. */ EXPORT_SYMBOL(skb_push_errstr); EXPORT_SYMBOL(skb_put_errstr); diff -u --recursive --new-file v2.1.37/linux/net/socket.c linux/net/socket.c --- v2.1.37/linux/net/socket.c Tue May 13 22:41:25 1997 +++ linux/net/socket.c Wed May 14 15:01:21 1997 @@ -39,6 +39,8 @@ * for sockets. May have errors at the * moment. * Kevin Buhr : Fixed the dumb errors in the above. + * Andi Kleen : Some small cleanups, optimizations, + * and fixed a copy_from_user() bug. * * * This program is free software; you can redistribute it and/or @@ -180,7 +182,7 @@ * "fromlen shall refer to the value before truncation.." * 1003.1g */ - return put_user(klen, ulen); + return __put_user(klen, ulen); } /* @@ -365,6 +367,7 @@ if (size==0) /* Match SYS5 behaviour */ return 0; + /* FIXME: I think this can be removed now. 
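 *
 * (Ed.: presumably removable because the 2.1 uaccess routines such as
 * copy_to_user() do their own access checking -- an assumption about
 * the surrounding code, not something this patch states.)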
*/ if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0) return err; msg.msg_name=NULL; @@ -398,7 +401,8 @@ if(size==0) /* Match SYS5 behaviour */ return 0; - + + /* FIXME: I think this can be removed now */ if ((err=verify_area(VERIFY_READ,ubuf,size))<0) return err; @@ -797,7 +801,6 @@ { if (!(newsock = sock_alloc())) { - printk(KERN_WARNING "accept: no more sockets\n"); err=-EMFILE; goto out; } @@ -1130,6 +1133,7 @@ struct msghdr msg_sys; int err= -EINVAL; int total_len; + unsigned char *ctl_buf = ctl; lock_kernel(); @@ -1149,22 +1153,26 @@ if (msg_sys.msg_controllen) { - if (msg_sys.msg_controllen > sizeof(ctl)) + /* XXX We just limit the buffer and assume that the + * skbuff accounting stops it from going too far. + * I hope this is correct. + */ + if (msg_sys.msg_controllen > sizeof(ctl) && + msg_sys.msg_controllen <= 256) { - char *tmp = kmalloc(msg_sys.msg_controllen, GFP_KERNEL); - if (tmp == NULL) + ctl_buf = kmalloc(msg_sys.msg_controllen, GFP_KERNEL); + if (ctl_buf == NULL) { err = -ENOBUFS; goto failed2; } - err = copy_from_user(tmp, msg_sys.msg_control, msg_sys.msg_controllen); - msg_sys.msg_control = tmp; - } else { - err = copy_from_user(ctl, msg_sys.msg_control, msg_sys.msg_controllen); - msg_sys.msg_control = ctl; } - if (err) + if (copy_from_user(ctl_buf, msg_sys.msg_control, + msg_sys.msg_controllen)) { + err = -EFAULT; goto failed; + } + msg_sys.msg_control = ctl_buf; } msg_sys.msg_flags = flags; if (current->files->fd[fd]->f_flags & O_NONBLOCK) @@ -1177,8 +1185,8 @@ } failed: - if (msg_sys.msg_controllen && msg_sys.msg_control != ctl) - kfree(msg_sys.msg_control); + if (ctl_buf != ctl) + kfree_s(ctl_buf, msg_sys.msg_controllen); failed2: if (msg_sys.msg_iov != iov) kfree(msg_sys.msg_iov); @@ -1240,7 +1248,6 @@ if (current->files->fd[fd]->f_flags&O_NONBLOCK) flags |= MSG_DONTWAIT; - if ((sock = sockfd_lookup(fd, &err))!=NULL) { err=sock_recvmsg(sock, &msg_sys, total_len, flags); @@ -1253,9 +1260,12 @@ if (uaddr != NULL && err>=0) err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len); - if (err>=0 && (put_user(msg_sys.msg_flags, &msg->msg_flags) || - put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, &msg->msg_controllen))) - err = -EFAULT; + if (err>=0) { + err = __put_user(msg_sys.msg_flags, &msg->msg_flags); + if (!err) + err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, + &msg->msg_controllen); + } out: unlock_kernel(); if(err<0) @@ -1280,33 +1290,33 @@ return(-EINVAL); } +/* Argument list sizes for sys_socketcall */ +#define AL(x) ((x) * sizeof(unsigned long)) +static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), + AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; +#undef AL /* * System call vectors. * * Argument checking cleaned up. Saved 20% in size. + * This function doesn't need to set the kernel lock because + * it is set by the callees. */ asmlinkage int sys_socketcall(int call, unsigned long *args) { - unsigned char nargs[18]={0,3,3,3,2,3,3,3, - 4,4,4,6,6,2,5,5,3,3}; unsigned long a[6]; unsigned long a0,a1; - int err = -EINVAL; - - lock_kernel(); + int err; + if(call<1||call>SYS_RECVMSG) - goto out; - err = -EFAULT; + return -EINVAL; - /* - * Ideally we want to precompute the maths, but unsigned long - * isnt a fixed size.... - */ - - if ((copy_from_user(a, args, nargs[call] * sizeof(unsigned long)))) - goto out; + /* copy_from_user should be SMP safe. 
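 *
 * (Ed.: nargs[] above precomputes, per socketcall number, how many
 * bytes of argument block to fetch.  For example, bind(fd, addr, len)
 * takes three arguments, so its entry is AL(3), i.e.
 * 3 * sizeof(unsigned long), and exactly that much is copied from
 * user space in one go below.)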
*/ + if (copy_from_user(a, args, nargs[call])) + return -EFAULT; a0=a[0]; a1=a[1]; @@ -1370,11 +1380,8 @@ err = -EINVAL; break; } -out: - unlock_kernel(); return err; } - /* * This function is called by a protocol handler that wants to diff -u --recursive --new-file v2.1.37/linux/net/sunrpc/pmap_clnt.c linux/net/sunrpc/pmap_clnt.c --- v2.1.37/linux/net/sunrpc/pmap_clnt.c Wed Apr 16 14:15:00 1997 +++ linux/net/sunrpc/pmap_clnt.c Wed May 14 15:01:21 1997 @@ -79,6 +79,8 @@ } #ifdef CONFIG_ROOT_NFS +char *in_ntoa(__u32 in); + int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) {
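	/* (Ed.: ask the host's portmapper which port serves the given
	 * RPC program/version over the given transport protocol.) */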