diff -u --recursive --new-file v2.1.7/linux/CREDITS linux/CREDITS --- v2.1.7/linux/CREDITS Fri Nov 1 17:13:13 1996 +++ linux/CREDITS Wed Nov 6 11:49:21 1996 @@ -707,12 +707,11 @@ S: USA N: Alain L. Knaff -E: Alain.Knaff@imag.fr +E: Alain.Knaff@poboxes.com D: floppy driver -S: Appartement 310B -S: 11, rue General Mangin -S: 38100 Grenoble -S: France +S: 2a, rue de l'Acier +S: L-4505 Differdange +S: Luxembourg N: Harald Koenig E: koenig@tat.physik.uni-tuebingen.de diff -u --recursive --new-file v2.1.7/linux/Documentation/Configure.help linux/Documentation/Configure.help --- v2.1.7/linux/Documentation/Configure.help Fri Nov 1 17:13:13 1996 +++ linux/Documentation/Configure.help Sat Nov 2 13:57:09 1996 @@ -1476,24 +1476,37 @@ of PCI-SCSI controllers. This driver supports parity checking, tagged command queuing, fast scsi II transfer up to 10 MB/s with narrow scsi devices and 20 MB/s with wide scsi devices. - This driver has been tested OK with linux/i386 and is currently - untested under linux/Alpha. If you intend to use this driver under - linux/Alpha, just try it first with read-only or mounted read-only - devices. Memory mapped io is currently not supported under - linux/Alpha. Please read drivers/scsi/README.ncr53c8xx for more - information. + Linux/i386 and Linux/Alpha are supported by this driver. + Memory mapped io is currently untested under Linux/Alpha. + Please read drivers/scsi/README.ncr53c8xx for more information. -force normal IO +synchronous data transfers frequency +CONFIG_SCSI_NCR53C8XX_SYNC + SCSI-2 specifications allow scsi devices to negotiate a synchronous + transfer period of 25 nano-seconds or more. + The transfer period value is 4 times the agreed transfer period. + So, data can be transferred at a 10 MHz frequency, allowing 10 MB/second + throughput with 8 bits scsi-2 devices and 20 MB/second with wide16 devices. + This frequency can be used safely with differential devices but may cause + problems with singled-ended devices. 
+ Specify 0 if you want to only use asynchronous data transfers. + Otherwise, specify a value between 5 and 10. + Commercial O/Ses generally use 5 MHz frequency for synchronous transfers. + It is a reasonable default value. + However, a flawless single-ended scsi bus supports 10 MHz data transfers. + Regardless of the value chosen in the Linux configuration, the synchronous + period can be changed after boot-up through the /proc/scsi file system. + The generic command is: + echo "setsync #target period" >/proc/scsi/ncr53c8xx/0 + Use a 25 ns period for 10 MHz synchronous data transfers. + +use normal IO CONFIG_SCSI_NCR53C8XX_IOMAPPED - Under linux/Alpha only normal io is currently supported. - Under linux/i386, this option allows you to force the driver to use - normal IO. Memory mapped IO has less latency than normal IO. - During the initialization phase, the driver first tries to use - memory mapped io. If nothing seems wrong, it will use memory mapped - io. If a flaw is detected, it will use normal io. However, it's - possible that memory mapped does not work properly for you and the - driver has not detected the problem; then you would want to say Y - here. The normal answer therefore is N. + Warning! Under linux/Alpha only normal io has been currently tested. + This option allows you to force the driver to use normal IO. + Memory mapped IO has less latency than normal IO and works for most + Intel-based hardware. + The normal answer therefore is N. not allow targets to disconnect CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT @@ -1518,17 +1531,13 @@ The safe answer therefore is N. The normal answer therefore is Y. -force asynchronous transfer mode -CONFIG_SCSI_NCR53C8XX_FORCE_ASYNCHRONOUS - This option allows you to force asynchronous transfer mode for all - devices at linux startup.
You can enable synchronous negotiation - with the "setsync" control command after boot-up, for example: - echo "setsync 2 25" >/proc/scsi/ncr53c8xx/0 - asks the driver to set the period to 25 ns (10MB/sec) for target 2 - of controller 0 (please read drivers/scsi/README.ncr53c8xx for more - information). The safe answer therefore is Y. The normal answer - therefore is N. - +maximum number of queued commands +CONFIG_SCSI_NCR53C8XX_MAX_TAGS + This option allows you to specify the maximum number of commands that + can be queued to a device, when tagged command queuing is possible. + The default value is 4. Minimum is 2, maximum is 12. + The normal answer therefore is the default one. + force synchronous negotiation CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO Some scsi-2 devices support synchronous negotiations but do not diff -u --recursive --new-file v2.1.7/linux/Documentation/ide.txt linux/Documentation/ide.txt --- v2.1.7/linux/Documentation/ide.txt Wed Sep 25 11:11:47 1996 +++ linux/Documentation/ide.txt Wed Nov 6 14:49:30 1996 @@ -1,4 +1,4 @@ -ide.txt -- Information regarding the Enhanced IDE drive in Linux 2.0.xx +ide.txt -- Information regarding the Enhanced IDE drive in Linux 2.1.xx =============================================================================== Supported by: Mark Lord -- disks, interfaces, probing @@ -12,17 +12,18 @@ See description later on below for handling BIG IDE drives with >1024 cyls. -Major features of ide.c & ide-cd.c ("NEW!" marks changes since 1.2.13): +Major features of the 2.1.xx IDE driver ("NEW!" marks changes since 2.0.xx): -NEW! - support for IDE ATAPI *tape* drives, courtesy of Gadi Oxman +NEW! - support for IDE ATAPI *floppy* drives + - support for IDE ATAPI *tape* drives, courtesy of Gadi Oxman (re-run MAKEDEV.ide to create the tape device entries in /dev/) -NEW! - support for up to *four* IDE interfaces on one or more IRQs -NEW!
- support for any mix of up to *eight* disk and/or cdrom drives + - support for up to *four* IDE interfaces on one or more IRQs + - support for any mix of up to *eight* IDE drives - support for reading IDE ATAPI cdrom drives (NEC,MITSUMI,VERTOS,SONY) - support for audio functions - auto-detection of interfaces, drives, IRQs, and disk geometries - "single" drives should be jumpered as "master", not "slave" -NEW! (both are now probed for) + (both are now probed for) - support for BIOSs which report "more than 16 heads" on disk drives - uses LBA (slightly faster) on disk drives which support it - support for lots of fancy (E)IDE drive functions with hdparm utility @@ -32,45 +33,43 @@ - improved handshaking and error detection/recovery - can co-exist with hd.c controlling the first interface - run-time selectable 32bit interface support (using hdparm-2.3) -NEW! - support for reliable operation of buggy RZ1000 interfaces + - support for reliable operation of buggy RZ1000 interfaces - PCI support is automatic when rz1000 support is configured -NEW! - support for reliable operation of buggy CMD-640 interfaces + - support for reliable operation of buggy CMD-640 interfaces - PCI support is automatic when cmd640 support is configured - for VLB, use kernel command line option: ide0=cmd640_vlb - this support also enables the secondary i/f when needed - interface PIO timing & prefetch parameter support -NEW! - experimental support for UMC 8672 interfaces -NEW! - support for secondary interface on the FGI/Holtek HT-6560B VLB i/f + - experimental support for UMC 8672 interfaces + - support for secondary interface on the FGI/Holtek HT-6560B VLB i/f - use kernel command line option: ide0=ht6560 -NEW! - experimental support for various IDE chipsets + - experimental support for various IDE chipsets - use appropriate kernel command line option from list below -NEW! - support for drives with a stuck WRERR_STAT bit -NEW! - support for removable devices, including door lock/unlock -NEW! 
- transparent support for DiskManager 6.0x and "Dynamic Disk Overlay" + - support for drives with a stuck WRERR_STAT bit + - support for removable devices, including door lock/unlock + - transparent support for DiskManager 6.0x and "Dynamic Disk Overlay" - works with Linux fdisk, LILO, loadlin, bootln, etc.. -NEW! - mostly transparent support for EZ-Drive disk translation software -NEW! - to use LILO with EZ, install LILO on the linux partition + - mostly transparent support for EZ-Drive disk translation software + - to use LILO with EZ, install LILO on the linux partition rather than on the master boot record, and then mark the linux partition as "bootable" or "active" using fdisk. (courtesy of Juha Laiho ). -NEW! - auto-detect of disk translations by examining partition table -NEW! - ide-cd.c now compiles separate from ide.c -NEW! - Bus-Master DMA support for Intel PCI Triton chipset IDE interfaces + - auto-detect of disk translations by examining partition table + - ide-cd.c now compiles separate from ide.c + - Bus-Master DMA support for Intel PCI Triton chipset IDE interfaces - for details, see comments at top of triton.c -NEW! - ide-cd.c now supports door locking and auto-loading. + - ide-cd.c now supports door locking and auto-loading. - Also preliminary support for multisession and direct reads of audio data. -NEW! - experimental support for Promise DC4030VL caching interface card -NEW! - email thanks/problems to: peterd@pnd-pc.demon.co.uk -NEW! - the hdparm-3.1 package can be used to set PIO modes for some chipsets. - -For work in progress, see the comments in ide.c, ide-cd.c, and triton.c. - -Note that there is now a group actively working on support for the Promise -caching IDE cards, such as the DC4030VL, and early results are encouraging. -Look for this support to be added to the kernel soon. 
+ - experimental support for Promise DC4030VL caching interface card + - email thanks/problems to: peterd@pnd-pc.demon.co.uk + - the hdparm-3.1 package can be used to set PIO modes for some chipsets. +NEW! - support for the OPTi 82C621 chipset, courtesy of Jaromir Koutek. +NEW! - support for loadable modules +For work in progress, see the comments in ide.c, ide-cd.c, triton.c, ... + *** IMPORTANT NOTICES: BUGGY IDE CHIPSETS CAN CORRUPT DATA!! *** ================= *** PCI versions of the CMD640 and RZ1000 interfaces are now detected @@ -258,7 +257,7 @@ "hdx=slow" : insert a huge pause after each access to the data port. Should be used only as a last resort. - "idebus=xx" : inform IDE driver of VESA/PCI bus speed in Mhz, + "idebus=xx" : inform IDE driver of VESA/PCI bus speed in MHz, where "xx" is between 20 and 66 inclusive, used when tuning chipset PIO modes. For PCI bus, 25 is correct for a P75 system, @@ -311,7 +310,7 @@ beyond the basics. When purchasing a localbus IDE interface, avoid cards with an onboard BIOS and those which require special drivers. Instead, look for a card which uses hardware switches/jumpers to select the interface timing speed, -to allow much faster data transfers than the original 8Mhz ISA bus allows. +to allow much faster data transfers than the original 8MHz ISA bus allows. ATA = AT (the old IBM 286 computer) Attachment Interface, a draft American National Standard for connecting hard drives to PCs. This is the official diff -u --recursive --new-file v2.1.7/linux/Makefile linux/Makefile --- v2.1.7/linux/Makefile Fri Nov 1 17:13:13 1996 +++ linux/Makefile Sun Nov 3 11:04:41 1996 @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 1 -SUBLEVEL = 7 +SUBLEVEL = 8 ARCH = i386 @@ -313,7 +313,7 @@ rm -f core `find . 
-type f -name 'core' -print` rm -f vmlinux System.map rm -f .tmp* drivers/sound/configure - rm -fr modules/* + rm -f modules/* rm -f submenu* mrproper: clean diff -u --recursive --new-file v2.1.7/linux/arch/alpha/lib/Makefile linux/arch/alpha/lib/Makefile --- v2.1.7/linux/arch/alpha/lib/Makefile Sat Oct 19 10:07:28 1996 +++ linux/arch/alpha/lib/Makefile Thu Nov 7 17:09:01 1996 @@ -6,7 +6,7 @@ checksum.o csum_partial_copy.o strlen.o \ strcat.o strcpy.o strncat.o strncpy.o stxcpy.o stxncpy.o \ strchr.o strrchr.o \ - copy_user.o clear_user.o strncpy_from_user.o + copy_user.o clear_user.o strncpy_from_user.o strlen_user.o lib.a: $(OBJS) $(AR) rcs lib.a $(OBJS) diff -u --recursive --new-file v2.1.7/linux/arch/alpha/lib/strlen_user.S linux/arch/alpha/lib/strlen_user.S --- v2.1.7/linux/arch/alpha/lib/strlen_user.S Thu Jan 1 02:00:00 1970 +++ linux/arch/alpha/lib/strlen_user.S Thu Nov 7 17:10:30 1996 @@ -0,0 +1,65 @@ +/* + * arch/alpha/lib/strlen_user.S + * + * Just like strlen except returns -EFAULT if an exception occurs + * before the terminator is found. + */ + +#include + + +/* Allow an exception for an insn; exit if we get one. */ +#define EX(x,y...) 
\ + 99: x,##y; \ + .section __ex_table,"a"; \ + .gprel32 99b; \ + lda zero, $exception-99b(v0); \ + .text + + + .set noreorder + .set noat + .text + + .globl strlen_user + .ent strlen_user + .frame sp, 0, ra + + .align 3 +strlen_user: + .prologue 0 + + EX( ldq_u t0, 0(a0) ) # load first quadword (a0 may be misaligned) + lda t1, -1(zero) + insqh t1, a0, t1 + andnot a0, 7, v0 + or t1, t0, t0 + nop # dual issue the next two on ev5 + cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne t1, $found + +$loop: EX( ldq t0, 8(v0) ) + addq v0, 8, v0 # addr += 8 + cmpbge zero, t0, t1 + beq t1, $loop + +$found: negq t1, t2 # clear all but least set bit + and t1, t2, t1 + + and t1, 0xf0, t2 # binary search for that set bit + and t1, 0xcc, t3 + and t1, 0xaa, t4 + cmovne t2, 4, t2 + cmovne t3, 2, t3 + cmovne t4, 1, t4 + addq t2, t3, t2 + addq v0, t4, v0 + addq v0, t2, v0 + nop # dual issue next two on ev4 and ev5 + + subq v0, a0, v0 + +$exception: + ret + + .end strlen_user diff -u --recursive --new-file v2.1.7/linux/arch/i386/defconfig linux/arch/i386/defconfig --- v2.1.7/linux/arch/i386/defconfig Wed Oct 16 10:48:06 1996 +++ linux/arch/i386/defconfig Wed Nov 6 14:49:30 1996 @@ -39,8 +39,10 @@ # Please see Documentation/ide.txt for help/info on IDE drives # # CONFIG_BLK_DEV_HD_IDE is not set +CONFIG_BLK_DEV_IDEDISK=y CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_BLK_DEV_IDE_PCMCIA is not set CONFIG_BLK_DEV_CMD640=y # CONFIG_BLK_DEV_CMD640_ENHANCED is not set diff -u --recursive --new-file v2.1.7/linux/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S --- v2.1.7/linux/arch/i386/kernel/entry.S Wed Oct 16 10:48:06 1996 +++ linux/arch/i386/kernel/entry.S Wed Nov 6 14:24:44 1996 @@ -271,15 +271,11 @@ #ifdef __SMP__ ENTER_KERNEL #endif - movl $-ENOSYS,EAX(%esp) cmpl $(NR_syscalls),%eax - jae ret_from_sys_call - movl SYMBOL_NAME(sys_call_table)(,%eax,4),%eax - testl %eax,%eax - je ret_from_sys_call + 
jae badsys testb $0x20,flags(%ebx) # PF_TRACESYS jne tracesys - call *%eax + call SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return value ALIGN .globl ret_from_sys_call @@ -327,6 +323,9 @@ movl %eax,EAX(%esp) # save the return value call SYMBOL_NAME(syscall_trace) jmp ret_from_sys_call +badsys: + movl $-ENOSYS,EAX(%esp) + jmp ret_from_sys_call ENTRY(divide_error) @@ -452,6 +451,11 @@ pushl $ SYMBOL_NAME(do_page_fault) jmp error_code +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) + jmp error_code + .data ENTRY(sys_call_table) .long SYMBOL_NAME(sys_setup) /* 0 */ @@ -591,7 +595,7 @@ .long SYMBOL_NAME(sys_bdflush) .long SYMBOL_NAME(sys_sysfs) /* 135 */ .long SYMBOL_NAME(sys_personality) - .long 0 /* for afs_syscall */ + .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ .long SYMBOL_NAME(sys_setfsuid) .long SYMBOL_NAME(sys_setfsgid) .long SYMBOL_NAME(sys_llseek) /* 140 */ @@ -620,4 +624,6 @@ .long SYMBOL_NAME(sys_mremap) .long SYMBOL_NAME(sys_setresuid) .long SYMBOL_NAME(sys_getresuid) - .space (NR_syscalls-165)*4 + .rept NR_syscalls-165 + .long SYMBOL_NAME(sys_ni_syscall) + .endr diff -u --recursive --new-file v2.1.7/linux/arch/i386/kernel/irq.c linux/arch/i386/kernel/irq.c --- v2.1.7/linux/arch/i386/kernel/irq.c Wed Oct 9 08:55:17 1996 +++ linux/arch/i386/kernel/irq.c Thu Nov 7 11:25:55 1996 @@ -522,7 +522,7 @@ irqmask = (((unsigned int)cache_A1)<<8) | (unsigned int)cache_21; #ifdef DEBUG - printk("probe_irq_off: irqs=0x%04x irqmask=0x%04x\n", irqs, irqmask); + printk("probe_irq_off: irqs=0x%04lx irqmask=0x%04x\n", irqs, irqmask); #endif irqs &= irqmask; if (!irqs) diff -u --recursive --new-file v2.1.7/linux/arch/i386/kernel/setup.c linux/arch/i386/kernel/setup.c --- v2.1.7/linux/arch/i386/kernel/setup.c Tue Oct 29 19:58:02 1996 +++ linux/arch/i386/kernel/setup.c Sat Nov 2 13:57:50 1996 @@ -185,7 +185,7 @@ #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE) { - initrd_start = INITRD_START + 
PAGE_OFFSET; + initrd_start = INITRD_START ? INITRD_START + PAGE_OFFSET : 0; initrd_end = initrd_start+INITRD_SIZE; if (initrd_end > memory_end) { printk("initrd extends beyond end of memory " diff -u --recursive --new-file v2.1.7/linux/arch/i386/kernel/time.c linux/arch/i386/kernel/time.c --- v2.1.7/linux/arch/i386/kernel/time.c Tue Oct 29 19:58:02 1996 +++ linux/arch/i386/kernel/time.c Thu Nov 7 19:49:34 1996 @@ -20,10 +20,13 @@ #include #include #include +#include +#include #include #include #include +#include #include #include @@ -462,6 +465,21 @@ needs more debugging. */ if (x86_capability & 16) { do_gettimeoffset = do_fast_gettimeoffset; + + if( strcmp( x86_vendor_id, "AuthenticAMD" ) == 0 ) { + if( x86 == 5 ) { + if( x86_model == 0 ) { + /* turn on cycle counters during power down */ + __asm__ __volatile__ (" movl $0x83, %%ecx \n \ + rdmsr \n \ + orl $1,%%eax \n \ + wrmsr \n " + : : : "ax", "cx", "dx" ); + udelay(500); + } + } + } + /* read Pentium cycle counter */ __asm__(".byte 0x0f,0x31" :"=a" (init_timer_cc.low), diff -u --recursive --new-file v2.1.7/linux/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c --- v2.1.7/linux/arch/i386/kernel/traps.c Tue Oct 29 19:58:02 1996 +++ linux/arch/i386/kernel/traps.c Wed Nov 6 11:54:35 1996 @@ -81,6 +81,7 @@ asmlinkage void coprocessor_error(void); asmlinkage void reserved(void); asmlinkage void alignment_check(void); +asmlinkage void spurious_interrupt_bug(void); int kstack_depth_to_print = 24; @@ -174,8 +175,8 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS, current) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present, current) DO_ERROR(12, SIGBUS, "stack segment", stack_segment, current) -DO_ERROR(15, SIGSEGV, "reserved", reserved, current) DO_ERROR(17, SIGSEGV, "alignment check", alignment_check, current) +DO_ERROR(18, SIGSEGV, "reserved", reserved, current) asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) { @@ -259,6 +260,12 @@ math_error(); } +asmlinkage void 
do_spurious_interrupt_bug(struct pt_regs * regs, + long error_code) +{ + printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); +} + /* * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task @@ -344,7 +351,7 @@ set_trap_gate(12,&stack_segment); set_trap_gate(13,&general_protection); set_trap_gate(14,&page_fault); - set_trap_gate(15,&reserved); + set_trap_gate(15,&spurious_interrupt_bug); set_trap_gate(16,&coprocessor_error); set_trap_gate(17,&alignment_check); for (i=18;i<48;i++) diff -u --recursive --new-file v2.1.7/linux/arch/i386/mm/extable.c linux/arch/i386/mm/extable.c --- v2.1.7/linux/arch/i386/mm/extable.c Fri Nov 1 17:13:14 1996 +++ linux/arch/i386/mm/extable.c Wed Nov 6 14:24:44 1996 @@ -2,6 +2,8 @@ * linux/arch/i386/mm/extable.c */ +#include +#include #include extern const struct exception_table_entry __start___ex_table[]; @@ -37,6 +39,9 @@ search_exception_table(unsigned long addr) { unsigned long ret; +#ifdef CONFIG_MODULES + struct module *mp; +#endif /* Search the kernel's table first. 
*/ ret = search_one_table(__start___ex_table, @@ -44,7 +49,15 @@ if (ret) return ret; - /* FIXME -- search the module's tables here */ - +#ifdef CONFIG_MODULES + for (mp = module_list; mp != NULL; mp = mp->next) { + if (mp->exceptinfo.start != NULL) { + ret = search_one_table(mp->exceptinfo.start, + mp->exceptinfo.stop-1, addr); + if (ret) + return ret; + } + } +#endif return 0; } diff -u --recursive --new-file v2.1.7/linux/arch/m68k/config.in linux/arch/m68k/config.in --- v2.1.7/linux/arch/m68k/config.in Wed Sep 25 10:47:39 1996 +++ linux/arch/m68k/config.in Wed Nov 6 14:49:31 1996 @@ -71,9 +71,12 @@ comment 'Floppy, IDE, and other block devices' tristate 'Normal floppy disk support' CONFIG_BLK_DEV_FD -bool 'IDE harddisk support' CONFIG_BLK_DEV_IDE -if [ "$CONFIG_BLK_DEV_IDE" = "y" ]; then - bool ' Include IDE/ATAPI CDROM support' CONFIG_BLK_DEV_IDECD +tristate 'Enhanced IDE/MFM/RLL disk/cdrom/tape/floppy support' CONFIG_BLK_DEV_IDE +if [ "$CONFIG_BLK_DEV_IDE" != "n" ]; then + dep_tristate ' Include IDE/ATA-2 DISK support' CONFIG_BLK_DEV_IDEDISK $CONFIG_BLK_DEV_IDE + dep_tristate ' Include IDE/ATAPI CDROM support' CONFIG_BLK_DEV_IDECD $CONFIG_BLK_DEV_IDE + dep_tristate ' Include IDE/ATAPI TAPE support' CONFIG_BLK_DEV_IDETAPE $CONFIG_BLK_DEV_IDE + dep_tristate ' Include IDE/ATAPI FLOPPY support' CONFIG_BLK_DEV_IDEFLOPPY $CONFIG_BLK_DEV_IDE fi if [ "$CONFIG_AMIGA" = "y" ]; then tristate 'Amiga Zorro II ramdisk support' CONFIG_AMIGA_Z2RAM diff -u --recursive --new-file v2.1.7/linux/drivers/block/Config.in linux/drivers/block/Config.in --- v2.1.7/linux/drivers/block/Config.in Mon Aug 5 08:12:25 1996 +++ linux/drivers/block/Config.in Wed Nov 6 14:49:31 1996 @@ -5,34 +5,40 @@ comment 'Floppy, IDE, and other block devices' tristate 'Normal floppy disk support' CONFIG_BLK_DEV_FD -bool 'Enhanced IDE/MFM/RLL disk/cdrom/tape support' CONFIG_BLK_DEV_IDE +tristate 'Enhanced IDE/MFM/RLL disk/cdrom/tape/floppy support' CONFIG_BLK_DEV_IDE comment 'Please see Documentation/ide.txt 
for help/info on IDE drives' if [ "$CONFIG_BLK_DEV_IDE" = "n" ]; then bool 'Old harddisk (MFM/RLL/IDE) driver' CONFIG_BLK_DEV_HD_ONLY else bool ' Use old disk-only driver on primary interface' CONFIG_BLK_DEV_HD_IDE - bool ' Include IDE/ATAPI CDROM support' CONFIG_BLK_DEV_IDECD - bool ' Include IDE/ATAPI TAPE support' CONFIG_BLK_DEV_IDETAPE - bool ' Support removable IDE interfaces (PCMCIA)' CONFIG_BLK_DEV_IDE_PCMCIA - bool ' CMD640 chipset bugfix/support' CONFIG_BLK_DEV_CMD640 - if [ "$CONFIG_BLK_DEV_CMD640" = "y" ]; then - bool ' CMD640 enhanced support' CONFIG_BLK_DEV_CMD640_ENHANCED - fi - if [ "$CONFIG_PCI" = "y" ]; then - bool ' RZ1000 chipset bugfix/support' CONFIG_BLK_DEV_RZ1000 - bool ' Intel 82371 PIIX (Triton I/II) DMA support' CONFIG_BLK_DEV_TRITON - fi - bool ' Other IDE chipset support' CONFIG_IDE_CHIPSETS - if [ "$CONFIG_IDE_CHIPSETS" = "y" ]; then - comment 'Note: most of these also require special kernel boot parameters' - bool ' ALI M14xx support' CONFIG_BLK_DEV_ALI14XX - bool ' DTC-2278 support' CONFIG_BLK_DEV_DTC2278 - bool ' Holtek HT6560B support' CONFIG_BLK_DEV_HT6560B - if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' PROMISE DC4030 support (EXPERIMENTAL)' CONFIG_BLK_DEV_PROMISE + dep_tristate ' Include IDE/ATA-2 DISK support' CONFIG_BLK_DEV_IDEDISK $CONFIG_BLK_DEV_IDE + dep_tristate ' Include IDE/ATAPI CDROM support' CONFIG_BLK_DEV_IDECD $CONFIG_BLK_DEV_IDE + dep_tristate ' Include IDE/ATAPI TAPE support' CONFIG_BLK_DEV_IDETAPE $CONFIG_BLK_DEV_IDE + dep_tristate ' Include IDE/ATAPI FLOPPY support' CONFIG_BLK_DEV_IDEFLOPPY $CONFIG_BLK_DEV_IDE + if [ "$CONFIG_BLK_DEV_IDE" = "y" ]; then + bool ' CMD640 chipset bugfix/support' CONFIG_BLK_DEV_CMD640 + if [ "$CONFIG_BLK_DEV_CMD640" = "y" ]; then + bool ' CMD640 enhanced support' CONFIG_BLK_DEV_CMD640_ENHANCED + fi + if [ "$CONFIG_PCI" = "y" ]; then + bool ' RZ1000 chipset bugfix/support' CONFIG_BLK_DEV_RZ1000 + bool ' Intel 82371 PIIX (Triton I/II) DMA support' CONFIG_BLK_DEV_TRITON + fi + bool 
' Other IDE chipset support' CONFIG_IDE_CHIPSETS + if [ "$CONFIG_IDE_CHIPSETS" = "y" ]; then + comment 'Note: most of these also require special kernel boot parameters' + bool ' ALI M14xx support' CONFIG_BLK_DEV_ALI14XX + bool ' DTC-2278 support' CONFIG_BLK_DEV_DTC2278 + bool ' Holtek HT6560B support' CONFIG_BLK_DEV_HT6560B + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' PROMISE DC4030 support (EXPERIMENTAL)' CONFIG_BLK_DEV_PROMISE + if [ "$CONFIG_PCI" = "y" ]; then + bool ' OPTi 82C621 support (EXPERIMENTAL)' CONFIG_BLK_DEV_OPTI621 + fi + fi + bool ' QDI QD6580 support' CONFIG_BLK_DEV_QD6580 + bool ' UMC 8672 support' CONFIG_BLK_DEV_UMC8672 fi - bool ' QDI QD6580 support' CONFIG_BLK_DEV_QD6580 - bool ' UMC 8672 support' CONFIG_BLK_DEV_UMC8672 fi fi diff -u --recursive --new-file v2.1.7/linux/drivers/block/Makefile linux/drivers/block/Makefile --- v2.1.7/linux/drivers/block/Makefile Wed Aug 7 12:31:22 1996 +++ linux/drivers/block/Makefile Wed Nov 6 14:49:31 1996 @@ -50,7 +50,11 @@ endif ifeq ($(CONFIG_BLK_DEV_IDE),y) -L_OBJS += ide.o +L_OBJS += ide.o ide-probe.o +else + ifeq ($(CONFIG_BLK_DEV_IDE),m) + M_OBJS += ide.o ide-probe.o + endif endif ifeq ($(CONFIG_BLK_DEV_RZ1000),y) @@ -89,12 +93,40 @@ L_OBJS += promise.o endif +ifeq ($(CONFIG_BLK_DEV_OPTI621),y) +L_OBJS += opti621.o +endif + +ifeq ($(CONFIG_BLK_DEV_IDEDISK),y) +L_OBJS += ide-disk.o +else + ifeq ($(CONFIG_BLK_DEV_IDEDISK),m) + M_OBJS += ide-disk.o + endif +endif + ifeq ($(CONFIG_BLK_DEV_IDECD),y) L_OBJS += ide-cd.o +else + ifeq ($(CONFIG_BLK_DEV_IDECD),m) + M_OBJS += ide-cd.o + endif endif ifeq ($(CONFIG_BLK_DEV_IDETAPE),y) L_OBJS += ide-tape.o +else + ifeq ($(CONFIG_BLK_DEV_IDETAPE),m) + M_OBJS += ide-tape.o + endif +endif + +ifeq ($(CONFIG_BLK_DEV_IDEFLOPPY),y) +L_OBJS += ide-floppy.o +else + ifeq ($(CONFIG_BLK_DEV_IDEFLOPPY),m) + M_OBJS += ide-floppy.o + endif endif ifeq ($(CONFIG_BLK_DEV_XD),y) diff -u --recursive --new-file v2.1.7/linux/drivers/block/cmd640.c linux/drivers/block/cmd640.c --- 
v2.1.7/linux/drivers/block/cmd640.c Mon Sep 2 08:41:25 1996 +++ linux/drivers/block/cmd640.c Wed Nov 6 14:49:31 1996 @@ -431,9 +431,9 @@ for (i = 0; i < MAX_HWIFS; i++) { ide_hwif_t *hwif = &ide_hwifs[i]; if (hwif->chipset == ide_unknown || hwif->chipset == ide_generic) { - if (hwif->io_base == 0x1f0) + if (hwif->io_ports[IDE_DATA_OFFSET] == 0x1f0) cmd_hwif0 = hwif; - else if (hwif->io_base == 0x170) + else if (hwif->io_ports[IDE_DATA_OFFSET] == 0x170) cmd_hwif1 = hwif; } } @@ -678,11 +678,10 @@ (void) ide_get_best_pio_mode (drive, mode_wanted, 5, &d); cmd640_set_mode (index, d.pio_mode, d.cycle_time); - printk ("%s: selected cmd640 PIO mode%d (%dns) %s/IORDY%s", + printk ("%s: selected cmd640 PIO mode%d (%dns)%s", drive->name, d.pio_mode, d.cycle_time, - d.use_iordy ? "w" : "wo", d.overridden ? " (overriding vendor mode)" : ""); display_clocks(index); } diff -u --recursive --new-file v2.1.7/linux/drivers/block/floppy.c linux/drivers/block/floppy.c --- v2.1.7/linux/drivers/block/floppy.c Tue Oct 29 19:58:03 1996 +++ linux/drivers/block/floppy.c Wed Nov 6 11:49:21 1996 @@ -107,7 +107,6 @@ /* do print messages for unexpected interrupts */ static int print_unex=1; -#include #include /* the following is the mask of allowed drives. By default units 2 and @@ -132,21 +131,6 @@ #include #include -#define OLDFDRAWCMD 0x020d /* send a raw command to the FDC */ - -struct old_floppy_raw_cmd { - void *data; - long length; - - unsigned char rate; - unsigned char flags; - unsigned char cmd_count; - unsigned char cmd[9]; - unsigned char reply_count; - unsigned char reply[7]; - int track; -}; - #include #include #include @@ -2419,6 +2403,17 @@ #endif } +static inline int check_dma_crossing(char *start, + unsigned long length, char *message) +{ + if (CROSS_64KB(start, length)) { + printk("DMA xfer crosses 64KB boundary in %s %p-%p\n", + message, start, start+length); + return 1; + } else + return 0; +} + /* * Formulate a read/write request. 
* this routine decides where to load the data (directly to buffer, or to @@ -2570,6 +2565,9 @@ indirect, direct, sector_t); return 0; } + check_dma_crossing(raw_cmd->kernel_data, + raw_cmd->length, + "end of make_raw_request [1]"); return 2; } } @@ -2615,6 +2613,8 @@ raw_cmd->length = ((raw_cmd->length -1)|(ssize-1))+1; raw_cmd->length <<= 9; #ifdef FLOPPY_SANITY_CHECK + check_dma_crossing(raw_cmd->kernel_data, raw_cmd->length, + "end of make_raw_request"); if ((raw_cmd->length < current_count_sectors << 9) || (raw_cmd->kernel_data != CURRENT->buffer && CT(COMMAND) == FD_WRITE && @@ -2850,13 +2850,11 @@ return copy_from_user(address, param, size) ? -EFAULT : 0; } -static inline int write_user_long(unsigned long useraddr, unsigned long value) -{ - return put_user(value, (unsigned long *)useraddr) ? -EFAULT : 0; -} +#define _COPYOUT(x) (copy_to_user((void *)param, &(x), sizeof(x)) ? -EFAULT : 0) +#define _COPYIN(x) (copy_from_user(&(x), (void *)param, sizeof(x)) ? -EFAULT : 0) -#define COPYOUT(x) ECALL(fd_copyout((void *)param, &(x), sizeof(x))) -#define COPYIN(x) ECALL(fd_copyin((void *)param, &(x), sizeof(x))) +#define COPYOUT(x) ECALL(_COPYOUT(x)) +#define COPYIN(x) ECALL(_COPYIN(x)) static inline const char *drive_name(int type, int drive) { @@ -2927,24 +2925,11 @@ static inline int raw_cmd_copyout(int cmd, char *param, struct floppy_raw_cmd *ptr) { - struct old_floppy_raw_cmd old_raw_cmd; int ret; while(ptr) { - if (cmd == OLDFDRAWCMD) { - old_raw_cmd.flags = ptr->flags; - old_raw_cmd.data = ptr->data; - old_raw_cmd.length = ptr->length; - old_raw_cmd.rate = ptr->rate; - old_raw_cmd.reply_count = ptr->reply_count; - memcpy(old_raw_cmd.reply, ptr->reply, 7); - COPYOUT(old_raw_cmd); - param += sizeof(old_raw_cmd); - } else { - COPYOUT(*ptr); - param += sizeof(struct floppy_raw_cmd); - } - + COPYOUT(*ptr); + param += sizeof(struct floppy_raw_cmd); if ((ptr->flags & FD_RAW_READ) && ptr->buffer_length){ if (ptr->length>=0 && ptr->length<=ptr->buffer_length) 
ECALL(fd_copyout(ptr->data, @@ -2981,7 +2966,6 @@ struct floppy_raw_cmd **rcmd) { struct floppy_raw_cmd *ptr; - struct old_floppy_raw_cmd old_raw_cmd; int ret; int i; @@ -2992,37 +2976,20 @@ if (!ptr) return -ENOMEM; *rcmd = ptr; - if (cmd == OLDFDRAWCMD){ - COPYIN(old_raw_cmd); - ptr->flags = old_raw_cmd.flags; - ptr->data = old_raw_cmd.data; - ptr->length = old_raw_cmd.length; - ptr->rate = old_raw_cmd.rate; - ptr->cmd_count = old_raw_cmd.cmd_count; - ptr->track = old_raw_cmd.track; - ptr->phys_length = 0; - ptr->next = 0; - ptr->buffer_length = 0; - memcpy(ptr->cmd, old_raw_cmd.cmd, 9); - param += sizeof(struct old_floppy_raw_cmd); - if (ptr->cmd_count > 9) - return -EINVAL; - } else { - COPYIN(*ptr); - ptr->next = 0; - ptr->buffer_length = 0; - param += sizeof(struct floppy_raw_cmd); - if (ptr->cmd_count > 33) - /* the command may now also take up the space - * initially intended for the reply & the - * reply count. Needed for long 82078 commands - * such as RESTORE, which takes ... 17 command - * bytes. Murphy's law #137: When you reserve - * 16 bytes for a structure, you'll one day - * discover that you really need 17... - */ - return -EINVAL; - } + COPYIN(*ptr); + ptr->next = 0; + ptr->buffer_length = 0; + param += sizeof(struct floppy_raw_cmd); + if (ptr->cmd_count > 33) + /* the command may now also take up the space + * initially intended for the reply & the + * reply count. Needed for long 82078 commands + * such as RESTORE, which takes ... 17 command + * bytes. Murphy's law #137: When you reserve + * 16 bytes for a structure, you'll one day + * discover that you really need 17... 
+ */ + return -EINVAL; for (i=0; i< 16; i++) ptr->reply[i] = 0; @@ -3037,9 +3004,6 @@ return -ENOMEM; ptr->buffer_length = ptr->length; } - if ( ptr->flags & FD_RAW_READ ) - ECALL( verify_area( VERIFY_WRITE, ptr->data, - ptr->length )); if (ptr->flags & FD_RAW_WRITE) ECALL(fd_copyin(ptr->data, ptr->kernel_data, ptr->length)); @@ -3181,47 +3145,42 @@ } /* handle obsolete ioctl's */ -static struct translation_entry { - int newcmd; - int oldcmd; - int oldsize; /* size of 0x00xx-style ioctl. Reflects old structures, thus - * use numeric values. NO SIZEOFS */ -} translation_table[]= { - {FDCLRPRM, 0, 0}, - {FDSETPRM, 1, 28}, - {FDDEFPRM, 2, 28}, - {FDGETPRM, 3, 28}, - {FDMSGON, 4, 0}, - {FDMSGOFF, 5, 0}, - {FDFMTBEG, 6, 0}, - {FDFMTTRK, 7, 12}, - {FDFMTEND, 8, 0}, - {FDSETEMSGTRESH, 10, 0}, - {FDFLUSH, 11, 0}, - {FDSETMAXERRS, 12, 20}, - {OLDFDRAWCMD, 30, 0}, - {FDGETMAXERRS, 14, 20}, - {FDGETDRVTYP, 16, 16}, - {FDSETDRVPRM, 20, 88}, - {FDGETDRVPRM, 21, 88}, - {FDGETDRVSTAT, 22, 52}, - {FDPOLLDRVSTAT, 23, 52}, - {FDRESET, 24, 0}, - {FDGETFDCSTAT, 25, 40}, - {FDWERRORCLR, 27, 0}, - {FDWERRORGET, 28, 24}, - {FDRAWCMD, 0, 0}, - {FDEJECT, 0, 0}, - {FDTWADDLE, 40, 0} }; +int ioctl_table[]= { + FDCLRPRM, + FDSETPRM, + FDDEFPRM, + FDGETPRM, + FDMSGON, + FDMSGOFF, + FDFMTBEG, + FDFMTTRK, + FDFMTEND, + FDSETEMSGTRESH, + FDFLUSH, + FDSETMAXERRS, + FDGETMAXERRS, + FDGETDRVTYP, + FDSETDRVPRM, + FDGETDRVPRM, + FDGETDRVSTAT, + FDPOLLDRVSTAT, + FDRESET, + FDGETFDCSTAT, + FDWERRORCLR, + FDWERRORGET, + FDRAWCMD, + FDEJECT, + FDTWADDLE +}; -static inline int normalize_0x02xx_ioctl(int *cmd, int *size) +static inline int normalize_ioctl(int *cmd, int *size) { int i; - for (i=0; i < ARRAY_SIZE(translation_table); i++) { - if ((*cmd & 0xffff) == (translation_table[i].newcmd & 0xffff)){ + for (i=0; i < ARRAY_SIZE(ioctl_table); i++) { + if ((*cmd & 0xffff) == (ioctl_table[i] & 0xffff)){ *size = _IOC_SIZE(*cmd); - *cmd = translation_table[i].newcmd; + *cmd = ioctl_table[i]; if (*size > 
_IOC_SIZE(*cmd)) { printk("ioctl not yet supported\n"); return -EFAULT; @@ -3232,31 +3191,6 @@ return -EINVAL; } -static inline int xlate_0x00xx_ioctl(int *cmd, int *size) -{ - int i; - /* old ioctls' for kernels <= 1.3.33 */ - /* When the next even release will come around, we'll start - * warning against these. - * When the next odd release will come around, we'll fail with - * -EINVAL */ - if(strcmp(system_utsname.version, "1.4.0") >= 0) - printk("obsolete floppy ioctl %x\n", *cmd); - if((system_utsname.version[0] == '1' && - strcmp(system_utsname.version, "1.5.0") >= 0) || - (system_utsname.version[0] >= '2' && - strcmp(system_utsname.version, "2.1.0") >= 0)) - return -EINVAL; - for (i=0; i < ARRAY_SIZE(translation_table); i++) { - if (*cmd == translation_table[i].oldcmd) { - *size = translation_table[i].oldsize; - *cmd = translation_table[i].newcmd; - return 0; - } - } - return -EINVAL; -} - static int get_floppy_geometry(int drive, int type, struct floppy_struct **g) { if (type) @@ -3315,25 +3249,24 @@ /* the following have been inspired by the corresponding * code for other block devices. 
*/ struct floppy_struct *g; - struct hd_geometry *loc; - case HDIO_GETGEO: - loc = (struct hd_geometry *) param; + { + struct hd_geometry loc; ECALL(get_floppy_geometry(drive, type, &g)); - ECALL(verify_area(VERIFY_WRITE, loc, sizeof(*loc))); - put_user(g->head, &loc->heads); - put_user(g->sect, &loc->sectors); - put_user(g->track, &loc->cylinders); - put_user(0,&loc->start); - return 0; + loc.heads = g->head; + loc.sectors = g->sect; + loc.cylinders = g->track; + loc.start = 0; + return _COPYOUT(loc); + } case BLKRASET: if(!suser()) return -EACCES; if(param > 0xff) return -EINVAL; read_ahead[MAJOR(inode->i_rdev)] = param; return 0; case BLKRAGET: - return write_user_long(param, - read_ahead[MAJOR(inode->i_rdev)]); + return put_user(read_ahead[MAJOR(inode->i_rdev)], + (int *) param); case BLKFLSBUF: if(!suser()) return -EACCES; fsync_dev(inode->i_rdev); @@ -3342,16 +3275,14 @@ case BLKGETSIZE: ECALL(get_floppy_geometry(drive, type, &g)); - return write_user_long(param, g->size); + return put_user(g->size, (int *) param); /* BLKRRPART is not defined as floppies don't have * partition tables */ } /* convert the old style command into a new style command */ if ((cmd & 0xff00) == 0x0200) { - ECALL(normalize_0x02xx_ioctl(&cmd, &size)); - } else if ((cmd & 0xff00) == 0x0000) { - ECALL(xlate_0x00xx_ioctl(&cmd, &size)); + ECALL(normalize_ioctl(&cmd, &size)); } else return -EINVAL; @@ -3360,10 +3291,6 @@ ((cmd & 0x40) && !IOCTL_ALLOWED)) return -EPERM; - /* verify writability of result, and fail early */ - if (_IOC_DIR(cmd) & _IOC_READ) - ECALL(verify_area(VERIFY_WRITE,(void *) param, size)); - /* copyin */ CLEARSTRUCT(&inparam); if (_IOC_DIR(cmd) & _IOC_WRITE) @@ -3458,7 +3385,6 @@ return 0; OUT(FDWERRORGET,UDRWE); - case OLDFDRAWCMD: case FDRAWCMD: if (type) return -EINVAL; diff -u --recursive --new-file v2.1.7/linux/drivers/block/genhd.c linux/drivers/block/genhd.c --- v2.1.7/linux/drivers/block/genhd.c Tue Aug 20 16:44:45 1996 +++ linux/drivers/block/genhd.c Wed Nov 6 
14:49:31 1996 @@ -66,7 +66,6 @@ const char *maj = hd->major_name; char unit = (minor >> hd->minor_shift) + 'a'; -#ifdef CONFIG_BLK_DEV_IDE /* * IDE devices use multiple major numbers, but the drives * are named as: {hda,hdb}, {hdc,hdd}, {hde,hdf}, {hdg,hdh}.. @@ -82,7 +81,6 @@ case IDE0_MAJOR: maj = "hd"; } -#endif part = minor & ((1 << hd->minor_shift) - 1); if (part) sprintf(buf, "%s%c%d", maj, unit, part); diff -u --recursive --new-file v2.1.7/linux/drivers/block/ide-cd.c linux/drivers/block/ide-cd.c --- v2.1.7/linux/drivers/block/ide-cd.c Tue Oct 29 19:58:03 1996 +++ linux/drivers/block/ide-cd.c Wed Nov 6 14:49:31 1996 @@ -108,6 +108,7 @@ * 3.16 Jul 28, 1996 -- Fix from Gadi to reduce kernel stack usage for ioctl. * 3.17 Sep 17, 1996 -- Tweak audio reads for some drives. * Start changing CDROMLOADFROMSLOT to CDROM_SELECT_DISC. + * 3.17a Oct 31, 1996 -- Added module and DMA support. * * NOTE: Direct audio reads will only work on some types of drive. * So far, i've received reports of success for Sony and Toshiba drives. 
@@ -123,6 +124,7 @@ /***************************************************************************/ +#include #include #include #include @@ -255,41 +257,127 @@ #define CDROM_STATE_FLAGS(drive) ((struct ide_cd_state_flags *)&((drive)->bios_head)) -#define SECTOR_BUFFER_SIZE CD_FRAMESIZE +struct atapi_request_sense { + unsigned char error_code : 7; + unsigned char valid : 1; + byte reserved1; + unsigned char sense_key : 4; + unsigned char reserved2 : 1; + unsigned char ili : 1; + unsigned char reserved3 : 2; + byte info[4]; + byte sense_len; + byte command_info[4]; + byte asc; + byte ascq; + byte fru; + byte sense_key_specific[3]; +}; +struct packet_command { + char *buffer; + int buflen; + int stat; + struct atapi_request_sense *sense_data; + unsigned char c[12]; +}; - -/**************************************************************************** - * Routines to read and write data from/to the drive, using - * the routines input_ide_data() and output_ide_data() from ide.c. - * - * These routines will round up any request for an odd number of bytes, - * so if an odd bytecount is specified, be sure that there's at least one - * extra byte allocated for the buffer. - */ +/* Structure of a MSF cdrom address. */ +struct atapi_msf { + byte reserved; + byte minute; + byte second; + byte frame; +}; -static inline -void cdrom_in_bytes (ide_drive_t *drive, void *buffer, uint bytecount) -{ - ++bytecount; - ide_input_data (drive, buffer, bytecount / 4); - if ((bytecount & 0x03) >= 2) { - insw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1); - } -} +/* Space to hold the disk TOC. 
*/ -static inline -void cdrom_out_bytes (ide_drive_t *drive, void *buffer, uint bytecount) -{ - ++bytecount; - ide_output_data (drive, buffer, bytecount / 4); - if ((bytecount & 0x03) >= 2) { - outsw (IDE_DATA_REG, - ((byte *)buffer) + (bytecount & ~0x03), 1); - } -} +#define MAX_TRACKS 99 +struct atapi_toc_header { + unsigned short toc_length; + byte first_track; + byte last_track; +}; + +struct atapi_toc_entry { + byte reserved1; + unsigned control : 4; + unsigned adr : 4; + byte track; + byte reserved2; + union { + unsigned lba; + struct atapi_msf msf; + } addr; +}; + +struct atapi_toc { + int last_session_lba; + int xa_flag; + unsigned capacity; + struct atapi_toc_header hdr; + struct atapi_toc_entry ent[MAX_TRACKS+1]; + /* One extra for the leadout. */ +}; + + +/* This structure is annoyingly close to, but not identical with, + the cdrom_subchnl structure from cdrom.h. */ +struct atapi_cdrom_subchnl +{ + u_char acdsc_reserved; + u_char acdsc_audiostatus; + u_short acdsc_length; + u_char acdsc_format; + + u_char acdsc_adr: 4; + u_char acdsc_ctrl: 4; + u_char acdsc_trk; + u_char acdsc_ind; + union { + struct atapi_msf msf; + int lba; + } acdsc_absaddr; + union { + struct atapi_msf msf; + int lba; + } acdsc_reladdr; +}; + + +/* Extra per-device info for cdrom drives. */ +struct cdrom_info { + + /* Buffer for table of contents. NULL if we haven't allocated + a TOC buffer for this device yet. */ + + struct atapi_toc *toc; + + /* Sector buffer. If a read request wants only the first part + of a cdrom block, we cache the rest of the block here, + in the expectation that that data is going to be wanted soon. + SECTOR_BUFFERED is the number of the first buffered sector, + and NSECTORS_BUFFERED is the number of sectors in the buffer. + Before the buffer is allocated, we should have + SECTOR_BUFFER == NULL and NSECTORS_BUFFERED == 0. 
*/ + + unsigned long sector_buffered; + unsigned long nsectors_buffered; + char *sector_buffer; + + /* The result of the last successful request sense command + on this device. */ + struct atapi_request_sense sense_data; + + struct request request_sense_request; + struct packet_command request_sense_pc; + int dma; +}; + + +#define SECTOR_BUFFER_SIZE CD_FRAMESIZE @@ -573,6 +661,7 @@ struct atapi_request_sense *reqbuf, struct packet_command *failed_command) { + struct cdrom_info *info = drive->driver_data; struct request *rq; struct packet_command *pc; int len; @@ -580,11 +669,11 @@ /* If the request didn't explicitly specify where to put the sense data, use the statically allocated structure. */ if (reqbuf == NULL) - reqbuf = &drive->cdrom_info.sense_data; + reqbuf = &info->sense_data; /* Make up a new request to retrieve sense information. */ - pc = &HWIF(drive)->request_sense_pc; + pc = &info->request_sense_pc; memset (pc, 0, sizeof (*pc)); /* The request_sense structure has an odd number of (16-bit) words, @@ -602,7 +691,7 @@ /* stuff the sense request in front of our current request */ - rq = &HWIF(drive)->request_sense_request; + rq = &info->request_sense_request; ide_init_drive_cmd (rq); rq->cmd = REQUEST_SENSE_COMMAND; rq->buffer = (char *)pc; @@ -641,9 +730,11 @@ buffers. */ static void cdrom_saw_media_change (ide_drive_t *drive) { + struct cdrom_info *info = drive->driver_data; + CDROM_STATE_FLAGS (drive)->media_changed = 1; CDROM_STATE_FLAGS (drive)->toc_valid = 0; - drive->cdrom_info.nsectors_buffered = 0; + info->nsectors_buffered = 0; } @@ -792,11 +883,16 @@ static int cdrom_start_packet_command (ide_drive_t *drive, int xferlen, ide_handler_t *handler) { + struct cdrom_info *info = drive->driver_data; + /* Wait for the controller to be idle. */ if (ide_wait_stat (drive, 0, BUSY_STAT, WAIT_READY)) return 1; + if (info->dma) + info->dma = !HWIF(drive)->dmaproc(ide_dma_read, drive); + /* Set up the controller registers. 
*/ - OUT_BYTE (0, IDE_FEATURE_REG); + OUT_BYTE (info->dma, IDE_FEATURE_REG); OUT_BYTE (0, IDE_NSECTOR_REG); OUT_BYTE (0, IDE_SECTOR_REG); @@ -804,6 +900,9 @@ OUT_BYTE (xferlen >> 8 , IDE_HCYL_REG); OUT_BYTE (drive->ctl, IDE_CONTROL_REG); + if (info->dma) + (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); + if (CDROM_CONFIG_FLAGS (drive)->drq_interrupt) { ide_set_handler (drive, handler, WAIT_CMD); OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */ @@ -842,7 +941,7 @@ ide_set_handler (drive, handler, WAIT_CMD); /* Send the command to the device. */ - cdrom_out_bytes (drive, cmd_buf, cmd_len); + atapi_output_bytes (drive, cmd_buf, cmd_len); return 0; } @@ -863,7 +962,7 @@ static void cdrom_buffer_sectors (ide_drive_t *drive, unsigned long sector, int sectors_to_transfer) { - struct cdrom_info *info = &drive->cdrom_info; + struct cdrom_info *info = drive->driver_data; /* Number of sectors to read into the buffer. */ int sectors_to_buffer = MIN (sectors_to_transfer, @@ -892,7 +991,7 @@ /* Read the data into the buffer. */ dest = info->sector_buffer + info->nsectors_buffered * SECTOR_SIZE; while (sectors_to_buffer > 0) { - cdrom_in_bytes (drive, dest, SECTOR_SIZE); + atapi_input_bytes (drive, dest, SECTOR_SIZE); --sectors_to_buffer; --sectors_to_transfer; ++info->nsectors_buffered; @@ -902,7 +1001,7 @@ /* Throw away any remaining data. */ while (sectors_to_transfer > 0) { char dum[SECTOR_SIZE]; - cdrom_in_bytes (drive, dum, sizeof (dum)); + atapi_input_bytes (drive, dum, sizeof (dum)); --sectors_to_transfer; } } @@ -929,7 +1028,7 @@ and quit this request. */ while (len > 0) { int dum = 0; - cdrom_out_bytes (drive, &dum, sizeof (dum)); + atapi_output_bytes (drive, &dum, sizeof (dum)); len -= sizeof (dum); } } else { @@ -950,12 +1049,34 @@ { int stat; int ireason, len, sectors_to_transfer, nskip; + struct cdrom_info *info = drive->driver_data; + int i, dma = info->dma, dma_error = 0; struct request *rq = HWGROUP(drive)->rq; /* Check for errors. 
*/ + if (dma) { + info->dma = 0; + if ((dma_error = HWIF(drive)->dmaproc(ide_dma_status_bad, drive))) { + printk ("%s: disabled DMA\n", drive->name); + drive->using_dma = 0; + } + (void) (HWIF(drive)->dmaproc(ide_dma_abort, drive)); + } + if (cdrom_decode_status (drive, 0, &stat)) return; + if (dma) { + if (!dma_error) { + for (i = rq->nr_sectors; i > 0;) { + i -= rq->current_nr_sectors; + ide_end_request(1, HWGROUP(drive)); + } + } else + ide_error (drive, "dma error", stat); + return; + } + /* Read the interrupt reason and the transfer length. */ ireason = IN_BYTE (IDE_NSECTOR_REG); len = IN_BYTE (IDE_LCYL_REG) + 256 * IN_BYTE (IDE_HCYL_REG); @@ -1000,7 +1121,7 @@ while (nskip > 0) { /* We need to throw away a sector. */ char dum[SECTOR_SIZE]; - cdrom_in_bytes (drive, dum, sizeof (dum)); + atapi_input_bytes (drive, dum, sizeof (dum)); --rq->current_nr_sectors; --nskip; @@ -1033,8 +1154,8 @@ /* Read this_transfer sectors into the current buffer. */ while (this_transfer > 0) { - cdrom_in_bytes (drive - , rq->buffer, SECTOR_SIZE); + atapi_input_bytes (drive, + rq->buffer, SECTOR_SIZE); rq->buffer += SECTOR_SIZE; --rq->nr_sectors; --rq->current_nr_sectors; @@ -1057,7 +1178,7 @@ */ static int cdrom_read_from_buffer (ide_drive_t *drive) { - struct cdrom_info *info = &drive->cdrom_info; + struct cdrom_info *info = drive->driver_data; struct request *rq = HWGROUP(drive)->rq; /* Can't do anything if there's no buffer. */ @@ -1177,6 +1298,7 @@ */ static void cdrom_start_read (ide_drive_t *drive, unsigned int block) { + struct cdrom_info *info = drive->driver_data; struct request *rq = HWGROUP(drive)->rq; int minor = MINOR (rq->rq_dev); @@ -1197,7 +1319,12 @@ return; /* Clear the local sector buffer. */ - drive->cdrom_info.nsectors_buffered = 0; + info->nsectors_buffered = 0; + + if (drive->using_dma && (rq->sector % SECTORS_PER_FRAME == 0) && (rq->nr_sectors % SECTORS_PER_FRAME == 0)) + info->dma = 1; + else + info->dma = 0; /* Start sending the read request to the drive. 
*/ cdrom_start_packet_command (drive, 32768, @@ -1274,13 +1401,13 @@ } /* Transfer the data. */ - cdrom_out_bytes (drive, pc->buffer, thislen); + atapi_output_bytes (drive, pc->buffer, thislen); /* If we haven't moved enough data to satisfy the drive, add some padding. */ while (len > thislen) { int dum = 0; - cdrom_out_bytes (drive, &dum, sizeof (dum)); + atapi_output_bytes (drive, &dum, sizeof (dum)); len -= sizeof (dum); } @@ -1301,13 +1428,13 @@ } /* Transfer the data. */ - cdrom_in_bytes (drive, pc->buffer, thislen); + atapi_input_bytes (drive, pc->buffer, thislen); /* If we haven't moved enough data to satisfy the drive, add some padding. */ while (len > thislen) { int dum = 0; - cdrom_in_bytes (drive, &dum, sizeof (dum)); + atapi_input_bytes (drive, &dum, sizeof (dum)); len -= sizeof (dum); } @@ -1342,6 +1469,9 @@ int len; struct request *rq = HWGROUP(drive)->rq; struct packet_command *pc = (struct packet_command *)rq->buffer; + struct cdrom_info *info = drive->driver_data; + + info->dma = 0; len = pc->buflen; if (len < 0) len = -len; @@ -1432,10 +1562,8 @@ * cdrom driver request routine. */ -void ide_do_rw_cdrom (ide_drive_t *drive, unsigned long block) +void ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, unsigned long block) { - struct request *rq = HWGROUP(drive)->rq; - if (rq -> cmd == PACKET_COMMAND || rq -> cmd == REQUEST_SENSE_COMMAND) cdrom_do_packet_command (drive); else if (rq -> cmd == RESET_DRIVE_COMMAND) { @@ -1670,7 +1798,8 @@ struct atapi_request_sense *reqbuf) { int stat, ntracks, i; - struct atapi_toc *toc = drive->cdrom_info.toc; + struct cdrom_info *info = drive->driver_data; + struct atapi_toc *toc = info->toc; struct { struct atapi_toc_header hdr; struct atapi_toc_entry ent; @@ -1680,7 +1809,7 @@ /* Try to allocate space. 
*/ toc = (struct atapi_toc *) kmalloc (sizeof (struct atapi_toc), GFP_KERNEL); - drive->cdrom_info.toc = toc; + info->toc = toc; } if (toc == NULL) { @@ -1904,6 +2033,7 @@ struct atapi_toc_entry **ent, struct atapi_request_sense *reqbuf) { + struct cdrom_info *info = drive->driver_data; int stat, ntracks; struct atapi_toc *toc; @@ -1911,7 +2041,7 @@ stat = cdrom_read_toc (drive, reqbuf); if (stat) return stat; - toc = drive->cdrom_info.toc; + toc = info->toc; /* Check validity of requested track number. */ ntracks = toc->hdr.last_track - toc->hdr.first_track + 1; @@ -2026,6 +2156,8 @@ int ide_cdrom_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { + struct cdrom_info *info = drive->driver_data; + switch (cmd) { case CDROMEJECT: { int stat; @@ -2133,7 +2265,7 @@ stat = cdrom_read_toc (drive, NULL); if (stat) return stat; - toc = drive->cdrom_info.toc; + toc = info->toc; tochdr.cdth_trk0 = toc->hdr.first_track; tochdr.cdth_trk1 = toc->hdr.last_track; @@ -2300,7 +2432,7 @@ stat = cdrom_read_toc (drive, NULL); if (stat) return stat; - toc = drive->cdrom_info.toc; + toc = info->toc; if (ms_info.addr_format == CDROM_MSF) lba_to_msf (toc->last_session_lba, @@ -2330,7 +2462,7 @@ stat = cdrom_read_toc (drive, NULL); if (stat) return stat; - toc = drive->cdrom_info.toc; + toc = info->toc; stat = verify_area (VERIFY_READ, (char *)arg, sizeof (ra)); if (stat) return stat; @@ -2411,7 +2543,7 @@ stat = cdrom_read_toc (drive, NULL); if (stat) return stat; - toc = drive->cdrom_info.toc; + toc = info->toc; if (lba < 0 || lba >= toc->capacity) return -EINVAL; @@ -2569,6 +2701,8 @@ return -EROFS; } + MOD_INC_USE_COUNT; + /* If this is the first open, check the drive status. 
*/ if (drive->usage == 1) { int stat; @@ -2613,6 +2747,7 @@ if (CDROM_STATE_FLAGS (drive)->eject_on_close) (void) cdrom_eject (drive, 0, NULL); } + MOD_DEC_USE_COUNT; } @@ -2623,6 +2758,8 @@ void ide_cdrom_setup (ide_drive_t *drive) { + struct cdrom_info *info = drive->driver_data; + blksize_size[HWIF(drive)->major][drive->select.b.unit << PARTN_BITS] = CD_FRAMESIZE; @@ -2705,12 +2842,97 @@ } #endif /* not STANDARD_ATAPI */ - drive->cdrom_info.toc = NULL; - drive->cdrom_info.sector_buffer = NULL; - drive->cdrom_info.sector_buffered = 0; - drive->cdrom_info.nsectors_buffered = 0; + info->toc = NULL; + info->sector_buffer = NULL; + info->sector_buffered = 0; + info->nsectors_buffered = 0; +} + +int ide_cdrom_cleanup(ide_drive_t *drive) +{ + struct cdrom_info *info = drive->driver_data; + + if (ide_unregister_subdriver (drive)) + return 1; + if (info->sector_buffer != NULL) + kfree (info->sector_buffer); + if (info->toc != NULL) + kfree (info->toc); + kfree (info); + drive->driver_data = NULL; + return 0; +} + +int ide_cdrom_init (void); +static ide_module_t ide_cdrom_module = { + IDE_DRIVER_MODULE, + ide_cdrom_init, + NULL +}; + +static ide_driver_t ide_cdrom_driver = { + ide_cdrom, /* media */ + 0, /* busy */ + 1, /* supports_dma */ + ide_cdrom_cleanup, /* cleanup */ + ide_do_rw_cdrom, /* do_request */ + NULL, /* ??? or perhaps cdrom_end_request? 
*/ + ide_cdrom_ioctl, /* ioctl */ + ide_cdrom_open, /* open */ + ide_cdrom_release, /* release */ + ide_cdrom_check_media_change, /* media_change */ + NULL, /* pre_reset */ + NULL, /* capacity */ + NULL /* special */ +}; + +int ide_cdrom_init (void) +{ + ide_drive_t *drive; + struct cdrom_info *info; + int failed = 0; + + MOD_INC_USE_COUNT; + while ((drive = ide_scan_devices (ide_cdrom, NULL, failed++)) != NULL) { + info = (struct cdrom_info *) kmalloc (sizeof (struct cdrom_info), GFP_KERNEL); + if (info == NULL) { + printk ("%s: Can't allocate a cdrom structure\n", drive->name); + continue; + } + if (ide_register_subdriver (drive, &ide_cdrom_driver, IDE_SUBDRIVER_VERSION)) { + printk ("%s: Failed to register the driver with ide.c\n", drive->name); + kfree (info); + continue; + } + failed--; + memset (info, 0, sizeof (struct cdrom_info)); + drive->driver_data = info; + ide_cdrom_setup (drive); + } + ide_register_module(&ide_cdrom_module); + MOD_DEC_USE_COUNT; + return 0; +} + +#ifdef MODULE +int init_module (void) +{ + return ide_cdrom_init(); } +void cleanup_module(void) +{ + ide_drive_t *drive; + int failed = 0; + + while ((drive = ide_scan_devices (ide_cdrom, &ide_cdrom_driver, failed)) != NULL) + if (ide_cdrom_cleanup (drive)) { + printk ("%s: cleanup_module() called while still busy\n", drive->name); + failed++; + } + ide_unregister_module (&ide_cdrom_module); +} +#endif /* MODULE */ /* @@ -2720,7 +2942,6 @@ * Query the drive to find what features are available * before trying to use them. * Integrate spindown time adjustment patch. - * Modularize. * CDROMRESET ioctl. * Better support for changers. 
*/ diff -u --recursive --new-file v2.1.7/linux/drivers/block/ide-disk.c linux/drivers/block/ide-disk.c --- v2.1.7/linux/drivers/block/ide-disk.c Thu Jan 1 02:00:00 1970 +++ linux/drivers/block/ide-disk.c Wed Nov 6 14:49:31 1996 @@ -0,0 +1,660 @@ +/* + * linux/drivers/block/ide-disk.c Version 1.0 Oct 6, 1996 + * + * Copyright (C) 1994-1996 Linus Torvalds & authors (see below) + */ + +/* + * Maintained by Mark Lord + * and Gadi Oxman + * + * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c. + * + * From hd.c: + * | + * | It traverses the request-list, using interrupts to jump between functions. + * | As nearly all functions can be called within interrupts, we may not sleep. + * | Special care is recommended. Have Fun! + * | + * | modified by Drew Eckhardt to check nr of hd's from the CMOS. + * | + * | Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug + * | in the early extended-partition checks and added DM partitions. + * | + * | Early work on error handling by Mika Liljeberg (liljeber@cs.Helsinki.FI). + * | + * | IRQ-unmask, drive-id, multiple-mode, support for ">16 heads", + * | and general streamlining by Mark Lord (mlord@pobox.com). + * + * October, 1994 -- Complete line-by-line overhaul for linux 1.1.x, by: + * + * Mark Lord (mlord@pobox.com) (IDE Perf.Pkg) + * Delman Lee (delman@mipg.upenn.edu) ("Mr. atdisk2") + * Scott Snyder (snyder@fnald0.fnal.gov) (ATAPI IDE cd-rom) + * + * This was a rewrite of just about everything from hd.c, though some original + * code is still sprinkled about. Think of it as a major evolution, with + * inspiration from lots of linux users, esp. 
hamish@zot.apana.org.au + * + * Version 1.0 move disk only code from ide.c to ide-disk.c + * support optional byte-swapping of all data + */ + +#undef REALLY_SLOW_IO /* most systems can safely undef this */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ide.h" + +static void idedisk_bswap_data (void *buffer, int wcount) +{ + u16 *p = buffer; + + while (wcount--) { + *p++ = *p << 8 | *p >> 8; + *p++ = *p << 8 | *p >> 8; + } +} + +static inline void idedisk_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + ide_input_data(drive, buffer, wcount); + if (drive->bswap) + idedisk_bswap_data(buffer, wcount); +} + +static inline void idedisk_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + ide_output_data(drive, buffer, wcount); + if (drive->bswap) + idedisk_bswap_data(buffer, wcount); +} + +/* + * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity" + * value for this drive (from its reported identification information). 
+ * + * Returns: 1 if lba_capacity looks sensible + * 0 otherwise + */ +static int lba_capacity_is_ok (struct hd_driveid *id) +{ + unsigned long lba_sects = id->lba_capacity; + unsigned long chs_sects = id->cyls * id->heads * id->sectors; + unsigned long _10_percent = chs_sects / 10; + + /* perform a rough sanity check on lba_sects: within 10% is "okay" */ + if ((lba_sects - chs_sects) < _10_percent) + return 1; /* lba_capacity is good */ + + /* some drives have the word order reversed */ + lba_sects = (lba_sects << 16) | (lba_sects >> 16); + if ((lba_sects - chs_sects) < _10_percent) { + id->lba_capacity = lba_sects; /* fix it */ + return 1; /* lba_capacity is (now) good */ + } + return 0; /* lba_capacity value is bad */ +} + +/* + * read_intr() is the handler for disk read/multread interrupts + */ +static void read_intr (ide_drive_t *drive) +{ + byte stat; + int i; + unsigned int msect, nsect; + struct request *rq; + + if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { + ide_error(drive, "read_intr", stat); + return; + } + msect = drive->mult_count; +read_next: + rq = HWGROUP(drive)->rq; + if (msect) { + if ((nsect = rq->current_nr_sectors) > msect) + nsect = msect; + msect -= nsect; + } else + nsect = 1; + idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); +#ifdef DEBUG + printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", + drive->name, rq->sector, rq->sector+nsect-1, + (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); +#endif + rq->sector += nsect; + rq->buffer += nsect<<9; + rq->errors = 0; + i = (rq->nr_sectors -= nsect); + if ((rq->current_nr_sectors -= nsect) <= 0) + ide_end_request(1, HWGROUP(drive)); + if (i > 0) { + if (msect) + goto read_next; + ide_set_handler (drive, &read_intr, WAIT_CMD); + } +} + +/* + * write_intr() is the handler for disk write interrupts + */ +static void write_intr (ide_drive_t *drive) +{ + byte stat; + int i; + ide_hwgroup_t *hwgroup = HWGROUP(drive); + struct request *rq = 
hwgroup->rq; + + if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) { +#ifdef DEBUG + printk("%s: write: sector %ld, buffer=0x%08lx, remaining=%ld\n", + drive->name, rq->sector, (unsigned long) rq->buffer, + rq->nr_sectors-1); +#endif + if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { + rq->sector++; + rq->buffer += 512; + rq->errors = 0; + i = --rq->nr_sectors; + --rq->current_nr_sectors; + if (rq->current_nr_sectors <= 0) + ide_end_request(1, hwgroup); + if (i > 0) { + idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + ide_set_handler (drive, &write_intr, WAIT_CMD); + } + return; + } + } + ide_error(drive, "write_intr", stat); +} + +/* + * ide_multwrite() transfers a block of up to mcount sectors of data + * to a drive as part of a disk multiple-sector write operation. + */ +void ide_multwrite (ide_drive_t *drive, unsigned int mcount) +{ + struct request *rq = &HWGROUP(drive)->wrq; + + do { + unsigned int nsect = rq->current_nr_sectors; + if (nsect > mcount) + nsect = mcount; + mcount -= nsect; + + idedisk_output_data(drive, rq->buffer, nsect<<7); +#ifdef DEBUG + printk("%s: multwrite: sector %ld, buffer=0x%08lx, count=%d, remaining=%ld\n", + drive->name, rq->sector, (unsigned long) rq->buffer, + nsect, rq->nr_sectors - nsect); +#endif + if ((rq->nr_sectors -= nsect) <= 0) + break; + if ((rq->current_nr_sectors -= nsect) == 0) { + if ((rq->bh = rq->bh->b_reqnext) != NULL) { + rq->current_nr_sectors = rq->bh->b_size>>9; + rq->buffer = rq->bh->b_data; + } else { + panic("%s: buffer list corrupted\n", drive->name); + break; + } + } else { + rq->buffer += nsect << 9; + } + } while (mcount); +} + +/* + * multwrite_intr() is the handler for disk multwrite interrupts + */ +static void multwrite_intr (ide_drive_t *drive) +{ + byte stat; + int i; + ide_hwgroup_t *hwgroup = HWGROUP(drive); + struct request *rq = &hwgroup->wrq; + + if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) { + if (stat & DRQ_STAT) { + if (rq->nr_sectors) { + 
ide_multwrite(drive, drive->mult_count); + ide_set_handler (drive, &multwrite_intr, WAIT_CMD); + return; + } + } else { + if (!rq->nr_sectors) { /* all done? */ + rq = hwgroup->rq; + for (i = rq->nr_sectors; i > 0;){ + i -= rq->current_nr_sectors; + ide_end_request(1, hwgroup); + } + return; + } + } + } + ide_error(drive, "multwrite_intr", stat); +} + +/* + * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd. + */ +static void set_multmode_intr (ide_drive_t *drive) +{ + byte stat = GET_STAT(); + + if (OK_STAT(stat,READY_STAT,BAD_STAT)) { + drive->mult_count = drive->mult_req; + } else { + drive->mult_req = drive->mult_count = 0; + drive->special.b.recalibrate = 1; + (void) ide_dump_status(drive, "set_multmode", stat); + } +} + +/* + * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd. + */ +static void set_geometry_intr (ide_drive_t *drive) +{ + byte stat = GET_STAT(); + + if (!OK_STAT(stat,READY_STAT,BAD_STAT)) + ide_error(drive, "set_geometry_intr", stat); +} + +/* + * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd. + */ +static void recal_intr (ide_drive_t *drive) +{ + byte stat = GET_STAT(); + + if (!OK_STAT(stat,READY_STAT,BAD_STAT)) + ide_error(drive, "recal_intr", stat); +} + +/* + * do_rw_disk() issues READ and WRITE commands to a disk, + * using LBA if supported, or CHS otherwise, to address sectors. + * It also takes care of issuing special DRIVE_CMDs. 
+ */ +static void do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ +#ifdef CONFIG_BLK_DEV_PROMISE + ide_hwif_t *hwif = HWIF(drive); + int use_promise_io = 0; +#endif /* CONFIG_BLK_DEV_PROMISE */ + + OUT_BYTE(drive->ctl,IDE_CONTROL_REG); + OUT_BYTE(rq->nr_sectors,IDE_NSECTOR_REG); +#ifdef CONFIG_BLK_DEV_PROMISE + if (IS_PROMISE_DRIVE) { + if (hwif->is_promise2 || rq->cmd == READ) { + use_promise_io = 1; + } + } + if (drive->select.b.lba || use_promise_io) { +#else /* !CONFIG_BLK_DEV_PROMISE */ + if (drive->select.b.lba) { +#endif /* CONFIG_BLK_DEV_PROMISE */ +#ifdef DEBUG + printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n", + drive->name, (rq->cmd==READ)?"read":"writ", + block, rq->nr_sectors, (unsigned long) rq->buffer); +#endif + OUT_BYTE(block,IDE_SECTOR_REG); + OUT_BYTE(block>>=8,IDE_LCYL_REG); + OUT_BYTE(block>>=8,IDE_HCYL_REG); + OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG); + } else { + unsigned int sect,head,cyl,track; + track = block / drive->sect; + sect = block % drive->sect + 1; + OUT_BYTE(sect,IDE_SECTOR_REG); + head = track % drive->head; + cyl = track / drive->head; + OUT_BYTE(cyl,IDE_LCYL_REG); + OUT_BYTE(cyl>>8,IDE_HCYL_REG); + OUT_BYTE(head|drive->select.all,IDE_SELECT_REG); +#ifdef DEBUG + printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n", + drive->name, (rq->cmd==READ)?"read":"writ", cyl, + head, sect, rq->nr_sectors, (unsigned long) rq->buffer); +#endif + } +#ifdef CONFIG_BLK_DEV_PROMISE + if (use_promise_io) { + do_promise_io (drive, rq); + return; + } +#endif /* CONFIG_BLK_DEV_PROMISE */ + if (rq->cmd == READ) { +#ifdef CONFIG_BLK_DEV_TRITON + if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive))) + return; +#endif /* CONFIG_BLK_DEV_TRITON */ + ide_set_handler(drive, &read_intr, WAIT_CMD); + OUT_BYTE(drive->mult_count ? 
WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG); + return; + } + if (rq->cmd == WRITE) { +#ifdef CONFIG_BLK_DEV_TRITON + if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive))) + return; +#endif /* CONFIG_BLK_DEV_TRITON */ + OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG); + if (ide_wait_stat(drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { + printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name, + drive->mult_count ? "MULTWRITE" : "WRITE"); + return; + } + if (!drive->unmask) + cli(); + if (drive->mult_count) { + HWGROUP(drive)->wrq = *rq; /* scratchpad */ + ide_set_handler (drive, &multwrite_intr, WAIT_CMD); + ide_multwrite(drive, drive->mult_count); + } else { + ide_set_handler (drive, &write_intr, WAIT_CMD); + idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + } + return; + } + printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd); + ide_end_request(0, HWGROUP(drive)); +} + +static int idedisk_open (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ + MOD_INC_USE_COUNT; + if (drive->removable && drive->usage == 1) { + byte door_lock[] = {WIN_DOORLOCK,0,0,0}; + struct request rq; + check_disk_change(inode->i_rdev); + ide_init_drive_cmd (&rq); + rq.buffer = door_lock; + /* + * Ignore the return code from door_lock, + * since the open() has already succeeded, + * and the door_lock is irrelevant at this point. 
+ */ + (void) ide_do_drive_cmd(drive, &rq, ide_wait); + } + return 0; +} + +static void idedisk_release (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ + if (drive->removable && !drive->usage) { + byte door_unlock[] = {WIN_DOORUNLOCK,0,0,0}; + struct request rq; + invalidate_buffers(inode->i_rdev); + ide_init_drive_cmd (&rq); + rq.buffer = door_unlock; + (void) ide_do_drive_cmd(drive, &rq, ide_wait); + } + MOD_DEC_USE_COUNT; +} + +static int idedisk_media_change (ide_drive_t *drive) +{ + return drive->removable; /* if removable, always assume it was changed */ +} + +/* + * current_capacity() returns the capacity (in sectors) of a drive + * according to its current geometry/LBA settings. + */ +static unsigned long idedisk_capacity (ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + unsigned long capacity = drive->cyl * drive->head * drive->sect; + + drive->select.b.lba = 0; + /* Determine capacity, and use LBA if the drive properly supports it */ + if (id != NULL && (id->capability & 2) && lba_capacity_is_ok(id)) { + if (id->lba_capacity >= capacity) { + capacity = id->lba_capacity; + drive->select.b.lba = 1; + } + } + return (capacity - drive->sect0); +} + +static void idedisk_special (ide_drive_t *drive) +{ + special_t *s = &drive->special; + + if (s->b.set_geometry) { + s->b.set_geometry = 0; + OUT_BYTE(drive->sect,IDE_SECTOR_REG); + OUT_BYTE(drive->cyl,IDE_LCYL_REG); + OUT_BYTE(drive->cyl>>8,IDE_HCYL_REG); + OUT_BYTE(((drive->head-1)|drive->select.all)&0xBF,IDE_SELECT_REG); + if (!IS_PROMISE_DRIVE) + ide_cmd(drive, WIN_SPECIFY, drive->sect, &set_geometry_intr); + } else if (s->b.recalibrate) { + s->b.recalibrate = 0; + if (!IS_PROMISE_DRIVE) + ide_cmd(drive, WIN_RESTORE, drive->sect, &recal_intr); + } else if (s->b.set_multmode) { + s->b.set_multmode = 0; + if (drive->id && drive->mult_req > drive->id->max_multsect) + drive->mult_req = drive->id->max_multsect; + if (!IS_PROMISE_DRIVE) + ide_cmd(drive, WIN_SETMULT, drive->mult_req, 
&set_multmode_intr); + } else if (s->all) { + int special = s->all; + s->all = 0; + printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special); + } +} + +static void idedisk_pre_reset (ide_drive_t *drive) +{ + drive->special.all = 0; + drive->special.b.set_geometry = 1; + drive->special.b.recalibrate = 1; + if (OK_TO_RESET_CONTROLLER) + drive->mult_count = 0; + if (!drive->keep_settings) + drive->mult_req = 0; + if (drive->mult_req != drive->mult_count) + drive->special.b.set_multmode = 1; +} + +int idedisk_init (void); +static ide_module_t idedisk_module = { + IDE_DRIVER_MODULE, + idedisk_init, + NULL +}; + +/* + * IDE subdriver functions, registered with ide.c + */ +static ide_driver_t idedisk_driver = { + ide_disk, /* media */ + 0, /* busy */ + 1, /* supports_dma */ + NULL, /* cleanup */ + do_rw_disk, /* do_request */ + NULL, /* end_request */ + NULL, /* ioctl */ + idedisk_open, /* open */ + idedisk_release, /* release */ + idedisk_media_change, /* media_change */ + idedisk_pre_reset, /* pre_reset */ + idedisk_capacity, /* capacity */ + idedisk_special /* special */ +}; + +static int idedisk_cleanup (ide_drive_t *drive) +{ + return ide_unregister_subdriver(drive); +} + +static int idedisk_identify_device (ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + + if (id == NULL) + return 0; + + /* SunDisk drives: force one unit */ + if (id->model[0] == 'S' && id->model[1] == 'u' && (drive->select.all & (1<<4))) + return 1; + + return 0; +} + +static void idedisk_setup (ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + unsigned long capacity, check; + + if (id == NULL) + return; + + /* check for removable disks (eg. SYQUEST), ignore 'WD' drives */ + if (id->config & (1<<7)) { /* removable disk ? 
*/ + if (id->model[0] != 'W' || id->model[1] != 'D') + drive->removable = 1; + } + + /* SunDisk drives: treat as non-removable */ + if (id->model[0] == 'S' && id->model[1] == 'u') + drive->removable = 0; + + /* Extract geometry if we did not already have one for the drive */ + if (!drive->cyl || !drive->head || !drive->sect) { + drive->cyl = drive->bios_cyl = id->cyls; + drive->head = drive->bios_head = id->heads; + drive->sect = drive->bios_sect = id->sectors; + } + /* Handle logical geometry translation by the drive */ + if ((id->field_valid & 1) && id->cur_cyls && id->cur_heads + && (id->cur_heads <= 16) && id->cur_sectors) + { + /* + * Extract the physical drive geometry for our use. + * Note that we purposely do *not* update the bios info. + * This way, programs that use it (like fdisk) will + * still have the same logical view as the BIOS does, + * which keeps the partition table from being screwed. + * + * An exception to this is the cylinder count, + * which we reexamine later on to correct for 1024 limitations. + */ + drive->cyl = id->cur_cyls; + drive->head = id->cur_heads; + drive->sect = id->cur_sectors; + + /* check for word-swapped "capacity" field in id information */ + capacity = drive->cyl * drive->head * drive->sect; + check = (id->cur_capacity0 << 16) | id->cur_capacity1; + if (check == capacity) { /* was it swapped? 
*/ + /* yes, bring it into little-endian order: */ + id->cur_capacity0 = (capacity >> 0) & 0xffff; + id->cur_capacity1 = (capacity >> 16) & 0xffff; + } + } + /* Use physical geometry if what we have still makes no sense */ + if ((!drive->head || drive->head > 16) && id->heads && id->heads <= 16) { + drive->cyl = id->cyls; + drive->head = id->heads; + drive->sect = id->sectors; + } + /* Correct the number of cyls if the bios value is too small */ + if (drive->sect == drive->bios_sect && drive->head == drive->bios_head) { + if (drive->cyl > drive->bios_cyl) + drive->bios_cyl = drive->cyl; + } + + (void) idedisk_capacity (drive); /* initialize LBA selection */ + + printk (KERN_INFO "%s: %.40s, %ldMB w/%dkB Cache, %sCHS=%d/%d/%d%s\n", + drive->name, id->model, idedisk_capacity(drive)/2048L, id->buf_size/2, + drive->select.b.lba ? "LBA, " : "", + drive->bios_cyl, drive->bios_head, drive->bios_sect, + drive->using_dma ? ", DMA" : ""); + + drive->mult_count = 0; + if (id->max_multsect) { + drive->mult_req = INITIAL_MULT_COUNT; + if (drive->mult_req > id->max_multsect) + drive->mult_req = id->max_multsect; + if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect)) + drive->special.b.set_multmode = 1; + } +} + +int idedisk_init (void) +{ + ide_drive_t *drive; + int failed = 0; + + MOD_INC_USE_COUNT; + while ((drive = ide_scan_devices (ide_disk, NULL, failed++)) != NULL) { + if (idedisk_identify_device (drive)) + continue; + if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name); + continue; + } + idedisk_setup(drive); + if ((!drive->head || drive->head > 16) && !drive->select.b.lba) { + printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head); + (void) idedisk_cleanup(drive); + continue; + } + failed--; + } + ide_register_module(&idedisk_module); + MOD_DEC_USE_COUNT; + return 0; +} + +#ifdef MODULE +int init_module 
(void) +{ + return idedisk_init(); +} + +void cleanup_module (void) +{ + ide_drive_t *drive; + int failed = 0; + + while ((drive = ide_scan_devices (ide_disk, &idedisk_driver, failed)) != NULL) + if (idedisk_cleanup (drive)) { + printk (KERN_ERR "%s: cleanup_module() called while still busy\n", drive->name); + failed++; + } + ide_unregister_module(&idedisk_module); +} +#endif /* MODULE */ diff -u --recursive --new-file v2.1.7/linux/drivers/block/ide-floppy.c linux/drivers/block/ide-floppy.c --- v2.1.7/linux/drivers/block/ide-floppy.c Thu Jan 1 02:00:00 1970 +++ linux/drivers/block/ide-floppy.c Wed Nov 6 14:49:31 1996 @@ -0,0 +1,1432 @@ +/* + * linux/drivers/block/ide-floppy.c Version 0.2 - ALPHA Oct 31, 1996 + * + * Copyright (C) 1996 Gadi Oxman + */ + +/* + * IDE ATAPI floppy driver. + * + * The driver currently doesn't have any fancy features, just the bare + * minimum read/write support. + * + * Many thanks to Lode Leroy , who tested so many + * ALPHA patches to this driver on an EASYSTOR LS-120 ATAPI floppy drive. + * + * Ver 0.1 Oct 17 96 Initial test version, mostly based on ide-tape.c. + * Ver 0.2 Oct 31 96 Minor changes. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + * Main Linux ide driver include file + */ +#include "ide.h" + +/* + * The following are used to debug the driver. + */ +#define IDEFLOPPY_DEBUG_LOG 0 +#define IDEFLOPPY_DEBUG_INFO 0 +#define IDEFLOPPY_DEBUG_BUGS 1 + +/* + * After each failed packet command we issue a request sense command + * and retry the packet command IDEFLOPPY_MAX_PC_RETRIES times. + */ +#define IDEFLOPPY_MAX_PC_RETRIES 3 + +/* + * With each packet command, we allocate a buffer of + * IDEFLOPPY_PC_BUFFER_SIZE bytes. 
+ */ +#define IDEFLOPPY_PC_BUFFER_SIZE 256 + +/* + * In various places in the driver, we need to allocate storage + * for packet commands and requests, which will remain valid while + * we leave the driver to wait for an interrupt or a timeout event. + */ +#define IDEFLOPPY_PC_STACK (10 + IDEFLOPPY_MAX_PC_RETRIES) + +/* + * Our view of a packet command. + */ +typedef struct idefloppy_packet_command_s { + u8 c[12]; /* Actual packet bytes */ + int retries; /* On each retry, we increment retries */ + int error; /* Error code */ + int request_transfer; /* Bytes to transfer */ + int actually_transferred; /* Bytes actually transferred */ + int buffer_size; /* Size of our data buffer */ + char *b_data; /* Pointer which runs on the buffers */ + int b_count; /* Missing/Available data on the current buffer */ + struct request *rq; /* The corresponding request */ + byte *buffer; /* Data buffer */ + byte *current_position; /* Pointer into the above buffer */ + void (*callback) (ide_drive_t *); /* Called when this packet command is completed */ + byte pc_buffer[IDEFLOPPY_PC_BUFFER_SIZE]; /* Temporary buffer */ + unsigned int flags; /* Status/Action bit flags */ +} idefloppy_pc_t; + +/* + * Packet command flag bits. 
+ */ +#define PC_ABORT 0 /* Set when an error is considered normal - We won't retry */ +#define PC_DMA_RECOMMENDED 2 /* 1 when we prefer to use DMA if possible */ +#define PC_DMA_IN_PROGRESS 3 /* 1 while DMA in progress */ +#define PC_DMA_ERROR 4 /* 1 when encountered problem during DMA */ +#define PC_WRITING 5 /* Data direction */ + +/* + * Removable Block Access Capabilities Page + */ +typedef struct { + unsigned page_code :6; /* Page code - Should be 0x1b */ + unsigned reserved1_6 :1; /* Reserved */ + unsigned ps :1; /* Should be 0 */ + u8 page_length; /* Page Length - Should be 0xa */ + unsigned reserved2 :6; + unsigned srfp :1; /* Supports reporting progress of format */ + unsigned sflp :1; /* System floppy type device */ + unsigned tlun :3; /* Total logical units supported by the device */ + unsigned reserved3 :3; + unsigned sml :1; /* Single / Multiple lun supported */ + unsigned ncd :1; /* Non cd optical device */ + u8 reserved[8]; +} idefloppy_capabilities_page_t; + +/* + * Flexible disk page. 
+ */ +typedef struct { + unsigned page_code :6; /* Page code - Should be 0x5 */ + unsigned reserved1_6 :1; /* Reserved */ + unsigned ps :1; /* The device is capable of saving the page */ + u8 page_length; /* Page Length - Should be 0x1e */ + u16 transfer_rate; /* In kilobits per second */ + u8 heads, sectors; /* Number of heads, Number of sectors per track */ + u16 sector_size; /* Byes per sector */ + u16 cyls; /* Number of cylinders */ + u8 reserved10[10]; + u8 motor_delay; /* Motor off delay */ + u8 reserved21[7]; + u16 rpm; /* Rotations per minute */ + u8 reserved30[2]; +} idefloppy_flexible_disk_page_t; + +/* + * Format capacity + */ +typedef struct { + u8 reserved[3]; + u8 length; /* Length of the following descriptors in bytes */ +} idefloppy_capacity_header_t; + +typedef struct { + u32 blocks; /* Number of blocks */ + unsigned dc :2; /* Descriptor Code */ + unsigned reserved :6; + u8 length_msb; /* Block Length (MSB)*/ + u16 length; /* Block Length */ +} idefloppy_capacity_descriptor_t; + +#define CAPACITY_INVALID 0x00 +#define CAPACITY_UNFORMATTED 0x01 +#define CAPACITY_CURRENT 0x02 +#define CAPACITY_NO_CARTRIDGE 0x03 + +/* + * Most of our global data which we need to save even as we leave the + * driver due to an interrupt or a timer event is stored in a variable + * of type idefloppy_floppy_t, defined below. 
+ */ +typedef struct { + ide_drive_t *drive; + + idefloppy_pc_t *pc; /* Current packet command */ + idefloppy_pc_t *failed_pc; /* Last failed packet command */ + idefloppy_pc_t pc_stack[IDEFLOPPY_PC_STACK];/* Packet command stack */ + int pc_stack_index; /* Next free packet command storage space */ + struct request rq_stack[IDEFLOPPY_PC_STACK]; + int rq_stack_index; /* We implement a circular array */ + + /* + * Last error information + */ + byte sense_key, asc, ascq; + + /* + * Device information + */ + int blocks, block_size, bs_factor; /* Current format */ + idefloppy_capacity_descriptor_t capacity; /* Last format capacity */ + idefloppy_flexible_disk_page_t flexible_disk_page; /* Copy of the flexible disk page */ + + unsigned int flags; /* Status/Action flags */ +} idefloppy_floppy_t; + +/* + * Floppy flag bits values. + */ +#define IDEFLOPPY_DRQ_INTERRUPT 0 /* DRQ interrupt device */ +#define IDEFLOPPY_MEDIA_CHANGED 1 /* Media may have changed */ +#define IDEFLOPPY_USE_READ12 2 /* Use READ12/WRITE12 or READ10/WRITE10 */ + +/* + * ATAPI floppy drive packet commands + */ +#define IDEFLOPPY_FORMAT_UNIT_CMD 0x04 +#define IDEFLOPPY_INQUIRY_CMD 0x12 +#define IDEFLOPPY_MODE_SELECT_CMD 0x55 +#define IDEFLOPPY_MODE_SENSE_CMD 0x5a +#define IDEFLOPPY_READ10_CMD 0x28 +#define IDEFLOPPY_READ12_CMD 0xa8 +#define IDEFLOPPY_READ_CAPACITY_CMD 0x23 +#define IDEFLOPPY_REQUEST_SENSE_CMD 0x03 +#define IDEFLOPPY_PREVENT_REMOVAL_CMD 0x1e +#define IDEFLOPPY_SEEK_CMD 0x2b +#define IDEFLOPPY_START_STOP_CMD 0x1b +#define IDEFLOPPY_TEST_UNIT_READY_CMD 0x00 +#define IDEFLOPPY_VERIFY_CMD 0x2f +#define IDEFLOPPY_WRITE10_CMD 0x2a +#define IDEFLOPPY_WRITE12_CMD 0xaa +#define IDEFLOPPY_WRITE_VERIFY_CMD 0x2e + +/* + * Defines for the mode sense command + */ +#define MODE_SENSE_CURRENT 0x00 +#define MODE_SENSE_CHANGEABLE 0x01 +#define MODE_SENSE_DEFAULT 0x02 +#define MODE_SENSE_SAVED 0x03 + +/* + * Special requests for our block device strategy routine. 
+ */ +#define IDEFLOPPY_FIRST_RQ 90 + +/* + * IDEFLOPPY_PC_RQ is used to queue a packet command in the request queue. + */ +#define IDEFLOPPY_PC_RQ 90 + +#define IDEFLOPPY_LAST_RQ 90 + +/* + * A macro which can be used to check if a given request command + * originated in the driver or in the buffer cache layer. + */ +#define IDEFLOPPY_RQ_CMD(cmd) ((cmd >= IDEFLOPPY_FIRST_RQ) && (cmd <= IDEFLOPPY_LAST_RQ)) + +/* + * Error codes which are returned in rq->errors to the higher part + * of the driver. + */ +#define IDEFLOPPY_ERROR_GENERAL 101 + +/* + * The ATAPI Status Register. + */ +typedef union { + unsigned all :8; + struct { + unsigned check :1; /* Error occurred */ + unsigned idx :1; /* Reserved */ + unsigned corr :1; /* Correctable error occurred */ + unsigned drq :1; /* Data is request by the device */ + unsigned dsc :1; /* Media access command finished */ + unsigned reserved5 :1; /* Reserved */ + unsigned drdy :1; /* Ignored for ATAPI commands (ready to accept ATA command) */ + unsigned bsy :1; /* The device has access to the command block */ + } b; +} idefloppy_status_reg_t; + +/* + * The ATAPI error register. + */ +typedef union { + unsigned all :8; + struct { + unsigned ili :1; /* Illegal Length Indication */ + unsigned eom :1; /* End Of Media Detected */ + unsigned abrt :1; /* Aborted command - As defined by ATA */ + unsigned mcr :1; /* Media Change Requested - As defined by ATA */ + unsigned sense_key :4; /* Sense key of the last failed packet command */ + } b; +} idefloppy_error_reg_t; + +/* + * ATAPI Feature Register + */ +typedef union { + unsigned all :8; + struct { + unsigned dma :1; /* Using DMA or PIO */ + unsigned reserved321 :3; /* Reserved */ + unsigned reserved654 :3; /* Reserved (Tag Type) */ + unsigned reserved7 :1; /* Reserved */ + } b; +} idefloppy_feature_reg_t; + +/* + * ATAPI Byte Count Register. 
+ */ +typedef union { + unsigned all :16; + struct { + unsigned low :8; /* LSB */ + unsigned high :8; /* MSB */ + } b; +} idefloppy_bcount_reg_t; + +/* + * ATAPI Interrupt Reason Register. + */ +typedef union { + unsigned all :8; + struct { + unsigned cod :1; /* Information transferred is command (1) or data (0) */ + unsigned io :1; /* The device requests us to read (1) or write (0) */ + unsigned reserved :6; /* Reserved */ + } b; +} idefloppy_ireason_reg_t; + +/* + * ATAPI floppy Drive Select Register + */ +typedef union { + unsigned all :8; + struct { + unsigned sam_lun :3; /* Logical unit number */ + unsigned reserved3 :1; /* Reserved */ + unsigned drv :1; /* The responding drive will be drive 0 (0) or drive 1 (1) */ + unsigned one5 :1; /* Should be set to 1 */ + unsigned reserved6 :1; /* Reserved */ + unsigned one7 :1; /* Should be set to 1 */ + } b; +} idefloppy_drivesel_reg_t; + +/* + * ATAPI Device Control Register + */ +typedef union { + unsigned all :8; + struct { + unsigned zero0 :1; /* Should be set to zero */ + unsigned nien :1; /* Device interrupt is disabled (1) or enabled (0) */ + unsigned srst :1; /* ATA software reset. ATAPI devices should use the new ATAPI srst. */ + unsigned one3 :1; /* Should be set to 1 */ + unsigned reserved4567 :4; /* Reserved */ + } b; +} idefloppy_control_reg_t; + +/* + * The following is used to format the general configuration word of + * the ATAPI IDENTIFY DEVICE command. 
+ */ +struct idefloppy_id_gcw { + unsigned packet_size :2; /* Packet Size */ + unsigned reserved234 :3; /* Reserved */ + unsigned drq_type :2; /* Command packet DRQ type */ + unsigned removable :1; /* Removable media */ + unsigned device_type :5; /* Device type */ + unsigned reserved13 :1; /* Reserved */ + unsigned protocol :2; /* Protocol type */ +}; + +/* + * INQUIRY packet command - Data Format + */ +typedef struct { + unsigned device_type :5; /* Peripheral Device Type */ + unsigned reserved0_765 :3; /* Peripheral Qualifier - Reserved */ + unsigned reserved1_6t0 :7; /* Reserved */ + unsigned rmb :1; /* Removable Medium Bit */ + unsigned ansi_version :3; /* ANSI Version */ + unsigned ecma_version :3; /* ECMA Version */ + unsigned iso_version :2; /* ISO Version */ + unsigned response_format :4; /* Response Data Format */ + unsigned reserved3_45 :2; /* Reserved */ + unsigned reserved3_6 :1; /* TrmIOP - Reserved */ + unsigned reserved3_7 :1; /* AENC - Reserved */ + u8 additional_length; /* Additional Length (total_length-4) */ + u8 rsv5, rsv6, rsv7; /* Reserved */ + u8 vendor_id[8]; /* Vendor Identification */ + u8 product_id[16]; /* Product Identification */ + u8 revision_level[4]; /* Revision Level */ + u8 vendor_specific[20]; /* Vendor Specific - Optional */ + u8 reserved56t95[40]; /* Reserved - Optional */ + /* Additional information may be returned */ +} idefloppy_inquiry_result_t; + +/* + * REQUEST SENSE packet command result - Data Format. 
+ */ +typedef struct { + unsigned error_code :7; /* Current error (0x70) */ + unsigned valid :1; /* The information field conforms to SFF-8070i */ + u8 reserved1 :8; /* Reserved */ + unsigned sense_key :4; /* Sense Key */ + unsigned reserved2_4 :1; /* Reserved */ + unsigned ili :1; /* Incorrect Length Indicator */ + unsigned reserved2_67 :2; + u32 information __attribute__ ((packed)); + u8 asl; /* Additional sense length (n-7) */ + u32 command_specific; /* Additional command specific information */ + u8 asc; /* Additional Sense Code */ + u8 ascq; /* Additional Sense Code Qualifier */ + u8 replaceable_unit_code; /* Field Replaceable Unit Code */ + u8 reserved[3]; + u8 pad[2]; /* Padding to 20 bytes */ +} idefloppy_request_sense_result_t; + +/* + * Pages of the SELECT SENSE / MODE SENSE packet commands. + */ +#define IDEFLOPPY_CAPABILITIES_PAGE 0x1b +#define IDEFLOPPY_FLEXIBLE_DISK_PAGE 0x05 + +/* + * Mode Parameter Header for the MODE SENSE packet command + */ +typedef struct { + u16 mode_data_length; /* Length of the following data transfer */ + u8 medium_type; /* Medium Type */ + unsigned reserved3 :7; + unsigned wp :1; /* Write protect */ + u8 reserved[4]; +} idefloppy_mode_parameter_header_t; + +#define IDEFLOPPY_MIN(a,b) ((a)<(b) ? (a):(b)) +#define IDEFLOPPY_MAX(a,b) ((a)>(b) ? (a):(b)) + +/* + * Too bad. The drive wants to send us data which we are not ready to accept. + * Just throw it away. + */ +static void idefloppy_discard_data (ide_drive_t *drive, unsigned int bcount) +{ + while (bcount--) + IN_BYTE (IDE_DATA_REG); +} + +#if IDEFLOPPY_DEBUG_BUGS +static void idefloppy_write_zeros (ide_drive_t *drive, unsigned int bcount) +{ + while (bcount--) + OUT_BYTE (0, IDE_DATA_REG); +} +#endif /* IDEFLOPPY_DEBUG_BUGS */ + +/* + * idefloppy_end_request is used to finish servicing a request. + * + * For read/write requests, we will call ide_end_request to pass to the + * next buffer. 
+ */ +static void idefloppy_end_request (byte uptodate, ide_hwgroup_t *hwgroup) +{ + ide_drive_t *drive = hwgroup->drive; + struct request *rq = hwgroup->rq; + +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "Reached idefloppy_end_request\n"); +#endif /* IDEFLOPPY_DEBUG_LOG */ + + if (!IDEFLOPPY_RQ_CMD (rq->cmd)) { + ide_end_request (uptodate, hwgroup); + return; + } + switch (uptodate) { + case 0: rq->errors = IDEFLOPPY_ERROR_GENERAL; break; + case 1: rq->errors = 0; break; + default: rq->errors = uptodate; + } + ide_end_drive_cmd (drive, 0, 0); +} + +static void idefloppy_input_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount) +{ + struct request *rq = pc->rq; + struct buffer_head *bh = rq->bh; + int count; + + while (bcount) { +#if IDEFLOPPY_DEBUG_BUGS + if (bh == NULL) { + printk (KERN_ERR "%s: bh == NULL in idefloppy_input_buffers, bcount == %d\n", drive->name, bcount); + idefloppy_discard_data (drive, bcount); + return; + } +#endif /* IDEFLOPPY_DEBUG_BUGS */ + count = IDEFLOPPY_MIN (bh->b_size - pc->b_count, bcount); + atapi_input_bytes (drive, bh->b_data + pc->b_count, count); + bcount -= count; pc->b_count += count; + if (pc->b_count == bh->b_size) { + rq->sector += rq->current_nr_sectors; + rq->nr_sectors -= rq->current_nr_sectors; + idefloppy_end_request (1, HWGROUP(drive)); + if ((bh = rq->bh) != NULL) + pc->b_count = 0; + } + } +} + +static void idefloppy_output_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount) +{ + struct request *rq = pc->rq; + struct buffer_head *bh = rq->bh; + int count; + + while (bcount) { +#if IDEFLOPPY_DEBUG_BUGS + if (bh == NULL) { + printk (KERN_ERR "%s: bh == NULL in idefloppy_output_buffers, bcount == %d\n", drive->name, bcount); + idefloppy_write_zeros (drive, bcount); + return; + } +#endif /* IDEFLOPPY_DEBUG_BUGS */ + count = IDEFLOPPY_MIN (pc->b_count, bcount); + atapi_output_bytes (drive, pc->b_data, count); + bcount -= count; pc->b_data += count; pc->b_count -= count; + if 
(!pc->b_count) { + rq->sector += rq->current_nr_sectors; + rq->nr_sectors -= rq->current_nr_sectors; + idefloppy_end_request (1, HWGROUP(drive)); + if ((bh = rq->bh) != NULL) { + pc->b_data = bh->b_data; + pc->b_count = bh->b_size; + } + } + } +} + +#ifdef CONFIG_BLK_DEV_TRITON +static void idefloppy_update_buffers (ide_drive_t *drive, idefloppy_pc_t *pc) +{ + struct request *rq = pc->rq; + struct buffer_head *bh = rq->bh; + + while ((bh = rq->bh) != NULL) + idefloppy_end_request (1, HWGROUP(drive)); +} +#endif /* CONFIG_BLK_DEV_TRITON */ + +/* + * idefloppy_queue_pc_head generates a new packet command request in front + * of the request queue, before the current request, so that it will be + * processed immediately, on the next pass through the driver. + */ +static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struct request *rq) +{ + ide_init_drive_cmd (rq); + rq->buffer = (char *) pc; + rq->cmd = IDEFLOPPY_PC_RQ; + (void) ide_do_drive_cmd (drive, rq, ide_preempt); +} + +static idefloppy_pc_t *idefloppy_next_pc_storage (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + + if (floppy->pc_stack_index==IDEFLOPPY_PC_STACK) + floppy->pc_stack_index=0; + return (&floppy->pc_stack[floppy->pc_stack_index++]); +} + +static struct request *idefloppy_next_rq_storage (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + + if (floppy->rq_stack_index==IDEFLOPPY_PC_STACK) + floppy->rq_stack_index=0; + return (&floppy->rq_stack[floppy->rq_stack_index++]); +} + +/* + * idefloppy_analyze_error is called on each failed packet command retry + * to analyze the request sense. 
+ */ +static void idefloppy_analyze_error (ide_drive_t *drive,idefloppy_request_sense_result_t *result) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + + floppy->sense_key = result->sense_key; floppy->asc = result->asc; floppy->ascq = result->ascq; +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "ide-floppy: pc = %x, sense key = %x, asc = %x, ascq = %x\n",floppy->failed_pc->c[0],result->sense_key,result->asc,result->ascq); +#endif /* IDEFLOPPY_DEBUG_LOG */ +} + +static void idefloppy_request_sense_callback (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "ide-floppy: Reached idefloppy_request_sense_callback\n"); +#endif /* IDEFLOPPY_DEBUG_LOG */ + if (!floppy->pc->error) { + idefloppy_analyze_error (drive,(idefloppy_request_sense_result_t *) floppy->pc->buffer); + idefloppy_end_request (1,HWGROUP (drive)); + } else { + printk (KERN_ERR "Error in REQUEST SENSE itself - Aborting request!\n"); + idefloppy_end_request (0,HWGROUP (drive)); + } +} + +/* + * General packet command callback function. + */ +static void idefloppy_pc_callback (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "ide-floppy: Reached idefloppy_pc_callback\n"); +#endif /* IDEFLOPPY_DEBUG_LOG */ + + idefloppy_end_request (floppy->pc->error ? 0:1, HWGROUP(drive)); +} + +/* + * idefloppy_init_pc initializes a packet command. 
+ */ +static void idefloppy_init_pc (idefloppy_pc_t *pc) +{ + memset (pc->c, 0, 12); + pc->retries = 0; + pc->flags = 0; + pc->request_transfer = 0; + pc->buffer = pc->pc_buffer; + pc->buffer_size = IDEFLOPPY_PC_BUFFER_SIZE; + pc->b_data = NULL; + pc->callback = &idefloppy_pc_callback; +} + +static void idefloppy_create_request_sense_cmd (idefloppy_pc_t *pc) +{ + idefloppy_init_pc (pc); + pc->c[0] = IDEFLOPPY_REQUEST_SENSE_CMD; + pc->c[4] = 255; + pc->request_transfer = 18; + pc->callback = &idefloppy_request_sense_callback; +} + +/* + * idefloppy_retry_pc is called when an error was detected during the + * last packet command. We queue a request sense packet command in + * the head of the request list. + */ +static void idefloppy_retry_pc (ide_drive_t *drive) +{ + idefloppy_pc_t *pc; + struct request *rq; + idefloppy_error_reg_t error; + + error.all = IN_BYTE (IDE_ERROR_REG); + pc = idefloppy_next_pc_storage (drive); + rq = idefloppy_next_rq_storage (drive); + idefloppy_create_request_sense_cmd (pc); + idefloppy_queue_pc_head (drive, pc, rq); +} + +/* + * idefloppy_pc_intr is the usual interrupt handler which will be called + * during a packet command. 
+ */ +static void idefloppy_pc_intr (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + idefloppy_status_reg_t status; + idefloppy_bcount_reg_t bcount; + idefloppy_ireason_reg_t ireason; + idefloppy_pc_t *pc=floppy->pc; + struct request *rq = pc->rq; + unsigned int temp; + +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "ide-floppy: Reached idefloppy_pc_intr interrupt handler\n"); +#endif /* IDEFLOPPY_DEBUG_LOG */ + +#ifdef CONFIG_BLK_DEV_TRITON + if (test_bit (PC_DMA_IN_PROGRESS, &pc->flags)) { + if (HWIF(drive)->dmaproc(ide_dma_status_bad, drive)) { + set_bit (PC_DMA_ERROR, &pc->flags); + } else { + pc->actually_transferred=pc->request_transfer; + idefloppy_update_buffers (drive, pc); + } + (void) (HWIF(drive)->dmaproc(ide_dma_abort, drive)); /* End DMA */ +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "ide-floppy: DMA finished\n"); +#endif /* IDEFLOPPY_DEBUG_LOG */ + } +#endif /* CONFIG_BLK_DEV_TRITON */ + + status.all = GET_STAT(); /* Clear the interrupt */ + + if (!status.b.drq) { /* No more interrupts */ +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "Packet command completed, %d bytes transferred\n", pc->actually_transferred); +#endif /* IDEFLOPPY_DEBUG_LOG */ + clear_bit (PC_DMA_IN_PROGRESS, &pc->flags); + + ide_sti(); + + if (status.b.check || test_bit (PC_DMA_ERROR, &pc->flags)) { /* Error detected */ +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "ide-floppy: %s: I/O error, ",drive->name); +#endif /* IDEFLOPPY_DEBUG_LOG */ + rq->errors++; + if (pc->c[0] == IDEFLOPPY_REQUEST_SENSE_CMD) { + printk (KERN_ERR "ide-floppy: I/O error in request sense command\n"); + ide_do_reset (drive); + return; + } + idefloppy_retry_pc (drive); /* Retry operation */ + return; + } + pc->error = 0; + if (floppy->failed_pc == pc) + floppy->failed_pc=NULL; + pc->callback(drive); /* Command finished - Call the callback function */ + return; + } +#ifdef CONFIG_BLK_DEV_TRITON + if (clear_bit (PC_DMA_IN_PROGRESS, &pc->flags)) { + printk (KERN_ERR "ide-floppy: The 
floppy wants to issue more interrupts in DMA mode\n"); + printk (KERN_ERR "ide-floppy: DMA disabled, reverting to PIO\n"); + drive->using_dma=0; + ide_do_reset (drive); + return; + } +#endif /* CONFIG_BLK_DEV_TRITON */ + bcount.b.high=IN_BYTE (IDE_BCOUNTH_REG); /* Get the number of bytes to transfer */ + bcount.b.low=IN_BYTE (IDE_BCOUNTL_REG); /* on this interrupt */ + ireason.all=IN_BYTE (IDE_IREASON_REG); + + if (ireason.b.cod) { + printk (KERN_ERR "ide-floppy: CoD != 0 in idefloppy_pc_intr\n"); + ide_do_reset (drive); + return; + } + if (ireason.b.io == test_bit (PC_WRITING, &pc->flags)) { /* Hopefully, we will never get here */ + printk (KERN_ERR "ide-floppy: We wanted to %s, ", ireason.b.io ? "Write":"Read"); + printk (KERN_ERR "but the floppy wants us to %s !\n",ireason.b.io ? "Read":"Write"); + ide_do_reset (drive); + return; + } + if (!test_bit (PC_WRITING, &pc->flags)) { /* Reading - Check that we have enough space */ + temp = pc->actually_transferred + bcount.all; + if ( temp > pc->request_transfer) { + if (temp > pc->buffer_size) { + printk (KERN_ERR "ide-floppy: The floppy wants to send us more data than expected - discarding data\n"); + idefloppy_discard_data (drive,bcount.all); + ide_set_handler (drive,&idefloppy_pc_intr,WAIT_CMD); + return; + } +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_NOTICE "ide-floppy: The floppy wants to send us more data than expected - allowing transfer\n"); +#endif /* IDEFLOPPY_DEBUG_LOG */ + } + } + if (test_bit (PC_WRITING, &pc->flags)) { + if (pc->buffer != NULL) + atapi_output_bytes (drive,pc->current_position,bcount.all); /* Write the current buffer */ + else + idefloppy_output_buffers (drive, pc, bcount.all); + } else { + if (pc->buffer != NULL) + atapi_input_bytes (drive,pc->current_position,bcount.all); /* Read the current buffer */ + else + idefloppy_input_buffers (drive, pc, bcount.all); + } + pc->actually_transferred+=bcount.all; /* Update the current position */ + pc->current_position+=bcount.all; + + ide_set_handler 
(drive,&idefloppy_pc_intr,WAIT_CMD); /* And set the interrupt handler again */ +} + +static void idefloppy_transfer_pc (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + idefloppy_ireason_reg_t ireason; + + if (ide_wait_stat (drive,DRQ_STAT,BUSY_STAT,WAIT_READY)) { + printk (KERN_ERR "ide-floppy: Strange, packet command initiated yet DRQ isn't asserted\n"); + return; + } + ireason.all=IN_BYTE (IDE_IREASON_REG); + if (!ireason.b.cod || ireason.b.io) { + printk (KERN_ERR "ide-floppy: (IO,CoD) != (0,1) while issuing a packet command\n"); + ide_do_reset (drive); + return; + } + ide_set_handler (drive, &idefloppy_pc_intr, WAIT_CMD); /* Set the interrupt routine */ + atapi_output_bytes (drive, floppy->pc->c, 12); /* Send the actual packet */ +} + +/* + * Issue a packet command + */ +static void idefloppy_issue_pc (ide_drive_t *drive, idefloppy_pc_t *pc) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + idefloppy_bcount_reg_t bcount; + int dma_ok = 0; + +#if IDEFLOPPY_DEBUG_BUGS + if (floppy->pc->c[0] == IDEFLOPPY_REQUEST_SENSE_CMD && pc->c[0] == IDEFLOPPY_REQUEST_SENSE_CMD) { + printk (KERN_ERR "ide-floppy: possible ide-floppy.c bug - Two request sense in serial were issued\n"); + } +#endif /* IDEFLOPPY_DEBUG_BUGS */ + + if (floppy->failed_pc == NULL && pc->c[0] != IDEFLOPPY_REQUEST_SENSE_CMD) + floppy->failed_pc=pc; + floppy->pc=pc; /* Set the current packet command */ + + if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES || test_bit (PC_ABORT, &pc->flags)) { + /* + * We will "abort" retrying a packet command in case + * a legitimate error code was received. 
+ */ + if (!test_bit (PC_ABORT, &pc->flags)) { + printk (KERN_ERR "ide-floppy: %s: I/O error, pc = %2x, key = %2x, asc = %2x, ascq = %2x\n", + drive->name, pc->c[0], floppy->sense_key, floppy->asc, floppy->ascq); + pc->error = IDEFLOPPY_ERROR_GENERAL; /* Giving up */ + } + floppy->failed_pc=NULL; + pc->callback(drive); + return; + } +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "Retry number - %d\n",pc->retries); +#endif /* IDEFLOPPY_DEBUG_LOG */ + + pc->retries++; + pc->actually_transferred=0; /* We haven't transferred any data yet */ + pc->current_position=pc->buffer; + bcount.all=pc->request_transfer; /* Request to transfer the entire buffer at once */ + +#ifdef CONFIG_BLK_DEV_TRITON + if (clear_bit (PC_DMA_ERROR, &pc->flags)) { + printk (KERN_WARNING "ide-floppy: DMA disabled, reverting to PIO\n"); + drive->using_dma=0; + } + if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma) + dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive); +#endif /* CONFIG_BLK_DEV_TRITON */ + + OUT_BYTE (drive->ctl,IDE_CONTROL_REG); + OUT_BYTE (dma_ok ? 
1:0,IDE_FEATURE_REG); /* Use PIO/DMA */ + OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG); + OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG); + OUT_BYTE (drive->select.all,IDE_SELECT_REG); + +#ifdef CONFIG_BLK_DEV_TRITON + if (dma_ok) { /* Begin DMA, if necessary */ + set_bit (PC_DMA_IN_PROGRESS, &pc->flags); + (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); + } +#endif /* CONFIG_BLK_DEV_TRITON */ + + if (test_bit (IDEFLOPPY_DRQ_INTERRUPT, &floppy->flags)) { + ide_set_handler (drive, &idefloppy_transfer_pc, WAIT_CMD); + OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* Issue the packet command */ + } else { + OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); + idefloppy_transfer_pc (drive); + } +} + +static void idefloppy_rw_callback (ide_drive_t *drive) +{ +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "ide-floppy: Reached idefloppy_rw_callback\n"); +#endif /* IDEFLOPPY_DEBUG_LOG */ + + return; +} + +static void idefloppy_create_prevent_cmd (idefloppy_pc_t *pc, int prevent) +{ +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "ide-floppy: creating prevent removal command, prevent = %d\n", prevent); +#endif /* IDEFLOPPY_DEBUG_LOG */ + + idefloppy_init_pc (pc); + pc->c[0] = IDEFLOPPY_PREVENT_REMOVAL_CMD; + pc->c[4] = prevent; +} + +static void idefloppy_create_read_capacity_cmd (idefloppy_pc_t *pc) +{ + idefloppy_init_pc (pc); + pc->c[0] = IDEFLOPPY_READ_CAPACITY_CMD; + pc->c[7] = 255; + pc->c[8] = 255; +} + +/* + * A mode sense command is used to "sense" floppy parameters. 
+ */ +static void idefloppy_create_mode_sense_cmd (idefloppy_pc_t *pc, byte page_code, byte type) +{ + unsigned short length = sizeof (idefloppy_mode_parameter_header_t); + + idefloppy_init_pc (pc); + pc->c[0] = IDEFLOPPY_MODE_SENSE_CMD; + pc->c[1] = 0; + pc->c[2] = page_code + (type << 6); + + switch (page_code) { + case IDEFLOPPY_CAPABILITIES_PAGE: + length += 12; + break; + case IDEFLOPPY_FLEXIBLE_DISK_PAGE: + length += 32; + break; + default: + printk (KERN_ERR "ide-floppy: unsupported page code in create_mode_sense_cmd\n"); + } + put_unaligned (htons (length), (unsigned short *) &pc->c[7]); + pc->request_transfer = length; +} + +static void idefloppy_create_start_stop_cmd (idefloppy_pc_t *pc, int start) +{ + idefloppy_init_pc (pc); + pc->c[0] = IDEFLOPPY_START_STOP_CMD; + pc->c[4] = start; +} + +static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t *pc, struct request *rq) +{ + int block = rq->sector / floppy->bs_factor; + int blocks = rq->nr_sectors / floppy->bs_factor; + +#if IDEFLOPPY_DEBUG_LOG + printk ("create_rw1%d_cmd: block == %d, blocks == %d\n", + 2 * test_bit (IDEFLOPPY_USE_READ12, &floppy->flags), block, blocks); +#endif /* IDEFLOPPY_DEBUG_LOG */ + + idefloppy_init_pc (pc); + if (test_bit (IDEFLOPPY_USE_READ12, &floppy->flags)) { + pc->c[0] = rq->cmd == READ ? IDEFLOPPY_READ12_CMD : IDEFLOPPY_WRITE12_CMD; + put_unaligned (htonl (blocks), (unsigned int *) &pc->c[6]); + } else { + pc->c[0] = rq->cmd == READ ? IDEFLOPPY_READ10_CMD : IDEFLOPPY_WRITE10_CMD; + put_unaligned (htons (blocks), (unsigned short *) &pc->c[7]); + } + put_unaligned (htonl (block), (unsigned int *) &pc->c[2]); + pc->callback = &idefloppy_rw_callback; + pc->rq = rq; + pc->b_data = rq->buffer; + pc->b_count = rq->cmd == READ ? 
0 : rq->bh->b_size; + if (rq->cmd == WRITE) + set_bit (PC_WRITING, &pc->flags); + pc->buffer = NULL; + pc->request_transfer = pc->buffer_size = blocks * floppy->block_size; + set_bit (PC_DMA_RECOMMENDED, &pc->flags); +} + +/* + * idefloppy_do_request is our request handling function. + */ +static void idefloppy_do_request (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + idefloppy_pc_t *pc; + +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "rq_status: %d, rq_dev: %u, cmd: %d, errors: %d\n",rq->rq_status,(unsigned int) rq->rq_dev,rq->cmd,rq->errors); + printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %ld\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors); +#endif /* IDEFLOPPY_DEBUG_LOG */ + + if (rq->errors >= ERROR_MAX) { + printk (KERN_ERR "ide-floppy: %s: I/O error, pc = %2x, key = %2x, asc = %2x, ascq = %2x\n", + drive->name, floppy->failed_pc->c[0], floppy->sense_key, floppy->asc, floppy->ascq); + idefloppy_end_request (0, HWGROUP(drive)); + return; + } + switch (rq->cmd) { + case READ: + case WRITE: + if (rq->sector % floppy->bs_factor || rq->nr_sectors % floppy->bs_factor) { + printk ("%s: unsupported r/w request size\n", drive->name); + idefloppy_end_request (0, HWGROUP(drive)); + return; + } + pc = idefloppy_next_pc_storage (drive); + idefloppy_create_rw_cmd (floppy, pc, rq); + break; + case IDEFLOPPY_PC_RQ: + pc = (idefloppy_pc_t *) rq->buffer; + break; + default: + printk (KERN_ERR "ide-floppy: unsupported command %x in request queue\n", rq->cmd); + idefloppy_end_request (0,HWGROUP (drive)); + return; + } + pc->rq = rq; + idefloppy_issue_pc (drive, pc); +} + +/* + * idefloppy_queue_pc_tail adds a special packet command request to the + * tail of the request queue, and waits for it to be serviced. 
+ */ +static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc) +{ + struct request rq; + + ide_init_drive_cmd (&rq); + rq.buffer = (char *) pc; + rq.cmd = IDEFLOPPY_PC_RQ; + return ide_do_drive_cmd (drive, &rq, ide_wait); +} + +/* + * Look at the flexible disk page parameters. We will ignore the CHS + * capacity parameters and use the LBA parameters instead. + */ +static int idefloppy_get_flexible_disk_page (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + idefloppy_pc_t pc; + idefloppy_mode_parameter_header_t *header; + idefloppy_flexible_disk_page_t *page; + int capacity; + + idefloppy_create_mode_sense_cmd (&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE, MODE_SENSE_CURRENT); + if (idefloppy_queue_pc_tail (drive,&pc)) { + printk (KERN_ERR "ide-floppy: Can't get flexible disk page parameters\n"); + return 1; + } + header = (idefloppy_mode_parameter_header_t *) pc.buffer; + page = (idefloppy_flexible_disk_page_t *) (header + 1); + + page->transfer_rate = ntohs (page->transfer_rate); + page->sector_size = ntohs (page->sector_size); + page->cyls = ntohs (page->cyls); + page->rpm = ntohs (page->rpm); + capacity = page->cyls * page->heads * page->sectors * page->sector_size; + if (memcmp (page, &floppy->flexible_disk_page, sizeof (idefloppy_flexible_disk_page_t))) { + printk (KERN_INFO "%s: %dkB, %d/%d/%d CHS, %d kBps, %d sector size, %d rpm\n", + drive->name, capacity / 1024, page->cyls, page->heads, page->sectors, + page->transfer_rate / 8, page->sector_size, page->rpm); + floppy->flexible_disk_page = *page; + if (capacity != floppy->blocks * floppy->block_size) + printk (KERN_NOTICE "%s: The drive reports both %d and %d bytes as its capacity\n", + drive->name, capacity, floppy->blocks * floppy->block_size); + } + return 0; +} + +/* + * Determine if a media is present in the floppy drive, and if so, + * its LBA capacity. 
+ */ +static int idefloppy_get_capacity (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + idefloppy_pc_t pc; + idefloppy_capacity_header_t *header; + idefloppy_capacity_descriptor_t *descriptor; + int i, descriptors, rc = 1, blocks, length; + + idefloppy_create_read_capacity_cmd (&pc); + if (idefloppy_queue_pc_tail (drive, &pc)) { + printk (KERN_ERR "ide-floppy: Can't get floppy parameters\n"); + return 1; + } + header = (idefloppy_capacity_header_t *) pc.buffer; + descriptors = header->length / sizeof (idefloppy_capacity_descriptor_t); + descriptor = (idefloppy_capacity_descriptor_t *) (header + 1); + for (i = 0; i < descriptors; i++, descriptor++) { + blocks = descriptor->blocks = ntohl (descriptor->blocks); + length = descriptor->length = ntohs (descriptor->length); + if (!i && descriptor->dc == CAPACITY_CURRENT) { + if (memcmp (descriptor, &floppy->capacity, sizeof (idefloppy_capacity_descriptor_t))) { + printk (KERN_INFO "%s: %dkB, %d blocks, %d sector size\n", drive->name, blocks * length / 1024, blocks, length); + floppy->capacity = *descriptor; + } + if (!length || length % 512) + printk (KERN_ERR "%s: %d bytes block size not supported\n", drive->name, length); + else { + floppy->blocks = blocks; + floppy->block_size = length; + if ((floppy->bs_factor = length / 512) != 1) + printk (KERN_NOTICE "%s: warning: non 512 bytes block size not fully supported\n", drive->name); + drive->part[0].nr_sects = blocks * floppy->bs_factor; + if (length > BLOCK_SIZE) + blksize_size[HWIF(drive)->major][drive->select.b.unit << PARTN_BITS] = length; + rc = 0; + } + } +#if IDEFLOPPY_DEBUG_INFO + if (!i) printk (KERN_INFO "Descriptor 0 Code: %d\n", descriptor->dc); + printk (KERN_INFO "Descriptor %d: %dkB, %d blocks, %d sector size\n", i, blocks * length / 1024, blocks, length); +#endif /* IDEFLOPPY_DEBUG_INFO */ + } + (void) idefloppy_get_flexible_disk_page (drive); + return rc; +} + +/* + * Our special ide-floppy ioctl's. 
+ * + * Currently there aren't any ioctl's. + */ +static int idefloppy_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + return -EIO; +} + +/* + * Our open/release functions + */ +static int idefloppy_open (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + idefloppy_pc_t pc; + +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "Reached idefloppy_open\n"); +#endif /* IDEFLOPPY_DEBUG_LOG */ + + MOD_INC_USE_COUNT; + if (drive->usage == 1) { + idefloppy_create_start_stop_cmd (&pc, 1); + (void) idefloppy_queue_pc_tail (drive, &pc); + if (idefloppy_get_capacity (drive)) { + drive->usage--; + MOD_DEC_USE_COUNT; + return -EIO; + } + set_bit (IDEFLOPPY_MEDIA_CHANGED, &floppy->flags); + idefloppy_create_prevent_cmd (&pc, 1); + (void) idefloppy_queue_pc_tail (drive, &pc); + check_disk_change(inode->i_rdev); + } + return 0; +} + +static void idefloppy_release (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ + idefloppy_pc_t pc; + +#if IDEFLOPPY_DEBUG_LOG + printk (KERN_INFO "Reached idefloppy_release\n"); +#endif /* IDEFLOPPY_DEBUG_LOG */ + + if (!drive->usage) { + invalidate_buffers (inode->i_rdev); + idefloppy_create_prevent_cmd (&pc, 0); + (void) idefloppy_queue_pc_tail (drive, &pc); + } + MOD_DEC_USE_COUNT; +} + +/* + * Check media change. Use a simple algorithm for now. + */ +static int idefloppy_media_change (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + + return clear_bit (IDEFLOPPY_MEDIA_CHANGED, &floppy->flags); +} + +/* + * Return the current floppy capacity to ide.c. + */ +static unsigned long idefloppy_capacity (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + unsigned long capacity = floppy->blocks * floppy->bs_factor; + + return capacity ? 
capacity : 0x7fffffff; +} + +/* + * idefloppy_identify_device checks if we can support a drive, + * based on the ATAPI IDENTIFY command results. + */ +static int idefloppy_identify_device (ide_drive_t *drive,struct hd_driveid *id) +{ + struct idefloppy_id_gcw gcw; +#if IDEFLOPPY_DEBUG_INFO + unsigned short mask,i; + char buffer[80]; +#endif /* IDEFLOPPY_DEBUG_INFO */ + + *((unsigned short *) &gcw) = id->config; + +#if IDEFLOPPY_DEBUG_INFO + printk (KERN_INFO "Dumping ATAPI Identify Device floppy parameters\n"); + switch (gcw.protocol) { + case 0: case 1: sprintf (buffer, "ATA");break; + case 2: sprintf (buffer, "ATAPI");break; + case 3: sprintf (buffer, "Reserved (Unknown to ide-floppy)");break; + } + printk (KERN_INFO "Protocol Type: %s\n", buffer); + switch (gcw.device_type) { + case 0: sprintf (buffer, "Direct-access Device");break; + case 1: sprintf (buffer, "Streaming Tape Device");break; + case 2: case 3: case 4: sprintf (buffer, "Reserved");break; + case 5: sprintf (buffer, "CD-ROM Device");break; + case 6: sprintf (buffer, "Reserved"); + case 7: sprintf (buffer, "Optical memory Device");break; + case 0x1f: sprintf (buffer, "Unknown or no Device type");break; + default: sprintf (buffer, "Reserved"); + } + printk (KERN_INFO "Device Type: %x - %s\n", gcw.device_type, buffer); + printk (KERN_INFO "Removable: %s\n",gcw.removable ? 
"Yes":"No"); + switch (gcw.drq_type) { + case 0: sprintf (buffer, "Microprocessor DRQ");break; + case 1: sprintf (buffer, "Interrupt DRQ");break; + case 2: sprintf (buffer, "Accelerated DRQ");break; + case 3: sprintf (buffer, "Reserved");break; + } + printk (KERN_INFO "Command Packet DRQ Type: %s\n", buffer); + switch (gcw.packet_size) { + case 0: sprintf (buffer, "12 bytes");break; + case 1: sprintf (buffer, "16 bytes");break; + default: sprintf (buffer, "Reserved");break; + } + printk (KERN_INFO "Command Packet Size: %s\n", buffer); + printk (KERN_INFO "Model: %s\n",id->model); + printk (KERN_INFO "Firmware Revision: %s\n",id->fw_rev); + printk (KERN_INFO "Serial Number: %s\n",id->serial_no); + printk (KERN_INFO "Write buffer size(?): %d bytes\n",id->buf_size*512); + printk (KERN_INFO "DMA: %s",id->capability & 0x01 ? "Yes\n":"No\n"); + printk (KERN_INFO "LBA: %s",id->capability & 0x02 ? "Yes\n":"No\n"); + printk (KERN_INFO "IORDY can be disabled: %s",id->capability & 0x04 ? "Yes\n":"No\n"); + printk (KERN_INFO "IORDY supported: %s",id->capability & 0x08 ? "Yes\n":"Unknown\n"); + printk (KERN_INFO "ATAPI overlap supported: %s",id->capability & 0x20 ? "Yes\n":"No\n"); + printk (KERN_INFO "PIO Cycle Timing Category: %d\n",id->tPIO); + printk (KERN_INFO "DMA Cycle Timing Category: %d\n",id->tDMA); + printk (KERN_INFO "Single Word DMA supported modes:\n"); + for (i=0,mask=1;i<8;i++,mask=mask << 1) { + if (id->dma_1word & mask) + printk (KERN_INFO " Mode %d%s\n", i, (id->dma_1word & (mask << 8)) ? " (active)" : ""); + } + printk (KERN_INFO "Multi Word DMA supported modes:\n"); + for (i=0,mask=1;i<8;i++,mask=mask << 1) { + if (id->dma_mword & mask) + printk (KERN_INFO " Mode %d%s\n", i, (id->dma_mword & (mask << 8)) ? " (active)" : ""); + } + if (id->field_valid & 0x0002) { + printk (KERN_INFO "Enhanced PIO Modes: %s\n",id->eide_pio_modes & 1 ? 
"Mode 3":"None"); + if (id->eide_dma_min == 0) + sprintf (buffer, "Not supported"); + else + sprintf (buffer, "%d ns",id->eide_dma_min); + printk (KERN_INFO "Minimum Multi-word DMA cycle per word: %s\n", buffer); + if (id->eide_dma_time == 0) + sprintf (buffer, "Not supported"); + else + sprintf (buffer, "%d ns",id->eide_dma_time); + printk (KERN_INFO "Manufacturer\'s Recommended Multi-word cycle: %s\n", buffer); + if (id->eide_pio == 0) + sprintf (buffer, "Not supported"); + else + sprintf (buffer, "%d ns",id->eide_pio); + printk (KERN_INFO "Minimum PIO cycle without IORDY: %s\n", buffer); + if (id->eide_pio_iordy == 0) + sprintf (buffer, "Not supported"); + else + sprintf (buffer, "%d ns",id->eide_pio_iordy); + printk (KERN_INFO "Minimum PIO cycle with IORDY: %s\n", buffer); + } else + printk (KERN_INFO "According to the device, fields 64-70 are not valid.\n"); +#endif /* IDEFLOPPY_DEBUG_INFO */ + + if (gcw.protocol != 2) + printk (KERN_ERR "ide-floppy: Protocol is not ATAPI\n"); + else if (gcw.device_type != 0) + printk (KERN_ERR "ide-floppy: Device type is not set to floppy\n"); + else if (!gcw.removable) + printk (KERN_ERR "ide-floppy: The removable flag is not set\n"); + else if (gcw.drq_type == 3) { + printk (KERN_ERR "ide-floppy: Sorry, DRQ type %d not supported\n", gcw.drq_type); + } else if (gcw.packet_size != 0) { + printk (KERN_ERR "ide-floppy: Packet size is not 12 bytes long\n"); + } else + return 1; + return 0; +} + +/* + * idefloppy_get_capabilities asks the floppy about its various + * parameters. 
+ */ +static void idefloppy_get_capabilities (ide_drive_t *drive) +{ + idefloppy_pc_t pc; + idefloppy_mode_parameter_header_t *header; + idefloppy_capabilities_page_t *capabilities; + + idefloppy_create_mode_sense_cmd (&pc, IDEFLOPPY_CAPABILITIES_PAGE, MODE_SENSE_CURRENT); + if (idefloppy_queue_pc_tail (drive,&pc)) { + printk (KERN_ERR "ide-floppy: Can't get drive capabilities\n"); + return; + } + header = (idefloppy_mode_parameter_header_t *) pc.buffer; + capabilities = (idefloppy_capabilities_page_t *) (header + 1); + + if (!capabilities->sflp) + printk (KERN_INFO "%s: Warning - system floppy device bit is not set\n", drive->name); + +#if IDEFLOPPY_DEBUG_INFO + printk (KERN_INFO "Dumping the results of the MODE SENSE packet command\n"); + printk (KERN_INFO "Mode Parameter Header:\n"); + printk (KERN_INFO "Mode Data Length - %d\n",header->mode_data_length); + printk (KERN_INFO "Medium Type - %d\n",header->medium_type); + printk (KERN_INFO "WP - %d\n",header->wp); + + printk (KERN_INFO "Capabilities Page:\n"); + printk (KERN_INFO "Page code - %d\n",capabilities->page_code); + printk (KERN_INFO "Page length - %d\n",capabilities->page_length); + printk (KERN_INFO "PS - %d\n",capabilities->ps); + printk (KERN_INFO "System Floppy Type device - %s\n",capabilities->sflp ? "Yes":"No"); + printk (KERN_INFO "Supports Reporting progress of Format - %s\n",capabilities->srfp ? "Yes":"No"); + printk (KERN_INFO "Non CD Optical device - %s\n",capabilities->ncd ? "Yes":"No"); + printk (KERN_INFO "Multiple LUN support - %s\n",capabilities->sml ? "Yes":"No"); + printk (KERN_INFO "Total LUN supported - %s\n",capabilities->tlun ? "Yes":"No"); +#endif /* IDEFLOPPY_DEBUG_INFO */ +} + +/* + * Driver initialization. 
+ */ +static void idefloppy_setup (ide_drive_t *drive, idefloppy_floppy_t *floppy) +{ + struct idefloppy_id_gcw gcw; + + *((unsigned short *) &gcw) = drive->id->config; + drive->driver_data = floppy; + drive->ready_stat = 0; + memset (floppy, 0, sizeof (idefloppy_floppy_t)); + floppy->drive = drive; + floppy->pc = floppy->pc_stack; + if (gcw.drq_type == 1) + set_bit (IDEFLOPPY_DRQ_INTERRUPT, &floppy->flags); + + idefloppy_get_capabilities (drive); + (void) idefloppy_get_capacity (drive); +} + +static int idefloppy_cleanup (ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + + if (ide_unregister_subdriver (drive)) + return 1; + drive->driver_data = NULL; + kfree (floppy); + return 0; +} + +int idefloppy_init (void); +static ide_module_t idefloppy_module = { + IDE_DRIVER_MODULE, + idefloppy_init, + NULL +}; + +/* + * IDE subdriver functions, registered with ide.c + */ +static ide_driver_t idefloppy_driver = { + ide_floppy, /* media */ + 0, /* busy */ + 1, /* supports_dma */ + idefloppy_cleanup, /* cleanup */ + idefloppy_do_request, /* do_request */ + idefloppy_end_request, /* end_request */ + idefloppy_ioctl, /* ioctl */ + idefloppy_open, /* open */ + idefloppy_release, /* release */ + idefloppy_media_change, /* media_change */ + NULL, /* pre_reset */ + idefloppy_capacity, /* capacity */ + NULL /* special */ +}; + +/* + * idefloppy_init will register the driver for each floppy. 
+ */ +int idefloppy_init (void) +{ + ide_drive_t *drive; + idefloppy_floppy_t *floppy; + int failed = 0; + + MOD_INC_USE_COUNT; + while ((drive = ide_scan_devices (ide_floppy, NULL, failed++)) != NULL) { + if (!idefloppy_identify_device (drive, drive->id)) { + printk (KERN_ERR "ide-floppy: %s: not supported by this version of ide-floppy\n", drive->name); + continue; + } + if ((floppy = (idefloppy_floppy_t *) kmalloc (sizeof (idefloppy_floppy_t), GFP_KERNEL)) == NULL) { + printk (KERN_ERR "ide-floppy: %s: Can't allocate a floppy structure\n", drive->name); + continue; + } + if (ide_register_subdriver (drive, &idefloppy_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-floppy: %s: Failed to register the driver with ide.c\n", drive->name); + kfree (floppy); + continue; + } + idefloppy_setup (drive, floppy); + failed--; + } + ide_register_module(&idefloppy_module); + MOD_DEC_USE_COUNT; + return 0; +} + +#ifdef MODULE +int init_module (void) +{ + return idefloppy_init (); +} + +void cleanup_module (void) +{ + ide_drive_t *drive; + int failed = 0; + + while ((drive = ide_scan_devices (ide_floppy, &idefloppy_driver, failed)) != NULL) + if (idefloppy_cleanup (drive)) { + printk ("%s: cleanup_module() called while still busy\n", drive->name); + failed++; + } + ide_unregister_module(&idefloppy_module); +} +#endif /* MODULE */ diff -u --recursive --new-file v2.1.7/linux/drivers/block/ide-probe.c linux/drivers/block/ide-probe.c --- v2.1.7/linux/drivers/block/ide-probe.c Thu Jan 1 02:00:00 1970 +++ linux/drivers/block/ide-probe.c Wed Nov 6 14:49:31 1996 @@ -0,0 +1,724 @@ +/* + * linux/drivers/block/ide-probe.c Version 1.0 Oct 31, 1996 + * + * Copyright (C) 1994-1996 Linus Torvalds & authors (see below) + */ + +/* + * Maintained by Mark Lord + * and Gadi Oxman + * + * This is the IDE probe module, as evolved from hd.c and ide.c. + * + * From hd.c: + * | + * | It traverses the request-list, using interrupts to jump between functions. 
+ * | As nearly all functions can be called within interrupts, we may not sleep. + * | Special care is recommended. Have Fun! + * | + * | modified by Drew Eckhardt to check nr of hd's from the CMOS. + * | + * | Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug + * | in the early extended-partition checks and added DM partitions. + * | + * | Early work on error handling by Mika Liljeberg (liljeber@cs.Helsinki.FI). + * | + * | IRQ-unmask, drive-id, multiple-mode, support for ">16 heads", + * | and general streamlining by Mark Lord (mlord@pobox.com). + * + * October, 1994 -- Complete line-by-line overhaul for linux 1.1.x, by: + * + * Mark Lord (mlord@pobox.com) (IDE Perf.Pkg) + * Delman Lee (delman@mipg.upenn.edu) ("Mr. atdisk2") + * Scott Snyder (snyder@fnald0.fnal.gov) (ATAPI IDE cd-rom) + * + * This was a rewrite of just about everything from hd.c, though some original + * code is still sprinkled about. Think of it as a major evolution, with + * inspiration from lots of linux users, esp. hamish@zot.apana.org.au + * + * Version 1.0 move drive probing code from ide.c to ide-probe.c + */ + +#undef REALLY_SLOW_IO /* most systems can safely undef this */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ide.h" + +static inline void do_identify (ide_drive_t *drive, byte cmd) +{ + int bswap = 1; + struct hd_driveid *id; + + id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_KERNEL); + ide_input_data(drive, id, SECTOR_WORDS); /* read 512 bytes of id info */ + sti(); + ide_fix_driveid(id); + +#if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) + /* + * EATA SCSI controllers do a hardware ATA emulation: + * Ignore them if there is a driver for them available. 
+ */ + if ((id->model[0] == 'P' && id->model[1] == 'M') + || (id->model[0] == 'S' && id->model[1] == 'K')) { + printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model); + drive->present = 0; + return; + } +#endif /* CONFIG_SCSI_EATA_DMA || CONFIG_SCSI_EATA_PIO */ + + /* + * WIN_IDENTIFY returns little-endian info, + * WIN_PIDENTIFY *usually* returns little-endian info. + */ + if (cmd == WIN_PIDENTIFY) { + if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */ + || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */ + || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */ + bswap ^= 1; /* Vertos drives may still be weird */ + } + ide_fixstring (id->model, sizeof(id->model), bswap); + ide_fixstring (id->fw_rev, sizeof(id->fw_rev), bswap); + ide_fixstring (id->serial_no, sizeof(id->serial_no), bswap); + + drive->present = 1; + printk("%s: %s, ", drive->name, id->model); + + /* + * Check for an ATAPI device + */ + if (cmd == WIN_PIDENTIFY) { + byte type = (id->config >> 8) & 0x1f; + printk("ATAPI "); +#ifdef CONFIG_BLK_DEV_PROMISE + if (HWIF(drive)->is_promise2) { + printk(" -- not supported on 2nd Promise port\n"); + drive->present = 0; + return; + } +#endif /* CONFIG_BLK_DEV_PROMISE */ + switch (type) { + case ide_floppy: + if (strstr (id->model, "oppy") || strstr (id->model, "poyp")) { + printk ("FLOPPY"); + break; + } + printk ("cdrom or floppy?, assuming "); + type = ide_cdrom; /* Early cdrom models used zero */ + case ide_cdrom: + printk ("CDROM"); + drive->removable = 1; + break; + case ide_tape: + printk ("TAPE"); + break; + default: + printk("UNKNOWN (type %d)", type); + break; + } + printk (" drive\n"); + drive->media = type; + return; + } + + drive->media = ide_disk; + printk("ATA DISK drive\n"); + return; +} + +/* + * Delay for *at least* 50ms. As we don't know how much time is left + * until the next tick occurs, we wait an extra tick to be safe. + * This is used only during the probing/polling for drives at boot time. 
+ */ +static void delay_50ms (void) +{ + unsigned long timer = jiffies + ((HZ + 19)/20) + 1; + while (timer > jiffies); +} + +/* + * try_to_identify() sends an ATA(PI) IDENTIFY request to a drive + * and waits for a response. It also monitors irqs while this is + * happening, in hope of automatically determining which one is + * being used by the interface. + * + * Returns: 0 device was identified + * 1 device timed-out (no response to identify request) + * 2 device aborted the command (refused to identify itself) + */ +static int try_to_identify (ide_drive_t *drive, byte cmd) +{ + int rc; + ide_ioreg_t hd_status; + unsigned long timeout; + int irqs = 0; + + if (!HWIF(drive)->irq) { /* already got an IRQ? */ + probe_irq_off(probe_irq_on()); /* clear dangling irqs */ + irqs = probe_irq_on(); /* start monitoring irqs */ + OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* enable device irq */ + } + + delay_50ms(); /* take a deep breath */ + if ((IN_BYTE(IDE_ALTSTATUS_REG) ^ IN_BYTE(IDE_STATUS_REG)) & ~INDEX_STAT) { + printk("%s: probing with STATUS instead of ALTSTATUS\n", drive->name); + hd_status = IDE_STATUS_REG; /* ancient Seagate drives */ + } else + hd_status = IDE_ALTSTATUS_REG; /* use non-intrusive polling */ + +#if CONFIG_BLK_DEV_PROMISE + if (IS_PROMISE_DRIVE) { + if (promise_cmd(drive,PROMISE_IDENTIFY)) { + if (irqs) + (void) probe_irq_off(irqs); + return 1; + } + } else +#endif /* CONFIG_BLK_DEV_PROMISE */ + OUT_BYTE(cmd,IDE_COMMAND_REG); /* ask drive for ID */ + timeout = ((cmd == WIN_IDENTIFY) ? 
WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; + timeout += jiffies; + do { + if (jiffies > timeout) { + if (irqs) + (void) probe_irq_off(irqs); + return 1; /* drive timed-out */ + } + delay_50ms(); /* give drive a breather */ + } while (IN_BYTE(hd_status) & BUSY_STAT); + + delay_50ms(); /* wait for IRQ and DRQ_STAT */ + if (OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) { + unsigned long flags; + save_flags(flags); + cli(); /* some systems need this */ + do_identify(drive, cmd); /* drive returned ID */ + rc = 0; /* drive responded with ID */ + (void) GET_STAT(); /* clear drive IRQ */ + restore_flags(flags); + } else + rc = 2; /* drive refused ID */ + if (!HWIF(drive)->irq) { + irqs = probe_irq_off(irqs); /* get our irq number */ + if (irqs > 0) { + HWIF(drive)->irq = irqs; /* save it for later */ + irqs = probe_irq_on(); + OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* mask device irq */ + udelay(5); + (void) probe_irq_off(irqs); + (void) probe_irq_off(probe_irq_on()); /* clear self-inflicted irq */ + (void) GET_STAT(); /* clear drive IRQ */ + + } else { /* Mmmm.. multiple IRQs.. don't know which was ours */ + printk("%s: IRQ probe failed (%d)\n", drive->name, irqs); +#ifdef CONFIG_BLK_DEV_CMD640 +#ifdef CMD640_DUMP_REGS + if (HWIF(drive)->chipset == ide_cmd640) { + printk("%s: Hmmm.. probably a driver problem.\n", drive->name); + CMD640_DUMP_REGS; + } +#endif /* CMD640_DUMP_REGS */ +#endif /* CONFIG_BLK_DEV_CMD640 */ + } + } + return rc; +} + +/* + * do_probe() has the difficult job of finding a drive if it exists, + * without getting hung up if it doesn't exist, without trampling on + * ethernet cards, and without leaving any IRQs dangling to haunt us later. + * + * If a drive is "known" to exist (from CMOS or kernel parameters), + * but does not respond right away, the probe will "hang in there" + * for the maximum wait time (about 30 seconds), otherwise it will + * exit much more quickly. 
+ * + * Returns: 0 device was identified + * 1 device timed-out (no response to identify request) + * 2 device aborted the command (refused to identify itself) + * 3 bad status from device (possible for ATAPI drives) + * 4 probe was not attempted because failure was obvious + */ +static int do_probe (ide_drive_t *drive, byte cmd) +{ + int rc; + ide_hwif_t *hwif = HWIF(drive); + if (drive->present) { /* avoid waiting for inappropriate probes */ + if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY)) + return 4; + } +#ifdef DEBUG + printk("probing for %s: present=%d, media=%d, probetype=%s\n", + drive->name, drive->present, drive->media, + (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI"); +#endif + SELECT_DRIVE(hwif,drive); + delay_50ms(); + if (IN_BYTE(IDE_SELECT_REG) != drive->select.all && !drive->present) { + OUT_BYTE(0xa0,IDE_SELECT_REG); /* exit with drive0 selected */ + delay_50ms(); /* allow BUSY_STAT to assert & clear */ + return 3; /* no i/f present: avoid killing ethernet cards */ + } + + if (OK_STAT(GET_STAT(),READY_STAT,BUSY_STAT) + || drive->present || cmd == WIN_PIDENTIFY) + { + if ((rc = try_to_identify(drive,cmd))) /* send cmd and wait */ + rc = try_to_identify(drive,cmd); /* failed: try again */ + if (rc == 1) + printk("%s: no response (status = 0x%02x)\n", drive->name, GET_STAT()); + (void) GET_STAT(); /* ensure drive irq is clear */ + } else { + rc = 3; /* not present or maybe ATAPI */ + } + if (drive->select.b.unit != 0) { + OUT_BYTE(0xa0,IDE_SELECT_REG); /* exit with drive0 selected */ + delay_50ms(); + (void) GET_STAT(); /* ensure drive irq is clear */ + } + return rc; +} + +/* + * probe_for_drive() tests for existence of a given drive using do_probe(). + * + * Returns: 0 no device was found + * 1 device was found (note: drive->present might still be 0) + */ +static inline byte probe_for_drive (ide_drive_t *drive) +{ + if (drive->noprobe) /* skip probing? 
*/ + return drive->present; + if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */ + (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */ + } + if (!drive->present) + return 0; /* drive not found */ + if (drive->id == NULL) { /* identification failed? */ + if (drive->media == ide_disk) { + printk ("%s: non-IDE drive, CHS=%d/%d/%d\n", + drive->name, drive->cyl, drive->head, drive->sect); + } else if (drive->media == ide_cdrom) { + printk("%s: ATAPI cdrom (?)\n", drive->name); + } else { + drive->present = 0; /* nuke it */ + } + } + return 1; /* drive was found */ +} + +/* + * We query CMOS about hard disks : it could be that we have a SCSI/ESDI/etc + * controller that is BIOS compatible with ST-506, and thus showing up in our + * BIOS table, but not register compatible, and therefore not present in CMOS. + * + * Furthermore, we will assume that our ST-506 drives are the primary + * drives in the system -- the ones reflected as drive 1 or 2. The first + * drive is stored in the high nibble of CMOS byte 0x12, the second in the low + * nibble. This will be either a 4 bit drive type or 0xf indicating use byte + * 0x19 for an 8 bit type, drive 1, 0x1a for drive 2 in CMOS. A non-zero value + * means we have an AT controller hard disk for that drive. + * + * Of course, there is no guarantee that either drive is actually on the + * "primary" IDE interface, but we don't bother trying to sort that out here. + * If a drive is not actually on the primary interface, then these parameters + * will be ignored. This results in the user having to supply the logical + * drive geometry as a boot parameter for each drive not on the primary i/f. + * + * The only "perfect" way to handle this would be to modify the setup.[cS] code + * to do BIOS calls Int13h/Fn08h and Int13h/Fn48h to get all of the drive info + * for us during initialization. I have the necessary docs -- any takers? 
-ml + */ +static void probe_cmos_for_drives (ide_hwif_t *hwif) +{ +#ifdef __i386__ + extern struct drive_info_struct drive_info; + byte cmos_disks, *BIOS = (byte *) &drive_info; + int unit; + +#ifdef CONFIG_BLK_DEV_PROMISE + if (hwif->is_promise2) + return; +#endif /* CONFIG_BLK_DEV_PROMISE */ + outb_p(0x12,0x70); /* specify CMOS address 0x12 */ + cmos_disks = inb_p(0x71); /* read the data from 0x12 */ + /* Extract drive geometry from CMOS+BIOS if not already setup */ + for (unit = 0; unit < MAX_DRIVES; ++unit) { + ide_drive_t *drive = &hwif->drives[unit]; + if ((cmos_disks & (0xf0 >> (unit*4))) && !drive->present && !drive->nobios) { + drive->cyl = drive->bios_cyl = *(unsigned short *)BIOS; + drive->head = drive->bios_head = *(BIOS+2); + drive->sect = drive->bios_sect = *(BIOS+14); + drive->ctl = *(BIOS+8); + drive->present = 1; + } + BIOS += 16; + } +#endif +} + +/* + * This routine only knows how to look for drive units 0 and 1 + * on an interface, so any setting of MAX_DRIVES > 2 won't work here. 
+ */ +static void probe_hwif (ide_hwif_t *hwif) +{ + unsigned int unit; + unsigned long flags; + + if (hwif->noprobe) + return; + if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) + probe_cmos_for_drives (hwif); +#if CONFIG_BLK_DEV_PROMISE + if (!hwif->is_promise2 && + (ide_check_region(hwif->io_ports[IDE_DATA_OFFSET],8) || ide_check_region(hwif->io_ports[IDE_CONTROL_OFFSET],1))) { +#else + if (ide_check_region(hwif->io_ports[IDE_DATA_OFFSET],8) || ide_check_region(hwif->io_ports[IDE_CONTROL_OFFSET],1)) { +#endif /* CONFIG_BLK_DEV_PROMISE */ + int msgout = 0; + for (unit = 0; unit < MAX_DRIVES; ++unit) { + ide_drive_t *drive = &hwif->drives[unit]; + if (drive->present) { + drive->present = 0; + printk("%s: ERROR, PORTS ALREADY IN USE\n", drive->name); + msgout = 1; + } + } + if (!msgout) + printk("%s: ports already in use, skipping probe\n", hwif->name); + return; + } + + save_flags(flags); + sti(); /* needed for jiffies and irq probing */ + /* + * Second drive should only exist if first drive was found, + * but a lot of cdrom drives are configured as single slaves. 
+ */ + for (unit = 0; unit < MAX_DRIVES; ++unit) { + ide_drive_t *drive = &hwif->drives[unit]; + (void) probe_for_drive (drive); + if (drive->present && !hwif->present) { + hwif->present = 1; + ide_request_region(hwif->io_ports[IDE_DATA_OFFSET], 8, hwif->name); + ide_request_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1, hwif->name); + } + } + if (hwif->reset) { + unsigned long timeout = jiffies + WAIT_WORSTCASE; + byte stat; + + printk("%s: reset\n", hwif->name); + OUT_BYTE(12, hwif->io_ports[IDE_CONTROL_OFFSET]); + udelay(10); + OUT_BYTE(8, hwif->io_ports[IDE_CONTROL_OFFSET]); + do { + delay_50ms(); + stat = IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); + } while ((stat & BUSY_STAT) && jiffies < timeout); + } + restore_flags(flags); + for (unit = 0; unit < MAX_DRIVES; ++unit) { + ide_drive_t *drive = &hwif->drives[unit]; + if (drive->present) { + ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc; + if (tuneproc != NULL && drive->autotune == 1) + tuneproc(drive, 255); /* auto-tune PIO mode */ + } + } +} + +#if MAX_HWIFS > 1 +/* + * save_match() is used to simplify logic in init_irq() below. + * + * A loophole here is that we may not know about a particular + * hwif's irq until after that hwif is actually probed/initialized.. + * This could be a problem for the case where an hwif is on a + * dual interface that requires serialization (eg. cmd640) and another + * hwif using one of the same irqs is initialized beforehand. + * + * This routine detects and reports such situations, but does not fix them. 
+ */ +static void save_match (ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match) +{ + ide_hwif_t *m = *match; + + if (m && m->hwgroup && m->hwgroup != new->hwgroup) { + if (!new->hwgroup) + return; + printk("%s: potential irq problem with %s and %s\n", hwif->name, new->name, m->name); + } + if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */ + *match = new; +} +#endif /* MAX_HWIFS > 1 */ + +/* + * This routine sets up the irq for an ide interface, and creates a new + * hwgroup for the irq/hwif if none was previously assigned. + * + * Much of the code is for correctly detecting/handling irq sharing + * and irq serialization situations. This is somewhat complex because + * it handles static as well as dynamic (PCMCIA) IDE interfaces. + * + * The SA_INTERRUPT in sa_flags means ide_intr() is always entered with + * interrupts completely disabled. This can be bad for interrupt latency, + * but anything else has led to problems on some machines. We re-enable + * interrupts as much as we can safely do in most places. + */ +static int init_irq (ide_hwif_t *hwif) +{ + unsigned long flags; +#if MAX_HWIFS > 1 + unsigned int index; +#endif /* MAX_HWIFS > 1 */ + ide_hwgroup_t *hwgroup; + ide_hwif_t *match = NULL; + + save_flags(flags); + cli(); + + hwif->hwgroup = NULL; +#if MAX_HWIFS > 1 + /* + * Group up with any other hwifs that share our irq(s). 
+ */ + for (index = 0; index < MAX_HWIFS; index++) { + ide_hwif_t *h = &ide_hwifs[index]; + if (h->hwgroup) { /* scan only initialized hwif's */ + if (hwif->irq == h->irq) { + hwif->sharing_irq = h->sharing_irq = 1; + save_match(hwif, h, &match); + } + if (hwif->serialized) { + ide_hwif_t *mate = &ide_hwifs[hwif->index^1]; + if (index == mate->index || h->irq == mate->irq) + save_match(hwif, h, &match); + } + if (h->serialized) { + ide_hwif_t *mate = &ide_hwifs[h->index^1]; + if (hwif->irq == mate->irq) + save_match(hwif, h, &match); + } + } + } +#endif /* MAX_HWIFS > 1 */ + /* + * If we are still without a hwgroup, then form a new one + */ + if (match) { + hwgroup = match->hwgroup; + } else { + hwgroup = kmalloc(sizeof(ide_hwgroup_t), GFP_KERNEL); + hwgroup->hwif = hwgroup->next_hwif = hwif->next = hwif; + hwgroup->rq = NULL; + hwgroup->handler = NULL; + if (hwif->drives[0].present) + hwgroup->drive = &hwif->drives[0]; + else + hwgroup->drive = &hwif->drives[1]; + hwgroup->poll_timeout = 0; + init_timer(&hwgroup->timer); + hwgroup->timer.function = &ide_timer_expiry; + hwgroup->timer.data = (unsigned long) hwgroup; + } + + /* + * Allocate the irq, if not already obtained for another hwif + */ + if (!match || match->irq != hwif->irq) { + if (ide_request_irq(hwif->irq, &ide_intr, SA_INTERRUPT, hwif->name, hwgroup)) { + if (!match) + kfree(hwgroup); + restore_flags(flags); + return 1; + } + } + + /* + * Everything is okay, so link us into the hwgroup + */ + hwif->hwgroup = hwgroup; + hwif->next = hwgroup->hwif->next; + hwgroup->hwif->next = hwif; + + restore_flags(flags); /* safe now that hwif->hwgroup is set up */ + +#ifndef __mc68000__ + printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name, + hwif->io_ports[IDE_DATA_OFFSET], hwif->io_ports[IDE_DATA_OFFSET]+7, hwif->io_ports[IDE_CONTROL_OFFSET], hwif->irq); +#else + printk("%s at %p on irq 0x%08x", hwif->name, hwif->io_ports[IDE_DATA_OFFSET], hwif->irq); +#endif /* __mc68000__ */ + if (match) + printk(" (%sed 
with %s)", hwif->sharing_irq ? "shar" : "serializ", match->name); + printk("\n"); + return 0; +} + +/* + * init_gendisk() (as opposed to ide_geninit) is called for each major device, + * after probing for drives, to allocate partition tables and other data + * structures needed for the routines in genhd.c. ide_geninit() gets called + * somewhat later, during the partition check. + */ +static void init_gendisk (ide_hwif_t *hwif) +{ + struct gendisk *gd, **gdp; + unsigned int unit, units, minors; + int *bs; + + /* figure out maximum drive number on the interface */ + for (units = MAX_DRIVES; units > 0; --units) { + if (hwif->drives[units-1].present) + break; + } + minors = units * (1<sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); + gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); + bs = kmalloc (minors*sizeof(int), GFP_KERNEL); + + memset(gd->part, 0, minors * sizeof(struct hd_struct)); + + /* cdroms and msdos f/s are examples of non-1024 blocksizes */ + blksize_size[hwif->major] = bs; + for (unit = 0; unit < minors; ++unit) + *bs++ = BLOCK_SIZE; + + for (unit = 0; unit < units; ++unit) + hwif->drives[unit].part = &gd->part[unit << PARTN_BITS]; + + gd->major = hwif->major; /* our major device number */ + gd->major_name = IDE_MAJOR_NAME; /* treated special in genhd.c */ + gd->minor_shift = PARTN_BITS; /* num bits for partitions */ + gd->max_p = 1<max_nr = units; /* max num real drives */ + gd->nr_real = units; /* current num real drives */ + gd->init = &ide_geninit; /* initialization function */ + gd->real_devices= hwif; /* ptr to internal data */ + gd->next = NULL; /* linked list of major devs */ + + for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) ; + hwif->gd = *gdp = gd; /* link onto tail of list */ +} + +static int hwif_init (int h) +{ + ide_hwif_t *hwif = &ide_hwifs[h]; + void (*rfn)(void); + + if (!hwif->present) + return 0; + if (!hwif->irq) { + if (!(hwif->irq = ide_default_irq(hwif->io_ports[IDE_DATA_OFFSET]))) { + printk("%s: 
DISABLED, NO IRQ\n", hwif->name); + return (hwif->present = 0); + } + } +#ifdef CONFIG_BLK_DEV_HD + if (hwif->irq == HD_IRQ && hwif->io_ports[IDE_DATA_OFFSET] != HD_DATA) { + printk("%s: CANNOT SHARE IRQ WITH OLD HARDDISK DRIVER (hd.c)\n", hwif->name); + return (hwif->present = 0); + } +#endif /* CONFIG_BLK_DEV_HD */ + + hwif->present = 0; /* we set it back to 1 if all is ok below */ + switch (hwif->major) { + case IDE0_MAJOR: rfn = &do_ide0_request; break; +#if MAX_HWIFS > 1 + case IDE1_MAJOR: rfn = &do_ide1_request; break; +#endif +#if MAX_HWIFS > 2 + case IDE2_MAJOR: rfn = &do_ide2_request; break; +#endif +#if MAX_HWIFS > 3 + case IDE3_MAJOR: rfn = &do_ide3_request; break; +#endif + default: + printk("%s: request_fn NOT DEFINED\n", hwif->name); + return (hwif->present = 0); + } + if (register_blkdev (hwif->major, hwif->name, ide_fops)) { + printk("%s: UNABLE TO GET MAJOR NUMBER %d\n", hwif->name, hwif->major); + } else if (init_irq (hwif)) { + printk("%s: UNABLE TO GET IRQ %d\n", hwif->name, hwif->irq); + (void) unregister_blkdev (hwif->major, hwif->name); + } else { + init_gendisk(hwif); + blk_dev[hwif->major].request_fn = rfn; + read_ahead[hwif->major] = 8; /* (4kB) */ + hwif->present = 1; /* success */ + } + return hwif->present; +} + +int ideprobe_init (void); +static ide_module_t ideprobe_module = { + IDE_PROBE_MODULE, + ideprobe_init, + NULL +}; + +int ideprobe_init (void) +{ + unsigned int index; + int probe[MAX_HWIFS]; + + MOD_INC_USE_COUNT; + memset(probe, 0, MAX_HWIFS * sizeof(int)); + for (index = 0; index < MAX_HWIFS; ++index) + probe[index] = !ide_hwifs[index].present; + + /* + * Probe for drives in the usual way.. 
CMOS/BIOS, then poke at ports + */ + for (index = 0; index < MAX_HWIFS; ++index) + if (probe[index]) probe_hwif (&ide_hwifs[index]); + for (index = 0; index < MAX_HWIFS; ++index) + if (probe[index]) hwif_init (index); + ide_register_module(&ideprobe_module); + MOD_DEC_USE_COUNT; + return 0; +} + +#ifdef MODULE +int init_module (void) +{ + unsigned int index; + + for (index = 0; index < MAX_HWIFS; ++index) + ide_unregister(index); + return ideprobe_init(); +} + +void cleanup_module (void) +{ + ide_unregister_module(&ideprobe_module); +} +#endif /* MODULE */ diff -u --recursive --new-file v2.1.7/linux/drivers/block/ide-tape.c linux/drivers/block/ide-tape.c --- v2.1.7/linux/drivers/block/ide-tape.c Tue Oct 29 19:58:03 1996 +++ linux/drivers/block/ide-tape.c Wed Nov 6 14:49:31 1996 @@ -1,5 +1,5 @@ /* - * linux/drivers/block/ide-tape.c Version 1.8 - ALPHA Sep 26, 1996 + * linux/drivers/block/ide-tape.c Version 1.10 - BETA Nov 5, 1996 * * Copyright (C) 1995, 1996 Gadi Oxman * @@ -27,13 +27,16 @@ * The block device major and minor numbers are determined from the * tape's relative position in the ide interfaces, as explained in ide.c. * - * The character device interface consists of two devices: + * The character device interface consists of the following devices: * - * ht0 major=37,minor=0 first IDE tape, rewind on close. - * nht0 major=37,minor=128 first IDE tape, no rewind on close. + * ht0 major 37, minor 0 first IDE tape, rewind on close. + * ht1 major 37, minor 1 second IDE tape, rewind on close. + * ... + * nht0 major 37, minor 128 first IDE tape, no rewind on close. + * nht1 major 37, minor 129 second IDE tape, no rewind on close. + * ... * - * Run /usr/src/linux/scripts/MAKEDEV.ide to create the above entries. - * We currently support only one ide tape drive. + * Run linux/scripts/MAKEDEV.ide to create the above entries. * * The general magnetic tape commands compatible interface, as defined by * include/linux/mtio.h, is accessible through the character device. 
@@ -49,13 +52,7 @@ * following scenario: * * 1. ide-tape is operating in the pipelined operation mode. - * 2. All character device read/write requests consist of an - * integral number of the tape's recommended data transfer unit - * (which is shown on initialization and can be received with - * an ioctl). - * As of version 1.3 of the driver, this is no longer as critical - * as it used to be. - * 3. No buffering is performed by the user backup program. + * 2. No buffering is performed by the user backup program. * * Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive. * @@ -189,15 +186,22 @@ * Ver 1.7 Sep 10 96 Minor changes for the CONNER CTT8000-A model. * Ver 1.8 Sep 26 96 Attempt to find a better balance between good * interactive response and high system throughput. - * - * We are currently in an *alpha* stage. The driver is not complete and not - * much tested. I would strongly suggest to: - * - * 1. Connect the tape to a separate interface and irq. - * 2. Be truly prepared for a kernel crash and the resulting data loss. - * 3. Don't rely too much on the resulting backups. - * - * Other than that, enjoy ! + * Ver 1.9 Nov 5 96 Automatically cross encountered filemarks rather + * than requiring an explicit FSF command. + * Abort pending requests at end of media. + * MTTELL was sometimes returning incorrect results. + * Return the real block size in the MTIOCGET ioctl. + * Some error recovery bug fixes. + * Ver 1.10 Nov 5 96 Major reorganization. + * Reduced CPU overhead a bit by eliminating internal + * bounce buffers. + * Added module support. + * Added multiple tape drives support. + * Added partition support. + * Rewrote DSC handling. + * Some portability fixes. + * Removed ide-tape.h. + * Additional minor changes. * * Here are some words from the first releases of hd.c, which are quoted * in ide.c and apply here as well: @@ -280,7 +284,7 @@ * pipelined mode might be the best option. 
* * You can enable/disable/tune the pipelined operation mode by adjusting - * the compile time parameters in ide-tape.h. + * the compile time parameters below. */ /* @@ -308,7 +312,7 @@ */ #include -#include +#include #include #include #include @@ -328,167 +332,443 @@ #include #include #include +#include +#include /* * Main Linux ide driver include file - * - * Automatically includes our include file - ide-tape.h. */ - -#include "ide.h" +#include "ide.h" /* - * Supported ATAPI tape drives packet commands + * For general magnetic tape device compatibility. */ +#include -#define IDETAPE_TEST_UNIT_READY_CMD 0x00 -#define IDETAPE_REWIND_CMD 0x01 -#define IDETAPE_REQUEST_SENSE_CMD 0x03 -#define IDETAPE_READ_CMD 0x08 -#define IDETAPE_WRITE_CMD 0x0a -#define IDETAPE_WRITE_FILEMARK_CMD 0x10 -#define IDETAPE_SPACE_CMD 0x11 -#define IDETAPE_INQUIRY_CMD 0x12 -#define IDETAPE_ERASE_CMD 0x19 -#define IDETAPE_MODE_SENSE_CMD 0x1a -#define IDETAPE_LOAD_UNLOAD_CMD 0x1b -#define IDETAPE_LOCATE_CMD 0x2b -#define IDETAPE_READ_POSITION_CMD 0x34 +/**************************** Tunable parameters *****************************/ /* - * Some defines for the SPACE command + * Pipelined mode parameters. + * + * We try to use the minimum number of stages which is enough to + * keep the tape constantly streaming. To accomplish that, we implement + * a feedback loop around the maximum number of stages: * - * (The code field in the SPACE packet command). + * We start from MIN maximum stages (we will not even use MIN stages + * if we don't need them), increment it by RATE*(MAX-MIN) + * whenever we sense that the pipeline is empty, until we reach + * the optimum value or until we reach MAX. + * + * Setting the following parameter to 0 will disable the pipelined mode. 
*/ - -#define IDETAPE_SPACE_OVER_FILEMARK 1 -#define IDETAPE_SPACE_TO_EOD 3 +#define IDETAPE_MIN_PIPELINE_STAGES 100 +#define IDETAPE_MAX_PIPELINE_STAGES 200 +#define IDETAPE_INCREASE_STAGES_RATE 20 /* - * Some defines for the LOAD UNLOAD command + * Assuming the tape shares an interface with another device, the default + * behavior is to service our pending pipeline requests as soon as + * possible, but to gracefully postpone them in favor of the other device + * when the tape is busy. This has the potential to maximize our + * throughput and in the same time, to make efficient use of the IDE bus. + * + * Note that when we transfer data to / from the tape, we co-operate with + * the relatively fast tape buffers and the tape will perform the + * actual media access in the background, without blocking the IDE + * bus. This means that as long as the maximum IDE bus throughput is much + * higher than the sum of our maximum throughput and the maximum + * throughput of the other device, we should probably leave the default + * behavior. + * + * However, if it is still desired to give the other device a share even + * in our own (small) bus bandwidth, you can set IDETAPE_LOW_TAPE_PRIORITY + * to 1. This will let the other device finish *all* its pending requests + * before we even check if we can service our next pending request. */ - -#define IDETAPE_LU_LOAD_MASK 1 -#define IDETAPE_LU_RETENSION_MASK 2 -#define IDETAPE_LU_EOT_MASK 4 +#define IDETAPE_LOW_TAPE_PRIORITY 0 /* - * Our ioctls - We will use 0x034n and 0x035n + * The following are used to debug the driver: + * + * Setting IDETAPE_DEBUG_LOG to 1 will log driver flow control. + * Setting IDETAPE_DEBUG_BUGS to 1 will enable self-sanity checks in + * some places. + * + * Setting them to 0 will restore normal operation mode: * - * Nothing special meanwhile. - * mtio.h MTIOCTOP compatible commands are supported on the character - * device interface. + * 1. Disable logging normal successful operations. + * 2. 
Disable self-sanity checks. + * 3. Errors will still be logged, of course. + * + * All the #if DEBUG code will be removed some day, when the driver + * is verified to be stable enough. This will make it much more + * esthetic. */ +#define IDETAPE_DEBUG_LOG 0 +#define IDETAPE_DEBUG_BUGS 1 /* - * Special requests for our block device strategy routine. - * - * In order to service a character device command, we add special - * requests to the tail of our block device request queue and wait - * for their completion. + * After each failed packet command we issue a request sense command + * and retry the packet command IDETAPE_MAX_PC_RETRIES times. * + * Setting IDETAPE_MAX_PC_RETRIES to 0 will disable retries. */ +#define IDETAPE_MAX_PC_RETRIES 3 -#define IDETAPE_FIRST_REQUEST 90 +/* + * With each packet command, we allocate a buffer of + * IDETAPE_PC_BUFFER_SIZE bytes. This is used for several packet + * commands (Not for READ/WRITE commands). + */ +#define IDETAPE_PC_BUFFER_SIZE 256 /* - * IDETAPE_PACKET_COMMAND_REQUEST_TYPE1 is used to queue a packet command - * in the request queue. We will wait for DSC before issuing the command - * if it is still not set. In that case, we will temporary replace the - * cmd field to type 2 and restore it back to type 1 when we receive DSC - * and can start with sending the command. + * In various places in the driver, we need to allocate storage + * for packet commands and requests, which will remain valid while + * we leave the driver to wait for an interrupt or a timeout event. */ - -#define IDETAPE_PACKET_COMMAND_REQUEST_TYPE1 90 -#define IDETAPE_PACKET_COMMAND_REQUEST_TYPE2 91 +#define IDETAPE_PC_STACK (10 + IDETAPE_MAX_PC_RETRIES) /* - * IDETAPE_READ_REQUEST and IDETAPE_WRITE_REQUEST are used by our - * character device interface to request read/write operations from - * our block device interface. + * DSC polling parameters. 
+ * + * Polling for DSC (a single bit in the status register) is a very + * important function in ide-tape. There are two cases in which we + * poll for DSC: * - * In case a read or write request was requested by the buffer cache - * and not by our character device interface, the cmd field in the - * request will contain READ and WRITE instead. + * 1. Before a read/write packet command, to ensure that we + * can transfer data from/to the tape's data buffers, without + * causing an actual media access. In case the tape is not + * ready yet, we take out our request from the device + * request queue, so that ide.c will service requests from + * the other device on the same interface meanwhile. * - * We handle both cases in a similar way. The main difference is that - * in our own requests, buffer head is NULL and idetape_end_request - * will update the errors field if the request was not completed. + * 2. After the successful initialization of a "media access + * packet command", which is a command which can take a long + * time to complete (it can be several seconds or even an hour). + * + * Again, we postpone our request in the middle to free the bus + * for the other device. The polling frequency here should be + * lower than the read/write frequency since those media access + * commands are slow. We start from a "fast" frequency - + * IDETAPE_DSC_MA_FAST (one second), and if we don't receive DSC + * after IDETAPE_DSC_MA_THRESHOLD (5 minutes), we switch it to a + * lower frequency - IDETAPE_DSC_MA_SLOW (1 minute). + * + * We also set a timeout for the timer, in case something goes wrong. + * The timeout should be longer then the maximum execution time of a + * tape operation. + */ + +/* + * The following parameter is used to select the point in the internal + * tape fifo in which we will start to refill the buffer. 
Decreasing + * the following parameter will improve the system's latency and + * interactive response, while using a high value might improve sytem + * throughput. + */ +#define IDETAPE_FIFO_THRESHOLD 2 + +/* + * DSC timings. */ +#define IDETAPE_DSC_RW_MIN 5*HZ/100 /* 50 msec */ +#define IDETAPE_DSC_RW_MAX 40*HZ/100 /* 400 msec */ +#define IDETAPE_DSC_RW_TIMEOUT 2*60*HZ /* 2 minutes */ +#define IDETAPE_DSC_MA_FAST 2*HZ /* 2 seconds */ +#define IDETAPE_DSC_MA_THRESHOLD 5*60*HZ /* 5 minutes */ +#define IDETAPE_DSC_MA_SLOW 30*HZ /* 30 seconds */ +#define IDETAPE_DSC_MA_TIMEOUT 2*60*60*HZ /* 2 hours */ -#define IDETAPE_READ_REQUEST 92 -#define IDETAPE_WRITE_REQUEST 93 +/*************************** End of tunable parameters ***********************/ -#define IDETAPE_LAST_REQUEST 93 +typedef enum { + idetape_direction_none, + idetape_direction_read, + idetape_direction_write +} idetape_chrdev_direction_t; /* - * A macro which can be used to check if a we support a given - * request command. + * Our view of a packet command. */ +typedef struct idetape_packet_command_s { + u8 c[12]; /* Actual packet bytes */ + int retries; /* On each retry, we increment retries */ + int error; /* Error code */ + int request_transfer; /* Bytes to transfer */ + int actually_transferred; /* Bytes actually transferred */ + int buffer_size; /* Size of our data buffer */ + struct buffer_head *bh; + char *b_data; + int b_count; + byte *buffer; /* Data buffer */ + byte *current_position; /* Pointer into the above buffer */ + void (*callback) (ide_drive_t *); /* Called when this packet command is completed */ + byte pc_buffer[IDETAPE_PC_BUFFER_SIZE]; /* Temporary buffer */ + unsigned int flags; /* Status/Action bit flags */ +} idetape_pc_t; -#define IDETAPE_REQUEST_CMD(cmd) ((cmd >= IDETAPE_FIRST_REQUEST) && (cmd <= IDETAPE_LAST_REQUEST)) +/* + * Packet command flag bits. 
+ */ +#define PC_ABORT 0 /* Set when an error is considered normal - We won't retry */ +#define PC_WAIT_FOR_DSC 1 /* 1 When polling for DSC on a media access command */ +#define PC_DMA_RECOMMENDED 2 /* 1 when we prefer to use DMA if possible */ +#define PC_DMA_IN_PROGRESS 3 /* 1 while DMA in progress */ +#define PC_DMA_ERROR 4 /* 1 when encountered problem during DMA */ +#define PC_WRITING 5 /* Data direction */ /* - * We are now able to postpone an idetape request in the stage - * where it is polling for DSC and service requests from the other - * ide device meanwhile. + * Capabilities and Mechanical Status Page + */ +typedef struct { + unsigned page_code :6; /* Page code - Should be 0x2a */ + unsigned reserved1_67 :2; + u8 page_length; /* Page Length - Should be 0x12 */ + u8 reserved2, reserved3; + unsigned ro :1; /* Read Only Mode */ + unsigned reserved4_1234 :4; + unsigned sprev :1; /* Supports SPACE in the reverse direction */ + unsigned reserved4_67 :2; + unsigned reserved5_012 :3; + unsigned efmt :1; /* Supports ERASE command initiated formatting */ + unsigned reserved5_4 :1; + unsigned qfa :1; /* Supports the QFA two partition formats */ + unsigned reserved5_67 :2; + unsigned lock :1; /* Supports locking the volume */ + unsigned locked :1; /* The volume is locked */ + unsigned prevent :1; /* The device defaults in the prevent state after power up */ + unsigned eject :1; /* The device can eject the volume */ + unsigned reserved6_45 :2; /* Reserved */ + unsigned ecc :1; /* Supports error correction */ + unsigned cmprs :1; /* Supports data compression */ + unsigned reserved7_0 :1; + unsigned blk512 :1; /* Supports 512 bytes block size */ + unsigned blk1024 :1; /* Supports 1024 bytes block size */ + unsigned reserved7_3_6 :4; + unsigned slowb :1; /* The device restricts the byte count for PIO */ + /* transfers for slow buffer memory ??? 
*/ + u16 max_speed; /* Maximum speed supported in KBps */ + u8 reserved10, reserved11; + u16 ctl; /* Continuous Transfer Limit in blocks */ + u16 speed; /* Current Speed, in KBps */ + u16 buffer_size; /* Buffer Size, in 512 bytes */ + u8 reserved18, reserved19; +} idetape_capabilities_page_t; + +/* + * A pipeline stage. + */ +typedef struct idetape_stage_s { + struct request rq; /* The corresponding request */ + struct buffer_head *bh; /* The data buffers */ + struct idetape_stage_s *next; /* Pointer to the next stage */ +} idetape_stage_t; + +/* + * Most of our global data which we need to save even as we leave the + * driver due to an interrupt or a timer event is stored in a variable + * of type idetape_tape_t, defined below. */ +typedef struct { + ide_drive_t *drive; -#define IDETAPE_RQ_POSTPONED 0x1234 + /* + * Since a typical character device operation requires more + * than one packet command, we provide here enough memory + * for the maximum of interconnected packet commands. + * The packet commands are stored in the circular array pc_stack. + * pc_stack_index points to the last used entry, and warps around + * to the start when we get to the last array entry. + * + * pc points to the current processed packet command. + * + * failed_pc points to the last failed packet command, or contains + * NULL if we do not need to retry any packet command. This is + * required since an additional packet command is needed before the + * retry, to get detailed information on what went wrong. + */ + idetape_pc_t *pc; /* Current packet command */ + idetape_pc_t *failed_pc; /* Last failed packet command */ + idetape_pc_t pc_stack[IDETAPE_PC_STACK];/* Packet command stack */ + int pc_stack_index; /* Next free packet command storage space */ + struct request rq_stack[IDETAPE_PC_STACK]; + int rq_stack_index; /* We implement a circular array */ + + /* + * DSC polling variables. 
+ * + * While polling for DSC we use postponed_rq to postpone the + * current request so that ide.c will be able to service + * pending requests on the other device. Note that at most + * we will have only one DSC (usually data transfer) request + * in the device request queue. Additional requests can be + * queued in our internal pipeline, but they will be visible + * to ide.c only one at a time. + */ + struct request *postponed_rq; + unsigned long dsc_polling_start; /* The time in which we started polling for DSC */ + struct timer_list dsc_timer; /* Timer used to poll for dsc */ + unsigned long best_dsc_rw_frequency; /* Read/Write dsc polling frequency */ + unsigned long dsc_polling_frequency; /* The current polling frequency */ + unsigned long dsc_timeout; /* Maximum waiting time */ + + /* + * Position information + */ + byte partition; + unsigned int block_address; /* Current block */ + + /* + * Last error information + */ + byte sense_key, asc, ascq; + + /* + * Character device operation + */ + unsigned int minor; + char name[4]; /* device name */ + idetape_chrdev_direction_t chrdev_direction; /* Current character device data transfer direction */ + + /* + * Device information + */ + unsigned short tape_block_size; /* Usually 512 or 1024 bytes */ + int user_bs_factor; + idetape_capabilities_page_t capabilities; /* Copy of the tape's Capabilities and Mechanical Page */ + + /* + * Active data transfer request parameters. + * + * At most, there is only one ide-tape originated data transfer + * request in the device request queue. This allows ide.c to + * easily service requests from the other device when we + * postpone our active request. In the pipelined operation + * mode, we use our internal pipeline structure to hold + * more data requests. + * + * The data buffer size is chosen based on the tape's + * recommendation. 
+ */ + struct request *active_data_request; /* Pointer to the request which is waiting in the device request queue */ + int stage_size; /* Data buffer size (chosen based on the tape's recommendation */ + idetape_stage_t *merge_stage; + int merge_stage_size; + struct buffer_head *bh; + char *b_data; + int b_count; + + /* + * Pipeline parameters. + * + * To accomplish non-pipelined mode, we simply set the following + * variables to zero (or NULL, where appropriate). + */ + int nr_stages; /* Number of currently used stages */ + int nr_pending_stages; /* Number of pending stages */ + int max_stages; /* We will not allocate more than this number of stages */ + idetape_stage_t *first_stage; /* The first stage which will be removed from the pipeline */ + idetape_stage_t *active_stage; /* The currently active stage */ + idetape_stage_t *next_stage; /* Will be serviced after the currently active request */ + idetape_stage_t *last_stage; /* New requests will be added to the pipeline here */ + idetape_stage_t *cache_stage; /* Optional free stage which we can use */ + int pages_per_stage; + int excess_bh_size; /* Wasted space in each stage */ + + unsigned int flags; /* Status/Action flags */ +} idetape_tape_t; + +/* + * Tape flag bits values. + */ +#define IDETAPE_IGNORE_DSC 0 +#define IDETAPE_ADDRESS_VALID 1 /* 0 When the tape position is unknown */ +#define IDETAPE_BUSY 2 /* Device already opened */ +#define IDETAPE_PIPELINE_ERROR 3 /* Error detected in a pipeline stage */ +#define IDETAPE_DETECT_BS 4 /* Attempt to auto-detect the current user block size */ +#define IDETAPE_FILEMARK 5 /* Currently on a filemark */ /* - * Error codes which are returned in rq->errors to the higher part - * of the driver. 
+ * Supported ATAPI tape drives packet commands */ - -#define IDETAPE_RQ_ERROR_GENERAL 1 -#define IDETAPE_RQ_ERROR_FILEMARK 2 -#define IDETAPE_RQ_ERROR_EOD 3 +#define IDETAPE_TEST_UNIT_READY_CMD 0x00 +#define IDETAPE_REWIND_CMD 0x01 +#define IDETAPE_REQUEST_SENSE_CMD 0x03 +#define IDETAPE_READ_CMD 0x08 +#define IDETAPE_WRITE_CMD 0x0a +#define IDETAPE_WRITE_FILEMARK_CMD 0x10 +#define IDETAPE_SPACE_CMD 0x11 +#define IDETAPE_INQUIRY_CMD 0x12 +#define IDETAPE_ERASE_CMD 0x19 +#define IDETAPE_MODE_SENSE_CMD 0x1a +#define IDETAPE_LOAD_UNLOAD_CMD 0x1b +#define IDETAPE_LOCATE_CMD 0x2b +#define IDETAPE_READ_POSITION_CMD 0x34 /* - * ATAPI Task File Registers (Re-definition of the ATA Task File - * Registers for an ATAPI packet command). - * From Table 3-2 of QIC-157C. + * Some defines for the SPACE command */ +#define IDETAPE_SPACE_OVER_FILEMARK 1 +#define IDETAPE_SPACE_TO_EOD 3 -/* Read Access */ +/* + * Some defines for the LOAD UNLOAD command + */ +#define IDETAPE_LU_LOAD_MASK 1 +#define IDETAPE_LU_RETENSION_MASK 2 +#define IDETAPE_LU_EOT_MASK 4 -#define IDETAPE_DATA_OFFSET (0) -#define IDETAPE_ERROR_OFFSET (1) -#define IDETAPE_IREASON_OFFSET (2) -#define IDETAPE_RESERVED3_OFFSET (3) -#define IDETAPE_BCOUNTL_OFFSET (4) -#define IDETAPE_BCOUNTH_OFFSET (5) -#define IDETAPE_DRIVESEL_OFFSET (6) -#define IDETAPE_STATUS_OFFSET (7) +/* + * Special requests for our block device strategy routine. + * + * In order to service a character device command, we add special + * requests to the tail of our block device request queue and wait + * for their completion. 
+ * + */ +#define IDETAPE_FIRST_RQ 90 -#define IDETAPE_DATA_REG (HWIF(drive)->io_base+IDETAPE_DATA_OFFSET) -#define IDETAPE_ERROR_REG (HWIF(drive)->io_base+IDETAPE_ERROR_OFFSET) -#define IDETAPE_IREASON_REG (HWIF(drive)->io_base+IDETAPE_IREASON_OFFSET) -#define IDETAPE_RESERVED3_REG (HWIF(drive)->io_base+IDETAPE_RESERVED3_OFFSET) -#define IDETAPE_BCOUNTL_REG (HWIF(drive)->io_base+IDETAPE_BCOUNTL_OFFSET) -#define IDETAPE_BCOUNTH_REG (HWIF(drive)->io_base+IDETAPE_BCOUNTH_OFFSET) -#define IDETAPE_DRIVESEL_REG (HWIF(drive)->io_base+IDETAPE_DRIVESEL_OFFSET) -#define IDETAPE_STATUS_REG (HWIF(drive)->io_base+IDETAPE_STATUS_OFFSET) +/* + * IDETAPE_PC_RQ is used to queue a packet command in the request queue. + */ +#define IDETAPE_PC_RQ 90 -/* Write Access */ +/* + * IDETAPE_READ_RQ and IDETAPE_WRITE_RQ are used by our + * character device interface to request read/write operations from + * our block device interface. + */ +#define IDETAPE_READ_RQ 92 +#define IDETAPE_WRITE_RQ 93 +#define IDETAPE_ABORTED_WRITE_RQ 94 -#define IDETAPE_FEATURES_OFFSET (1) -#define IDETAPE_ATACOMMAND_OFFSET (7) +#define IDETAPE_LAST_RQ 94 -#define IDETAPE_FEATURES_REG (HWIF(drive)->io_base+IDETAPE_FEATURES_OFFSET) -#define IDETAPE_ATACOMMAND_REG (HWIF(drive)->io_base+IDETAPE_ATACOMMAND_OFFSET) -#define IDETAPE_CONTROL_REG (HWIF(drive)->ctl_port) +/* + * A macro which can be used to check if a we support a given + * request command. + */ +#define IDETAPE_RQ_CMD(cmd) ((cmd >= IDETAPE_FIRST_RQ) && (cmd <= IDETAPE_LAST_RQ)) +/* + * We are now able to postpone an idetape request in the stage + * where it is polling for DSC and service requests from the other + * ide device meanwhile. + */ +#define IDETAPE_RQ_POSTPONED 0x1234 /* - * Structure of the various task file registers + * Error codes which are returned in rq->errors to the higher part + * of the driver. */ +#define IDETAPE_ERROR_GENERAL 101 +#define IDETAPE_ERROR_FILEMARK 102 +#define IDETAPE_ERROR_EOD 103 /* * The ATAPI Status Register. 
*/ - typedef union { unsigned all :8; struct { @@ -496,11 +776,9 @@ unsigned idx :1; /* Reserved */ unsigned corr :1; /* Correctable error occurred */ unsigned drq :1; /* Data is request by the device */ - unsigned dsc :1; /* Set when a media access command is finished */ - /* Reads / Writes are NOT media access commands */ + unsigned dsc :1; /* Buffer availability / Media access command finished */ unsigned reserved5 :1; /* Reserved */ - unsigned drdy :1; /* Ignored for ATAPI commands */ - /* (The device is ready to accept ATA command) */ + unsigned drdy :1; /* Ignored for ATAPI commands (ready to accept ATA command) */ unsigned bsy :1; /* The device has access to the command block */ } b; } idetape_status_reg_t; @@ -508,7 +786,6 @@ /* * The ATAPI error register. */ - typedef union { unsigned all :8; struct { @@ -523,7 +800,6 @@ /* * ATAPI Feature Register */ - typedef union { unsigned all :8; struct { @@ -537,7 +813,6 @@ /* * ATAPI Byte Count Register. */ - typedef union { unsigned all :16; struct { @@ -549,7 +824,6 @@ /* * ATAPI Interrupt Reason Register. */ - typedef union { unsigned all :8; struct { @@ -562,7 +836,6 @@ /* * ATAPI Drive Select Register */ - typedef union { unsigned all :8; struct { @@ -577,7 +850,6 @@ /* * ATAPI Device Control Register */ - typedef union { unsigned all :8; struct { @@ -593,126 +865,91 @@ * idetape_chrdev_t provides the link between out character device * interface and our block device interface and the corresponding * ide_drive_t structure. - * - * We currently support only one tape drive. - * */ - typedef struct { ide_drive_t *drive; - int major,minor; - char name[4]; } idetape_chrdev_t; /* * The following is used to format the general configuration word of * the ATAPI IDENTIFY DEVICE command. 
*/ - struct idetape_id_gcw { - - unsigned packet_size :2; /* Packet Size */ - unsigned reserved2 :1; /* Reserved */ - unsigned reserved3 :1; /* Reserved */ - unsigned reserved4 :1; /* Reserved */ - unsigned drq_type :2; /* Command packet DRQ type */ - unsigned removable :1; /* Removable media */ - unsigned device_type :5; /* Device type */ - unsigned reserved13 :1; /* Reserved */ - unsigned protocol :2; /* Protocol type */ + unsigned packet_size :2; /* Packet Size */ + unsigned reserved234 :3; /* Reserved */ + unsigned drq_type :2; /* Command packet DRQ type */ + unsigned removable :1; /* Removable media */ + unsigned device_type :5; /* Device type */ + unsigned reserved13 :1; /* Reserved */ + unsigned protocol :2; /* Protocol type */ }; /* * INQUIRY packet command - Data Format (From Table 6-8 of QIC-157C) */ - typedef struct { - unsigned device_type :5; /* Peripheral Device Type */ - unsigned reserved0_765 :3; /* Peripheral Qualifier - Reserved */ - unsigned reserved1_6t0 :7; /* Reserved */ - unsigned rmb :1; /* Removable Medium Bit */ - unsigned ansi_version :3; /* ANSI Version */ - unsigned ecma_version :3; /* ECMA Version */ - unsigned iso_version :2; /* ISO Version */ - unsigned response_format :4; /* Response Data Format */ - unsigned reserved3_45 :2; /* Reserved */ - unsigned reserved3_6 :1; /* TrmIOP - Reserved */ - unsigned reserved3_7 :1; /* AENC - Reserved */ - byte additional_length; /* Additional Length (total_length-4) */ - byte reserved_5; /* Reserved */ - byte reserved_6; /* Reserved */ - unsigned reserved7_0 :1; /* SftRe - Reserved */ - unsigned reserved7_1 :1; /* CmdQue - Reserved */ - unsigned reserved7_2 :1; /* Reserved */ - unsigned reserved7_3 :1; /* Linked - Reserved */ - unsigned reserved7_4 :1; /* Sync - Reserved */ - unsigned reserved7_5 :1; /* WBus16 - Reserved */ - unsigned reserved7_6 :1; /* WBus32 - Reserved */ - unsigned reserved7_7 :1; /* RelAdr - Reserved */ - byte vendor_id [8]; /* Vendor Identification */ - byte product_id [16]; 
/* Product Identification */ - byte revision_level [4]; /* Revision Level */ - byte vendor_specific [20]; /* Vendor Specific - Optional */ - byte reserved56t95 [40]; /* Reserved - Optional */ - - /* Additional information may be returned */ + unsigned device_type :5; /* Peripheral Device Type */ + unsigned reserved0_765 :3; /* Peripheral Qualifier - Reserved */ + unsigned reserved1_6t0 :7; /* Reserved */ + unsigned rmb :1; /* Removable Medium Bit */ + unsigned ansi_version :3; /* ANSI Version */ + unsigned ecma_version :3; /* ECMA Version */ + unsigned iso_version :2; /* ISO Version */ + unsigned response_format :4; /* Response Data Format */ + unsigned reserved3_45 :2; /* Reserved */ + unsigned reserved3_6 :1; /* TrmIOP - Reserved */ + unsigned reserved3_7 :1; /* AENC - Reserved */ + u8 additional_length; /* Additional Length (total_length-4) */ + u8 rsv5, rsv6, rsv7; /* Reserved */ + u8 vendor_id[8]; /* Vendor Identification */ + u8 product_id[16]; /* Product Identification */ + u8 revision_level[4]; /* Revision Level */ + u8 vendor_specific[20]; /* Vendor Specific - Optional */ + u8 reserved56t95[40]; /* Reserved - Optional */ + /* Additional information may be returned */ } idetape_inquiry_result_t; /* * READ POSITION packet command - Data Format (From Table 6-57) */ - typedef struct { - unsigned reserved0_10 :2; /* Reserved */ - unsigned bpu :1; /* Block Position Unknown */ - unsigned reserved0_543 :3; /* Reserved */ - unsigned eop :1; /* End Of Partition */ - unsigned bop :1; /* Beginning Of Partition */ - byte partition_num; /* Partition Number */ - byte reserved_2; /* Reserved */ - byte reserved_3; /* Reserved */ - unsigned long first_block; /* First Block Location */ - unsigned long last_block; /* Last Block Location (Optional) */ - byte reserved_12; /* Reserved */ - byte blocks_in_buffer_2; /* Blocks In Buffer - MSB (Optional) */ - byte blocks_in_buffer_1; - byte blocks_in_buffer_0; /* Blocks In Buffer - LSB (Optional) */ - unsigned long bytes_in_buffer; 
/* Bytes In Buffer (Optional) */ + unsigned reserved0_10 :2; /* Reserved */ + unsigned bpu :1; /* Block Position Unknown */ + unsigned reserved0_543 :3; /* Reserved */ + unsigned eop :1; /* End Of Partition */ + unsigned bop :1; /* Beginning Of Partition */ + u8 partition; /* Partition Number */ + u8 reserved2, reserved3; /* Reserved */ + u32 first_block; /* First Block Location */ + u32 last_block; /* Last Block Location (Optional) */ + u8 reserved12; /* Reserved */ + u8 blocks_in_buffer[3]; /* Blocks In Buffer - (Optional) */ + u32 bytes_in_buffer; /* Bytes In Buffer (Optional) */ } idetape_read_position_result_t; /* * REQUEST SENSE packet command result - Data Format. */ - typedef struct { - unsigned error_code :7; /* Current of deferred errors */ - unsigned valid :1; /* The information field conforms to QIC-157C */ - unsigned reserved_1 :8; /* Segment Number - Reserved */ - unsigned sense_key :4; /* Sense Key */ - unsigned reserved2_4 :1; /* Reserved */ - unsigned ili :1; /* Incorrect Length Indicator */ - unsigned eom :1; /* End Of Medium */ - unsigned filemark :1; /* Filemark */ - - /* - * We can't use a 32 bit variable, since it will be re-aligned - * by GCC, as we are not on a 32 bit boundary. 
- */ - - byte information1; /* MSB - Information - Command specific */ - byte information2; - byte information3; - byte information4; /* LSB */ - byte asl; /* Additional sense length (n-7) */ - unsigned long command_specific; /* Additional command specific information */ - byte asc; /* Additional Sense Code */ - byte ascq; /* Additional Sense Code Qualifier */ - byte replaceable_unit_code; /* Field Replaceable Unit Code */ - unsigned sk_specific1 :7; /* Sense Key Specific */ - unsigned sksv :1; /* Sense Key Specific information is valid */ - byte sk_specific2; /* Sense Key Specific */ - byte sk_specific3; /* Sense Key Specific */ - byte pad [2]; /* Padding to 20 bytes */ + unsigned error_code :7; /* Current or deferred errors */ + unsigned valid :1; /* The information field conforms to QIC-157C */ + u8 reserved1 :8; /* Segment Number - Reserved */ + unsigned sense_key :4; /* Sense Key */ + unsigned reserved2_4 :1; /* Reserved */ + unsigned ili :1; /* Incorrect Length Indicator */ + unsigned eom :1; /* End Of Medium */ + unsigned filemark :1; /* Filemark */ + u32 information __attribute__ ((packed)); + u8 asl; /* Additional sense length (n-7) */ + u32 command_specific; /* Additional command specific information */ + u8 asc; /* Additional Sense Code */ + u8 ascq; /* Additional Sense Code Qualifier */ + u8 replaceable_unit_code; /* Field Replaceable Unit Code */ + unsigned sk_specific1 :7; /* Sense Key Specific */ + unsigned sksv :1; /* Sense Key Specific information is valid */ + u8 sk_specific2; /* Sense Key Specific */ + u8 sk_specific3; /* Sense Key Specific */ + u8 pad[2]; /* Padding to 20 bytes */ } idetape_request_sense_result_t; /* @@ -720,19 +957,16 @@ * packet commands. Those packet commands are still not supported * by ide-tape. 
*/ - #define IDETAPE_CAPABILITIES_PAGE 0x2a /* * Mode Parameter Header for the MODE SENSE packet command */ - typedef struct { - byte mode_data_length; /* The length of the following data that is */ - /* available to be transferred */ - byte medium_type; /* Medium Type */ - byte dsp; /* Device Specific Parameter */ - byte bdl; /* Block Descriptor Length */ + u8 mode_data_length; /* Length of the following data transfer */ + u8 medium_type; /* Medium Type */ + u8 dsp; /* Device Specific Parameter */ + u8 bdl; /* Block Descriptor Length */ } idetape_mode_parameter_header_t; /* @@ -740,1708 +974,684 @@ * * Support for block descriptors is optional. */ - typedef struct { - byte density_code; /* Medium density code */ - byte blocks1; /* Number of blocks - MSB */ - byte blocks2; /* Number of blocks - Middle byte */ - byte blocks3; /* Number of blocks - LSB */ - byte reserved4; /* Reserved */ - byte length1; /* Block Length - MSB */ - byte length2; /* Block Length - Middle byte */ - byte length3; /* Block Length - LSB */ + u8 density_code; /* Medium density code */ + u8 blocks[3]; /* Number of blocks */ + u8 reserved4; /* Reserved */ + u8 length[3]; /* Block Length */ } idetape_parameter_block_descriptor_t; /* * The Data Compression Page, as returned by the MODE SENSE packet command. 
*/ - typedef struct { - unsigned page_code :6; /* Page Code - Should be 0xf */ - unsigned reserved :1; /* Reserved */ - unsigned ps :1; - byte page_length; /* Page Length - Should be 14 */ - unsigned reserved2 :6; /* Reserved */ - unsigned dcc :1; /* Data Compression Capable */ - unsigned dce :1; /* Data Compression Enable */ - unsigned reserved3 :5; /* Reserved */ - unsigned red :2; /* Report Exception on Decompression */ - unsigned dde :1; /* Data Decompression Enable */ - unsigned long ca; /* Compression Algorithm */ - unsigned long da; /* Decompression Algorithm */ - byte reserved_12; /* Reserved */ - byte reserved_13; /* Reserved */ - byte reserved_14; /* Reserved */ - byte reserved_15; /* Reserved */ + unsigned page_code :6; /* Page Code - Should be 0xf */ + unsigned reserved0 :1; /* Reserved */ + unsigned ps :1; + u8 page_length; /* Page Length - Should be 14 */ + unsigned reserved2 :6; /* Reserved */ + unsigned dcc :1; /* Data Compression Capable */ + unsigned dce :1; /* Data Compression Enable */ + unsigned reserved3 :5; /* Reserved */ + unsigned red :2; /* Report Exception on Decompression */ + unsigned dde :1; /* Data Decompression Enable */ + u32 ca; /* Compression Algorithm */ + u32 da; /* Decompression Algorithm */ + u8 reserved[4]; /* Reserved */ } idetape_data_compression_page_t; /* * The Medium Partition Page, as returned by the MODE SENSE packet command. 
*/ - typedef struct { - unsigned page_code :6; /* Page Code - Should be 0x11 */ - unsigned reserved1_6 :1; /* Reserved */ - unsigned ps :1; - byte page_length; /* Page Length - Should be 6 */ - byte map; /* Maximum Additional Partitions - Should be 0 */ - byte apd; /* Additional Partitions Defined - Should be 0 */ - unsigned reserved4_012 :3; /* Reserved */ - unsigned psum :2; /* Should be 0 */ - unsigned idp :1; /* Should be 0 */ - unsigned sdp :1; /* Should be 0 */ - unsigned fdp :1; /* Fixed Data Partitions */ - byte mfr; /* Medium Format Recognition */ - byte reserved6; /* Reserved */ - byte reserved7; /* Reserved */ + unsigned page_code :6; /* Page Code - Should be 0x11 */ + unsigned reserved1_6 :1; /* Reserved */ + unsigned ps :1; + u8 page_length; /* Page Length - Should be 6 */ + u8 map; /* Maximum Additional Partitions - Should be 0 */ + u8 apd; /* Additional Partitions Defined - Should be 0 */ + unsigned reserved4_012 :3; /* Reserved */ + unsigned psum :2; /* Should be 0 */ + unsigned idp :1; /* Should be 0 */ + unsigned sdp :1; /* Should be 0 */ + unsigned fdp :1; /* Fixed Data Partitions */ + u8 mfr; /* Medium Format Recognition */ + u8 reserved[2]; /* Reserved */ } idetape_medium_partition_page_t; -/* - * Prototypes of various functions in ide-tape.c - * - * The following functions are called from ide.c, and their prototypes - * are available in ide.h: - * - * idetape_identify_device - * idetape_setup - * idetape_blkdev_ioctl - * idetape_do_request - * idetape_blkdev_open - * idetape_blkdev_release - * idetape_register_chrdev (void); - */ - -/* - * The following functions are used to transfer data from / to the - * tape's data register. - */ - -void idetape_input_data (ide_drive_t *drive,void *buffer, unsigned long bcount); -void idetape_output_data (ide_drive_t *drive,void *buffer, unsigned long bcount); -void idetape_discard_data (ide_drive_t *drive, unsigned long bcount); - -/* - * Packet command related functions. 
- */ - -void idetape_issue_packet_command (ide_drive_t *drive,idetape_packet_command_t *pc,ide_handler_t *handler); -void idetape_pc_intr (ide_drive_t *drive); +#define IDETAPE_MIN(a,b) ((a)<(b) ? (a):(b)) +#define IDETAPE_MAX(a,b) ((a)>(b) ? (a):(b)) /* - * DSC handling functions. + * Run time configurable parameters. */ - -void idetape_postpone_request (ide_drive_t *drive); -void idetape_poll_for_dsc (unsigned long data); -void idetape_poll_for_dsc_direct (unsigned long data); -void idetape_put_back_postponed_request (ide_drive_t *drive); -void idetape_media_access_finished (ide_drive_t *drive); +typedef struct { + int dsc_rw_frequency; + int dsc_media_access_frequency; + int nr_stages; +} idetape_config_t; /* - * Some more packet command related functions. + * The variables below are used for the character device interface. + * Additional state variables are defined in our ide_drive_t structure. */ - -void idetape_pc_callback (ide_drive_t *drive); -void idetape_retry_pc (ide_drive_t *drive); -void idetape_zero_packet_command (idetape_packet_command_t *pc); -void idetape_queue_pc_head (ide_drive_t *drive,idetape_packet_command_t *pc,struct request *rq); -void idetape_analyze_error (ide_drive_t *drive,idetape_request_sense_result_t *result); - -idetape_packet_command_t *idetape_next_pc_storage (ide_drive_t *drive); -struct request *idetape_next_rq_storage (ide_drive_t *drive); +static idetape_chrdev_t idetape_chrdevs[MAX_HWIFS * MAX_DRIVES]; +static int idetape_chrdev_present = 0; /* - * Various packet commands - */ - -void idetape_create_inquiry_cmd (idetape_packet_command_t *pc); -void idetape_inquiry_callback (ide_drive_t *drive); -void idetape_create_locate_cmd (idetape_packet_command_t *pc,unsigned long block,byte partition); -void idetape_create_rewind_cmd (idetape_packet_command_t *pc); -void idetape_create_write_filemark_cmd (idetape_packet_command_t *pc,int write_filemark); -void idetape_create_load_unload_cmd (idetape_packet_command_t *pc,int cmd); -void 
idetape_create_space_cmd (idetape_packet_command_t *pc,long count,byte cmd); -void idetape_create_erase_cmd (idetape_packet_command_t *pc); -void idetape_create_test_unit_ready_cmd (idetape_packet_command_t *pc); -void idetape_create_read_position_cmd (idetape_packet_command_t *pc); -void idetape_read_position_callback (ide_drive_t *drive); -void idetape_create_read_cmd (idetape_packet_command_t *pc,unsigned long length); -void idetape_read_callback (ide_drive_t *drive); -void idetape_create_write_cmd (idetape_packet_command_t *pc,unsigned long length); -void idetape_write_callback (ide_drive_t *drive); -void idetape_create_request_sense_cmd (idetape_packet_command_t *pc); -void idetape_create_mode_sense_cmd (idetape_packet_command_t *pc,byte page_code); -void idetape_request_sense_callback (ide_drive_t *drive); - -void idetape_display_inquiry_result (byte *buffer); - -/* - * Character device callback functions. - * - * We currently support: - * - * OPEN, RELEASE, READ, WRITE and IOCTL. - */ - -int idetape_chrdev_read (struct inode *inode, struct file *file, char *buf, int count); -int idetape_chrdev_write (struct inode *inode, struct file *file, const char *buf, int count); -int idetape_chrdev_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); -int idetape_chrdev_open (struct inode *inode, struct file *file); -void idetape_chrdev_release (struct inode *inode,struct file *file); - -/* - * idetape_mtioctop implements general magnetic tape io control - * commands, as defined in include/linux/mtio.h. Those commands are - * accessed through the character device interface, using the MTIOCTOP - * ioctl. + * Too bad. The drive wants to send us data which we are not ready to accept. + * Just throw it away. 
*/ - -int idetape_mtioctop (ide_drive_t *drive,short mt_op,int mt_count); +static void idetape_discard_data (ide_drive_t *drive, unsigned int bcount) +{ + while (bcount--) + IN_BYTE (IDE_DATA_REG); +} -/* - * idetape_space_over_filemarks handles the MTFSF, MTFSFM, ... mtio.h - * commands. - */ - -int idetape_space_over_filemarks (ide_drive_t *drive,short mt_op,int mt_count); +static void idetape_input_buffers (ide_drive_t *drive, idetape_pc_t *pc, unsigned int bcount) +{ + struct buffer_head *bh = pc->bh; + int count; + + while (bcount) { +#if IDETAPE_DEBUG_BUGS + if (bh == NULL) { + printk (KERN_ERR "ide-tape: bh == NULL in idetape_input_buffers\n"); + idetape_discard_data (drive, bcount); + return; + } +#endif /* IDETAPE_DEBUG_BUGS */ + count = IDETAPE_MIN (bh->b_size - bh->b_count, bcount); + atapi_input_bytes (drive, bh->b_data + bh->b_count, count); + bcount -= count; bh->b_count += count; + if (bh->b_count == bh->b_size) { + bh = bh->b_reqnext; + if (bh) + bh->b_count = 0; + } + } + pc->bh = bh; +} -/* - * idetape_add_chrdev_read_request is called from idetape_chrdev_read - * to service a character device read request and add read-ahead - * requests to our pipeline. 
- */ - -int idetape_add_chrdev_read_request (ide_drive_t *drive,int blocks,char *buffer); +static void idetape_output_buffers (ide_drive_t *drive, idetape_pc_t *pc, unsigned int bcount) +{ + struct buffer_head *bh = pc->bh; + int count; + + while (bcount) { +#if IDETAPE_DEBUG_BUGS + if (bh == NULL) { + printk (KERN_ERR "ide-tape: bh == NULL in idetape_output_buffers\n"); + return; + } +#endif /* IDETAPE_DEBUG_BUGS */ + count = IDETAPE_MIN (pc->b_count, bcount); + atapi_output_bytes (drive, pc->b_data, count); + bcount -= count; pc->b_data += count; pc->b_count -= count; + if (!pc->b_count) { + pc->bh = bh = bh->b_reqnext; + if (bh) { + pc->b_data = bh->b_data; + pc->b_count = bh->b_count; + } + } + } +} -/* - * idetape_add_chrdev_write_request adds a character device write - * request to the pipeline. - */ - -int idetape_add_chrdev_write_request (ide_drive_t *drive,int blocks,char *buffer); +#ifdef CONFIG_BLK_DEV_TRITON +static void idetape_update_buffers (idetape_pc_t *pc) +{ + struct buffer_head *bh = pc->bh; + int count, bcount = pc->actually_transferred; -/* - * idetape_queue_rw_tail will add a command to the tail of the device - * request queue and wait for it to finish. This is used when we - * can not allocate pipeline stages (or in non-pipelined mode). - */ - -int idetape_queue_rw_tail (ide_drive_t *drive,int cmd,int blocks,char *buffer); + if (test_bit (PC_WRITING, &pc->flags)) + return; + while (bcount) { +#if IDETAPE_DEBUG_BUGS + if (bh == NULL) { + printk (KERN_ERR "ide-tape: bh == NULL in idetape_update_buffers\n"); + return; + } +#endif /* IDETAPE_DEBUG_BUGS */ + count = IDETAPE_MIN (bh->b_size, bcount); + bh->b_count = count; + if (bh->b_count == bh->b_size) + bh = bh->b_reqnext; + bcount -= count; + } + pc->bh = bh; +} +#endif /* CONFIG_BLK_DEV_TRITON */ /* - * Adds a packet command request to the tail of the device request - * queue and waits for it to be serviced. 
+ * idetape_poll_for_dsc gets invoked by a timer (which was set + * by idetape_postpone_request) to reinsert our postponed request + * into the request queue. + * + * Note that the procedure done here is different than the method + * we are using in idetape_queue_pc_head - There we are putting + * request(s) before our currently called request. + * + * Here, on the other hand, HWGROUP(drive)->rq is not our request + * but rather a request to another device. Therefore, we will let + * it finish and only then service our postponed request --> We don't + * touch HWGROUP(drive)->rq. */ - -int idetape_queue_pc_tail (ide_drive_t *drive,idetape_packet_command_t *pc); +static void idetape_poll_for_dsc (unsigned long data) +{ + ide_drive_t *drive=(ide_drive_t *) data; + idetape_tape_t *tape = drive->driver_data; -int idetape_position_tape (ide_drive_t *drive,unsigned long block); -int idetape_rewind_tape (ide_drive_t *drive); -int idetape_flush_tape_buffers (ide_drive_t *drive); + del_timer (&tape->dsc_timer); -/* - * Used to get device information - */ +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "ide-tape: Putting back postponed request\n"); +#endif /* IDETAPE_DEBUG_LOG */ +#if IDETAPE_DEBUG_BUGS + if (tape->postponed_rq == NULL) { + printk (KERN_ERR "tape->postponed_rq is NULL in idetape_poll_for_dsc\n"); + return; + } +#endif /* IDETAPE_DEBUG_BUGS */ -void idetape_get_mode_sense_results (ide_drive_t *drive); + (void) ide_do_drive_cmd (drive, tape->postponed_rq, ide_next); +} /* - * General utility functions + * idetape_postpone_request postpones the current request so that + * ide.c will be able to service requests from another device on + * the same hwgroup while we are polling for DSC. 
*/ - -unsigned long idetape_swap_long (unsigned long temp); -unsigned short idetape_swap_short (unsigned short temp); +static void idetape_postpone_request (ide_drive_t *drive) +{ + idetape_tape_t *tape = drive->driver_data; + struct request *rq; + +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "Reached idetape_postpone_request\n"); +#endif /* IDETAPE_DEBUG_LOG */ +#if IDETAPE_DEBUG_BUGS + if (tape->postponed_rq != NULL) + printk (KERN_ERR "ide-tape.c bug - postponed_rq not NULL in idetape_postpone_request\n"); +#endif /* IDETAPE_DEBUG_BUGS */ -#define IDETAPE_MIN(a,b) ((a)<(b) ? (a):(b)) + /* + * Set the timer parameters. + */ + tape->dsc_timer.expires=jiffies + tape->dsc_polling_frequency; + tape->dsc_timer.data=(unsigned long) drive; + tape->dsc_timer.function = &idetape_poll_for_dsc; + init_timer (&tape->dsc_timer); -/* - * Pipeline related functions - */ + /* + * Remove current request from the request queue: + */ + tape->postponed_rq = rq = HWGROUP(drive)->rq; + rq->rq_status = IDETAPE_RQ_POSTPONED; + blk_dev[MAJOR(rq->rq_dev)].current_request = rq->next; + HWGROUP(drive)->rq = NULL; -idetape_pipeline_stage_t *idetape_kmalloc_stage (ide_drive_t *drive); -void idetape_kfree_stage (idetape_pipeline_stage_t *stage); -void idetape_copy_buffer_from_stage (idetape_pipeline_stage_t *stage,char *buffer); -void idetape_copy_buffer_to_stage (idetape_pipeline_stage_t *stage,char *buffer); -void idetape_increase_max_pipeline_stages (ide_drive_t *drive); -void idetape_add_stage_tail (ide_drive_t *drive,idetape_pipeline_stage_t *stage); -void idetape_remove_stage_head (ide_drive_t *drive); -void idetape_active_next_stage (ide_drive_t *drive); -void idetape_wait_for_pipeline (ide_drive_t *drive); -void idetape_discard_read_pipeline (ide_drive_t *drive); -void idetape_empty_write_pipeline (ide_drive_t *drive); -void idetape_insert_pipeline_into_queue (ide_drive_t *drive); + add_timer(&tape->dsc_timer); /* Activate the polling timer */ +} /* - * For general magnetic tape device 
compatibility. - */ - -#include - -/* - * Global variables - * - * The variables below are used for the character device interface. - * - * Additional state variables are defined in our ide_drive_t structure. - */ - -idetape_chrdev_t idetape_chrdev; /* Character device interface information */ -byte idetape_drive_already_found=0; /* 1 when the above data structure is initialized */ - -/* - * Our character device supporting functions, passed to register_chrdev. - */ - -static struct file_operations idetape_fops = { - NULL, /* lseek - default */ - idetape_chrdev_read, /* read */ - idetape_chrdev_write, /* write */ - NULL, /* readdir - bad */ - NULL, /* select */ - idetape_chrdev_ioctl, /* ioctl */ - NULL, /* mmap */ - idetape_chrdev_open, /* open */ - idetape_chrdev_release, /* release */ - NULL, /* fsync */ - NULL, /* fasync */ - NULL, /* check_media_change */ - NULL /* revalidate */ -}; - - -/* - * idetape_identify_device is called by do_identify in ide.c during - * the device probing stage to check the contents of the ATAPI IDENTIFY - * command results, in case the device type is tape. We return: - * - * 1 If the tape can be supported by us, based on the information - * we have so far. - * - * 0 If this tape driver is not currently supported by us. - * - * In case we decide to support the tape, we store the current drive - * pointer in our character device global variables, so that we can - * pass between both interfaces. 
- */ - -int idetape_identify_device (ide_drive_t *drive,struct hd_driveid *id) - -{ - struct idetape_id_gcw gcw; - unsigned short *ptr; - int support=1; -#if IDETAPE_DEBUG_LOG - unsigned short mask,i; -#endif /* IDETAPE_DEBUG_LOG */ - - ptr=(unsigned short *) &gcw; - *ptr=id->config; - -#if IDETAPE_DEBUG_LOG - printk ("Dumping ATAPI Identify Device tape parameters\n"); - - printk ("Protocol Type: "); - switch (gcw.protocol) { - case 0: case 1: printk ("ATA\n");break; - case 2: printk ("ATAPI\n");break; - case 3: printk ("Reserved (Unknown to ide-tape)\n");break; - } - - printk ("Device Type: %x - ",gcw.device_type); - switch (gcw.device_type) { - case 0: printk ("Direct-access Device\n");break; - case 1: printk ("Streaming Tape Device\n");break; - case 2: case 3: case 4: printk ("Reserved\n");break; - case 5: printk ("CD-ROM Device\n");break; - case 6: printk ("Reserved\n"); - case 7: printk ("Optical memory Device\n");break; - case 0x1f: printk ("Unknown or no Device type\n");break; - default: printk ("Reserved\n"); - } - printk ("Removable: %s",gcw.removable ? "Yes\n":"No\n"); - - printk ("Command Packet DRQ Type: "); - switch (gcw.drq_type) { - case 0: printk ("Microprocessor DRQ\n");break; - case 1: printk ("Interrupt DRQ\n");break; - case 2: printk ("Accelerated DRQ\n");break; - case 3: printk ("Reserved\n");break; - } - - printk ("Command Packet Size: "); - switch (gcw.packet_size) { - case 0: printk ("12 bytes\n");break; - case 1: printk ("16 bytes\n");break; - default: printk ("Reserved\n");break; - } - printk ("Model: %s\n",id->model); - printk ("Firmware Revision: %s\n",id->fw_rev); - printk ("Serial Number: %s\n",id->serial_no); - printk ("Write buffer size: %d bytes\n",id->buf_size*512); - printk ("DMA: %s",id->capability & 0x01 ? "Yes\n":"No\n"); - printk ("LBA: %s",id->capability & 0x02 ? "Yes\n":"No\n"); - printk ("IORDY can be disabled: %s",id->capability & 0x04 ? "Yes\n":"No\n"); - printk ("IORDY supported: %s",id->capability & 0x08 ? 
"Yes\n":"Unknown\n"); - printk ("ATAPI overlap supported: %s",id->capability & 0x20 ? "Yes\n":"No\n"); - printk ("PIO Cycle Timing Category: %d\n",id->tPIO); - printk ("DMA Cycle Timing Category: %d\n",id->tDMA); - printk ("Single Word DMA supported modes: "); - for (i=0,mask=1;i<8;i++,mask=mask << 1) { - if (id->dma_1word & mask) - printk ("%d ",i); - if (id->dma_1word & (mask << 8)) - printk ("(active) "); - } - printk ("\n"); - - printk ("Multi Word DMA supported modes: "); - for (i=0,mask=1;i<8;i++,mask=mask << 1) { - if (id->dma_mword & mask) - printk ("%d ",i); - if (id->dma_mword & (mask << 8)) - printk ("(active) "); - } - printk ("\n"); - - if (id->field_valid & 0x0002) { - printk ("Enhanced PIO Modes: %s\n",id->eide_pio_modes & 1 ? "Mode 3":"None"); - printk ("Minimum Multi-word DMA cycle per word: "); - if (id->eide_dma_min == 0) - printk ("Not supported\n"); - else - printk ("%d ns\n",id->eide_dma_min); - - printk ("Manufacturer\'s Recommended Multi-word cycle: "); - if (id->eide_dma_time == 0) - printk ("Not supported\n"); - else - printk ("%d ns\n",id->eide_dma_time); - - printk ("Minimum PIO cycle without IORDY: "); - if (id->eide_pio == 0) - printk ("Not supported\n"); - else - printk ("%d ns\n",id->eide_pio); - - printk ("Minimum PIO cycle with IORDY: "); - if (id->eide_pio_iordy == 0) - printk ("Not supported\n"); - else - printk ("%d ns\n",id->eide_pio_iordy); - - } - - else { - printk ("According to the device, fields 64-70 are not valid.\n"); - } -#endif /* IDETAPE_DEBUG_LOG */ - - /* Check that we can support this device */ - - if (gcw.protocol !=2 ) { - printk ("ide-tape: Protocol is not ATAPI\n");support=0; - } - - if (gcw.device_type != 1) { - printk ("ide-tape: Device type is not set to tape\n");support=0; - } - - if (!gcw.removable) { - printk ("ide-tape: The removable flag is not set\n");support=0; - } - - if (gcw.drq_type != 2) { - printk ("ide-tape: Sorry, DRQ types other than Accelerated DRQ\n"); - printk ("ide-tape: are still not 
supported by the driver\n");support=0; - } - - if (gcw.packet_size != 0) { - printk ("ide-tape: Packet size is not 12 bytes long\n"); - if (gcw.packet_size == 1) - printk ("ide-tape: Sorry, padding to 16 bytes is still not supported\n"); - support=0; - } - - if (idetape_drive_already_found) { - printk ("ide-tape: Sorry, only one ide tape drive is supported by the driver\n"); - support=0; - } - else { - idetape_drive_already_found=1; - idetape_chrdev.drive=drive; - idetape_chrdev.major=IDETAPE_MAJOR; - idetape_chrdev.minor=0; - idetape_chrdev.name[0]='h'; - idetape_chrdev.name[1]='t'; - idetape_chrdev.name[2]='0'; - idetape_chrdev.name[3]=0; - } - - return (support); /* In case support=0, we will not install the driver */ -} - -/* - * idetape_register_chrdev calls register_chrdev to register our character - * device interface. The connection to the ide_drive_t structure, which - * is used by the entire ide driver is provided by our global variable - * idetape_chrdev.drive, which was initialized earlier, during the device - * probing stage. - */ - -void idetape_register_chrdev (void) - -{ - int major,minor; - ide_drive_t *drive; - - if (!idetape_drive_already_found) - return; - - drive=idetape_chrdev.drive; - major=idetape_chrdev.major; - minor=idetape_chrdev.minor; - - if (register_chrdev (major,idetape_chrdev.name,&idetape_fops)) { - printk ("Unable to register character device interface !\n"); - /* ??? */ - } - else { - printk ("ide-tape: %s <-> %s : Character device interface on major = %d\n", - drive->name,idetape_chrdev.name,major); - } -} - -/* - * idetape_setup is called from the ide driver in the partition table - * identification stage, to: - * - * 1. Initialize our various state variables. - * 2. Ask the tape for its capabilities. - * 3. Allocate a buffer which will be used for data - * transfer. The buffer size is chosen based on - * the recommendation which we received in step (2). 
- * - * Note that at this point ide.c already assigned us an irq, so that - * we can queue requests here and wait for their completion. - */ - -void idetape_setup (ide_drive_t *drive) - -{ - idetape_tape_t *tape=&(drive->tape); - unsigned int allocation_length; -#if IDETAPE_ANTICIPATE_READ_WRITE_DSC - ide_hwif_t *hwif = HWIF(drive); - unsigned long t1, tmid, tn; -#endif /* IDETAPE_ANTICIPATE_READ_WRITE_DSC */ - -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Reached idetape_setup\n"); -#endif /* IDETAPE_DEBUG_LOG */ - - drive->ready_stat = 0; /* With an ATAPI device, we can issue packet commands */ - /* regardless of the state of DRDY */ - HWIF(drive)->tape_drive=drive; - - tape->block_address=0; - tape->block_address_valid=0; - tape->pc_stack_index=0; - tape->failed_pc=NULL; - tape->postponed_rq=NULL; - tape->busy=0; - tape->active_data_request=NULL; - tape->current_number_of_stages=0; - tape->first_stage=tape->next_stage=tape->last_stage=NULL; - tape->error_in_pipeline_stage=0; - tape->request_status=0; - tape->chrdev_direction=idetape_direction_none; - tape->reset_issued=0; - tape->pc=&(tape->pc_stack [0]); - -#if IDETAPE_PIPELINE - tape->max_number_of_stages=IDETAPE_MIN_PIPELINE_STAGES; - printk ("ide-tape: Operating in pipelined (fast and tricky) operation mode.\n"); -#else - tape->max_number_of_stages=0; - printk ("ide-tape: Operating in non-pipelined (slow and safe) operation mode.\n"); -#endif /* IDETAPE_PIPELINE */ - - idetape_get_mode_sense_results (drive); - - tape->data_buffer_size = tape->capabilities.ctl * tape->tape_block_size; - while (tape->data_buffer_size > 0xffff) { - tape->capabilities.ctl /= 2; - tape->data_buffer_size = tape->capabilities.ctl * tape->tape_block_size; - } - allocation_length=tape->data_buffer_size; - if (tape->data_buffer_size % IDETAPE_ALLOCATION_BLOCK) - allocation_length+=IDETAPE_ALLOCATION_BLOCK; - -#if IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE - tape->data_buffer=tape->merge_buffer=NULL; -#else - tape->data_buffer=kmalloc 
(allocation_length,GFP_KERNEL); - tape->merge_buffer=kmalloc (allocation_length,GFP_KERNEL); - if (tape->data_buffer == NULL || tape->merge_buffer == NULL) { - printk ("ide-tape: FATAL - Can not allocate 2 buffers of %d bytes each\n",allocation_length); - printk ("ide-tape: Aborting character device installation\n"); - idetape_drive_already_found=0; - unregister_chrdev (idetape_chrdev.major,idetape_chrdev.name); - return; - } -#endif /* IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE */ - - tape->merge_buffer_size=tape->merge_buffer_offset=0; - -#if IDETAPE_ANTICIPATE_READ_WRITE_DSC - - /* - * Cleverly select the DSC read/write polling frequency, based - * on the tape's speed, its recommended transfer unit, its - * internal buffer size and our operation mode. - * - * In the pipelined operation mode we aim for "catching" the - * tape when its internal buffer is about 50% full. This will - * dramatically reduce our polling frequency and will also - * leave enough time for the ongoing request of the other device - * to complete before the buffer is completely empty. We will - * then completely refill the buffer with requests from our - * internal pipeline. - * - * When operating in the non-pipelined operation mode, we - * can't allow ourself this luxury. Instead, we will try to take - * full advantage of the internal tape buffer by waiting only - * for one request to complete. This will increase our load - * on linux but will usually still fail to keep the tape - * constantly streaming. - */ - - /* - * We will ignore the above algorithm for now, as it can have - * a bad effect on interactive response under some conditions. - * The following attempts to find a balance between good latency - * and good system throughput. It will be nice to have all this - * configurable in run time at some point. 
- */ - t1 = (tape->data_buffer_size * HZ) / (tape->capabilities.speed * 1000); - tmid = (tape->capabilities.buffer_size * 32 * HZ) / (tape->capabilities.speed * 125); - tn = (IDETAPE_FIFO_THRESHOLD * tape->data_buffer_size * HZ) / (tape->capabilities.speed * 1000); - - if (tape->max_number_of_stages) { - if (drive->using_dma) - tape->best_dsc_rw_frequency = tmid; - else { - if (hwif->drives[drive->select.b.unit ^ 1].present || hwif->next != hwif) - tape->best_dsc_rw_frequency = IDETAPE_MIN ((tn + tmid) / 2, tmid); - else - tape->best_dsc_rw_frequency = IDETAPE_MIN (tn, tmid); - } - } else - tape->best_dsc_rw_frequency = t1; - - /* - * Ensure that the number we got makes sense. - */ - - if (tape->best_dsc_rw_frequency > IDETAPE_DSC_READ_WRITE_LOWEST_FREQUENCY) { - printk ("ide-tape: Although the recommended polling period is %lu jiffies, \n",tape->best_dsc_rw_frequency); - printk ("ide-tape: we will use %u jiffies\n",IDETAPE_DSC_READ_WRITE_LOWEST_FREQUENCY); - printk ("ide-tape: (It may well be that we are wrong here)\n"); - tape->best_dsc_rw_frequency = IDETAPE_DSC_READ_WRITE_LOWEST_FREQUENCY; - } - - if (tape->best_dsc_rw_frequency < IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY) { - printk ("ide-tape: Although the recommended polling period is %lu jiffies, \n",tape->best_dsc_rw_frequency); - printk ("ide-tape: we will use %u jiffies\n",IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY); - tape->best_dsc_rw_frequency = IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY; - } - -#else - tape->best_dsc_rw_frequency=IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY; -#endif /* IDETAPE_ANTICIPATE_READ_WRITE_DSC */ - - printk (KERN_INFO "ide-tape: %s <-> %s, %dKBps, %d*%dkB buffer, %dkB pipeline, %lums tDSC%s\n", - drive->name, "ht0", tape->capabilities.speed, (tape->capabilities.buffer_size * 512) / tape->data_buffer_size, - tape->data_buffer_size / 1024, tape->max_number_of_stages * tape->data_buffer_size / 1024, - tape->best_dsc_rw_frequency * 1000 / HZ, drive->using_dma ? 
", DMA":""); - return; -} - -/* - * idetape_get_mode_sense_results asks the tape about its various - * parameters. In particular, we will adjust our data transfer buffer - * size to the recommended value as returned by the tape. - */ - -void idetape_get_mode_sense_results (ide_drive_t *drive) - -{ - int retval; - idetape_tape_t *tape=&(drive->tape); - idetape_mode_parameter_header_t *header; - idetape_capabilities_page_t *capabilities; - idetape_packet_command_t pc; - - idetape_create_mode_sense_cmd (&pc,IDETAPE_CAPABILITIES_PAGE); - pc.buffer=pc.temp_buffer; - pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE; - pc.current_position=pc.temp_buffer; - retval=idetape_queue_pc_tail (drive,&pc); - - header=(idetape_mode_parameter_header_t *) pc.buffer; - capabilities=(idetape_capabilities_page_t *) (pc.buffer+sizeof (idetape_mode_parameter_header_t)); - - capabilities->max_speed=idetape_swap_short (capabilities->max_speed); - capabilities->ctl=idetape_swap_short (capabilities->ctl); - capabilities->speed=idetape_swap_short (capabilities->speed); - capabilities->buffer_size=idetape_swap_short (capabilities->buffer_size); - - tape->capabilities=*capabilities; /* Save us a copy */ - tape->tape_block_size=capabilities->blk512 ? 
512:1024; - - if (retval) { - printk ("ide-tape: Can't get tape parameters\n"); - printk ("ide-tape: Assuming some default parameters\n"); - tape->tape_block_size=512; - tape->capabilities.ctl=52; - tape->capabilities.speed=450; - tape->capabilities.buffer_size=6*52; - return; - } - -#if IDETAPE_DEBUG_LOG - printk ("Dumping the results of the MODE SENSE packet command\n"); - printk ("Mode Parameter Header:\n"); - printk ("Mode Data Length - %d\n",header->mode_data_length); - printk ("Medium Type - %d\n",header->medium_type); - printk ("Device Specific Parameter - %d\n",header->dsp); - printk ("Block Descriptor Length - %d\n",header->bdl); - - printk ("Capabilities and Mechanical Status Page:\n"); - printk ("Page code - %d\n",capabilities->page_code); - printk ("Page length - %d\n",capabilities->page_length); - printk ("Read only - %s\n",capabilities->ro ? "Yes":"No"); - printk ("Supports reverse space - %s\n",capabilities->sprev ? "Yes":"No"); - printk ("Supports erase initiated formatting - %s\n",capabilities->efmt ? "Yes":"No"); - printk ("Supports QFA two Partition format - %s\n",capabilities->qfa ? "Yes":"No"); - printk ("Supports locking the medium - %s\n",capabilities->lock ? "Yes":"No"); - printk ("The volume is currently locked - %s\n",capabilities->locked ? "Yes":"No"); - printk ("The device defaults in the prevent state - %s\n",capabilities->prevent ? "Yes":"No"); - printk ("Supports ejecting the medium - %s\n",capabilities->eject ? "Yes":"No"); - printk ("Supports error correction - %s\n",capabilities->ecc ? "Yes":"No"); - printk ("Supports data compression - %s\n",capabilities->cmprs ? "Yes":"No"); - printk ("Supports 512 bytes block size - %s\n",capabilities->blk512 ? "Yes":"No"); - printk ("Supports 1024 bytes block size - %s\n",capabilities->blk1024 ? "Yes":"No"); - printk ("Restricted byte count for PIO transfers - %s\n",capabilities->slowb ? 
"Yes":"No"); - printk ("Maximum supported speed in KBps - %d\n",capabilities->max_speed); - printk ("Continuous transfer limits in blocks - %d\n",capabilities->ctl); - printk ("Current speed in KBps - %d\n",capabilities->speed); - printk ("Buffer size - %d\n",capabilities->buffer_size*512); -#endif /* IDETAPE_DEBUG_LOG */ -} - -/* - * Packet Command Interface - * - * The current Packet Command is available in tape->pc, and will not - * change until we finish handling it. Each packet command is associated - * with a callback function that will be called when the command is - * finished. - * - * The handling will be done in three stages: - * - * 1. idetape_issue_packet_command will send the packet command to the - * drive, and will set the interrupt handler to idetape_pc_intr. - * - * 2. On each interrupt, idetape_pc_intr will be called. This step - * will be repeated until the device signals us that no more - * interrupts will be issued. - * - * 3. ATAPI Tape media access commands have immediate status with a - * delayed process. In case of a successful initiation of a - * media access packet command, the DSC bit will be set when the - * actual execution of the command is finished. - * Since the tape drive will not issue an interrupt, we have to - * poll for this event. In this case, we define the request as - * "low priority request" by setting rq_status to - * IDETAPE_RQ_POSTPONED, set a timer to poll for DSC and exit - * the driver. - * - * ide.c will then give higher priority to requests which - * originate from the other device, until will change rq_status - * to RQ_ACTIVE. - * - * 4. When the packet command is finished, it will be checked for errors. - * - * 5. In case an error was found, we queue a request sense packet command - * in front of the request queue and retry the operation up to - * IDETAPE_MAX_PC_RETRIES times. - * - * 6. 
In case no error was found, or we decided to give up and not - * to retry again, the callback function will be called and then - * we will handle the next request. - * - */ - -void idetape_issue_packet_command (ide_drive_t *drive,idetape_packet_command_t *pc,ide_handler_t *handler) - -{ - idetape_tape_t *tape; - idetape_bcount_reg_t bcount; - idetape_ireason_reg_t ireason; - int dma_ok=0; - - tape=&(drive->tape); - -#if IDETAPE_DEBUG_BUGS - if (tape->pc->c[0] == IDETAPE_REQUEST_SENSE_CMD && pc->c[0] == IDETAPE_REQUEST_SENSE_CMD) { - printk ("ide-tape: possible ide-tape.c bug - Two request sense in serial were issued\n"); - } -#endif /* IDETAPE_DEBUG_BUGS */ - - if (tape->failed_pc == NULL && pc->c[0] != IDETAPE_REQUEST_SENSE_CMD) - tape->failed_pc=pc; - tape->pc=pc; /* Set the current packet command */ - - if (pc->retries > IDETAPE_MAX_PC_RETRIES || pc->abort) { - - /* - * We will "abort" retrying a packet command in case - * a legitimate error code was received (crossing a - * filemark, or DMA error in the end of media, for - * example). - */ - - if (!pc->abort) { - printk ("ide-tape: %s: I/O error, ",drive->name); - printk ("pc = %x, key = %x, asc = %x, ascq = %x\n",pc->c[0],tape->sense_key,tape->asc,tape->ascq); -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Maximum retries reached - Giving up\n"); -#endif /* IDETAPE_DEBUG_LOG */ - pc->error=1; /* Giving up */ - } - tape->failed_pc=NULL; -#if IDETAPE_DEBUG_BUGS - if (pc->callback==NULL) - printk ("ide-tape: ide-tape bug - Callback function not set !\n"); - else -#endif /* IDETAPE_DEBUG_BUGS */ - (*pc->callback)(drive); - return; - } - -#if IDETAPE_DEBUG_LOG - printk ("Retry number - %d\n",pc->retries); -#endif /* IDETAPE_DEBUG_LOG */ - - pc->retries++; - -/* - * We no longer call ide_wait_stat to wait for the drive to be ready, - * as ide.c already does this for us in do_request. 
- */ - - pc->actually_transferred=0; /* We haven't transferred any data yet */ - pc->current_position=pc->buffer; - bcount.all=pc->request_transfer; /* Request to transfer the entire buffer at once */ - -#ifdef CONFIG_BLK_DEV_TRITON - if (pc->dma_error) { - printk ("ide-tape: DMA disabled, reverting to PIO\n"); - drive->using_dma=0; - pc->dma_error=0; - } - if (pc->request_transfer && pc->dma_recommended && drive->using_dma) { - dma_ok=!(HWIF(drive)->dmaproc(pc->writing ? ide_dma_write : ide_dma_read, drive)); - } -#endif /* CONFIG_BLK_DEV_TRITON */ - - OUT_BYTE (drive->ctl,IDETAPE_CONTROL_REG); - OUT_BYTE (dma_ok ? 1:0,IDETAPE_FEATURES_REG); /* Use PIO/DMA */ - OUT_BYTE (bcount.b.high,IDETAPE_BCOUNTH_REG); - OUT_BYTE (bcount.b.low,IDETAPE_BCOUNTL_REG); - OUT_BYTE (drive->select.all,IDETAPE_DRIVESEL_REG); - - ide_set_handler (drive,handler,WAIT_CMD); /* Set the interrupt routine */ - OUT_BYTE (WIN_PACKETCMD,IDETAPE_ATACOMMAND_REG); /* Issue the packet command */ - if (ide_wait_stat (drive,DRQ_STAT,BUSY_STAT,WAIT_READY)) { /* Wait for DRQ to be ready - Assuming Accelerated DRQ */ - /* - * We currently only support tape drives which report - * accelerated DRQ assertion. For this case, specs - * allow up to 50us. We really shouldn't get here. - * - * ??? Still needs to think what to do if we reach - * here anyway. 
- */ - - printk ("ide-tape: Strange, packet command initiated yet DRQ isn't asserted\n"); - return; - } - - ireason.all=IN_BYTE (IDETAPE_IREASON_REG); - if (!ireason.b.cod || ireason.b.io) { - printk ("ide-tape: (IO,CoD) != (0,1) while issuing a packet command\n"); - ide_do_reset (drive); - return; - } - - ide_output_data (drive,pc->c,12/4); /* Send the actual packet */ -#ifdef CONFIG_BLK_DEV_TRITON - if ((pc->dma_in_progress=dma_ok)) { /* Begin DMA, if necessary */ - pc->dma_error=0; - (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); - } -#endif /* CONFIG_BLK_DEV_TRITON */ -} - -/* - * idetape_pc_intr is the usual interrupt handler which will be called - * during a packet command. We will transfer some of the data (as - * requested by the drive) and will re-point interrupt handler to us. - * When data transfer is finished, we will act according to the - * algorithm described before idetape_issue_packet_command. + * idetape_queue_pc_head generates a new packet command request in front + * of the request queue, before the current request, so that it will be + * processed immediately, on the next pass through the driver. * - */ - - -void idetape_pc_intr (ide_drive_t *drive) - -{ - idetape_tape_t *tape=&(drive->tape); - idetape_status_reg_t status; - idetape_bcount_reg_t bcount; - idetape_ireason_reg_t ireason; - idetape_packet_command_t *pc=tape->pc; - unsigned long temp; - -#ifdef CONFIG_BLK_DEV_TRITON - if (pc->dma_in_progress) { - if ((pc->dma_error=HWIF(drive)->dmaproc(ide_dma_status_bad, drive))) - /* - * We will currently correct the following in - * idetape_analyze_error. 
- */ - pc->actually_transferred=HWIF(drive)->dmaproc(ide_dma_transferred, drive); - else - pc->actually_transferred=pc->request_transfer; - (void) (HWIF(drive)->dmaproc(ide_dma_abort, drive)); /* End DMA */ -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: DMA finished\n"); -#endif /* IDETAPE_DEBUG_LOG */ - } -#endif /* CONFIG_BLK_DEV_TRITON */ - - status.all=IN_BYTE (IDETAPE_STATUS_REG); /* Clear the interrupt */ - -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Reached idetape_pc_intr interrupt handler\n"); -#endif /* IDETAPE_DEBUG_LOG */ - - if (!status.b.drq) { /* No more interrupts */ -#if IDETAPE_DEBUG_LOG - printk ("Packet command completed\n"); - printk ("Total bytes transferred: %lu\n",pc->actually_transferred); -#endif /* IDETAPE_DEBUG_LOG */ - pc->dma_in_progress=0; - - sti (); - - if (status.b.check || pc->dma_error) { /* Error detected */ -#if IDETAPE_DEBUG_LOG - /* - * Without debugging, we only log an error if we decided to - * give up retrying. - */ - printk ("ide-tape: %s: I/O error, ",drive->name); -#endif /* IDETAPE_DEBUG_LOG */ - if (pc->c[0] == IDETAPE_REQUEST_SENSE_CMD) { - printk ("ide-tape: I/O error in request sense command\n"); - ide_do_reset (drive); - return; - } - - idetape_retry_pc (drive); /* Retry operation */ - return; - } - pc->error=0; - if (pc->wait_for_dsc && !status.b.dsc) { /* Media access command */ - tape->dsc_polling_frequency=IDETAPE_DSC_FAST_MEDIA_ACCESS_FREQUENCY; - idetape_postpone_request (drive); /* Allow ide.c to handle other requests */ - return; - } - if (tape->failed_pc == pc) - tape->failed_pc=NULL; -#if IDETAPE_DEBUG_BUGS - if (pc->callback==NULL) - printk ("ide-tape: ide-tape bug - Callback function not set !\n"); - else -#endif /* IDETAPE_DEBUG_BUGS */ - (*pc->callback)(drive); /* Command finished - Call the callback function */ - return; - } -#ifdef CONFIG_BLK_DEV_TRITON - if (pc->dma_in_progress) { - pc->dma_in_progress=0; - printk ("ide-tape: The tape wants to issue more interrupts in DMA mode\n"); - printk 
("ide-tape: DMA disabled, reverting to PIO\n"); - drive->using_dma=0; - ide_do_reset (drive); - return; - } -#endif /* CONFIG_BLK_DEV_TRITON */ - bcount.b.high=IN_BYTE (IDETAPE_BCOUNTH_REG); /* Get the number of bytes to transfer */ - bcount.b.low=IN_BYTE (IDETAPE_BCOUNTL_REG); /* on this interrupt */ - ireason.all=IN_BYTE (IDETAPE_IREASON_REG); /* Read the interrupt reason register */ - - if (ireason.b.cod) { - printk ("ide-tape: CoD != 0 in idetape_pc_intr\n"); - ide_do_reset (drive); - return; - } - if (ireason.b.io != !(pc->writing)) { /* Hopefully, we will never get here */ - printk ("ide-tape: We wanted to %s, ",pc->writing ? "Write":"Read"); - printk ("but the tape wants us to %s !\n",ireason.b.io ? "Read":"Write"); - ide_do_reset (drive); - return; - } - - if (!pc->writing) { /* Reading - Check that we have enough space */ - temp=(unsigned long) pc->actually_transferred + bcount.all; - if ( temp > pc->request_transfer) { - if (temp > pc->buffer_size) { - printk ("ide-tape: The tape wants to send us more data than requested - discarding data\n"); - idetape_discard_data (drive,bcount.all); - ide_set_handler (drive,&idetape_pc_intr,WAIT_CMD); - return; - } -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: The tape wants to send us more data than requested - allowing transfer\n"); -#endif /* IDETAPE_DEBUG_LOG */ - } - } -#if IDETAPE_DEBUG_BUGS - if (bcount.all && !pc->buffer) { - printk ("ide-tape: ide-tape.c bug - Buffer not set in idetape_pc_intr. Discarding data.\n"); - - if (!pc->writing) { - printk ("ide-tape: Discarding data\n"); - idetape_discard_data (drive,bcount.all); - ide_set_handler (drive,&idetape_pc_intr,WAIT_CMD); - return; - } - else { /* ??? 
*/ - } - } -#endif /* IDETAPE_DEBUG_BUGS */ - if (pc->writing) - idetape_output_data (drive,pc->current_position,bcount.all); /* Write the current buffer */ - else - idetape_input_data (drive,pc->current_position,bcount.all); /* Read the current buffer */ -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: %s %d bytes\n",pc->writing ? "Wrote":"Received",bcount.all); -#endif /* IDETAPE_DEBUG_LOG */ - pc->actually_transferred+=bcount.all; /* Update the current position */ - pc->current_position+=bcount.all; - - ide_set_handler (drive,&idetape_pc_intr,WAIT_CMD); /* And set the interrupt handler again */ -} - -/* - * idetape_postpone_request postpones the current request so that - * ide.c will be able to service requests from another device on - * the same hwgroup while we are polling for DSC. - */ - -void idetape_postpone_request (ide_drive_t *drive) - -{ - idetape_tape_t *tape=&(drive->tape); - struct request *rq; - idetape_status_reg_t status; - -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_postpone_request\n"); -#endif /* IDETAPE_DEBUG_LOG */ -#if IDETAPE_DEBUG_BUGS - if (tape->postponed_rq != NULL) - printk ("ide-tape.c bug - postponed_rq not NULL in idetape_postpone_request\n"); -#endif /* IDETAPE_DEBUG_BUGS */ - - tape->dsc_timer.expires=jiffies + tape->dsc_polling_frequency; /* Set timer to poll for */ - tape->dsc_timeout=jiffies+IDETAPE_DSC_TIMEOUT; /* actual completion */ - tape->dsc_timer.data=(unsigned long) drive; - tape->dsc_timer.function=&idetape_poll_for_dsc; - init_timer (&(tape->dsc_timer)); - - /* - * Remove current request from the request queue: - */ - - tape->postponed_rq = rq = HWGROUP(drive)->rq; - rq->rq_status = IDETAPE_RQ_POSTPONED; - blk_dev[MAJOR(rq->rq_dev)].current_request = rq->next; - HWGROUP(drive)->rq = NULL; - - /* - * Check the status again - Maybe we can save one polling period. 
- */ - - status.all=IN_BYTE (IDETAPE_STATUS_REG); - tape->last_status=status.all; - tape->request_status=1; - - tape->dsc_polling_start=jiffies; - add_timer(&(tape->dsc_timer)); /* Activate the polling timer */ -} - -/* - * idetape_poll_for_dsc_direct is called from idetape_poll_for_dsc - * to handle the case in which we can safely communicate with the tape - * (since no other request for this hwgroup is active). - */ - -void idetape_poll_for_dsc_direct (unsigned long data) - -{ - ide_drive_t *drive=(ide_drive_t *) data; - idetape_tape_t *tape=&(drive->tape); - idetape_status_reg_t status; - -#if IDETAPE_DEBUG_LOG - printk ("%s: idetape_poll_for_dsc_direct called\n",drive->name); -#endif /* IDETAPE_DEBUG_LOG */ - - OUT_BYTE(drive->select.all,IDE_SELECT_REG); - status.all=IN_BYTE (IDETAPE_STATUS_REG); - - if (status.b.dsc) { /* DSC received */ - tape->dsc_received=1; - del_timer (&(tape->dsc_timer)); /* Stop polling and put back the postponed */ - idetape_put_back_postponed_request (drive); /* request in the request queue */ - return; - } - - if (jiffies > tape->dsc_timeout) { /* Timeout */ - tape->dsc_received=0; - del_timer (&(tape->dsc_timer)); - /* ??? */ - idetape_put_back_postponed_request (drive); - return; - } - - /* Poll again */ - - if (jiffies - tape->dsc_polling_start > IDETAPE_FAST_SLOW_THRESHOLD) - tape->dsc_timer.expires = jiffies + IDETAPE_DSC_SLOW_MEDIA_ACCESS_FREQUENCY; - else - tape->dsc_timer.expires = jiffies + tape->dsc_polling_frequency; - add_timer(&(tape->dsc_timer)); - return; -} - -/* - * idetape_poll_for_dsc gets invoked by a timer (which was set - * by idetape_postpone_request) to poll for the DSC bit - * in the status register. + * idetape_queue_pc_head is called from the request handling part of + * the driver (the "bottom" part). Safe storage for the request should + * be allocated with idetape_next_pc_storage and idetape_next_rq_storage + * before calling idetape_queue_pc_head. 
* - * We take care not to perform any tape access if the driver is - * accessing the other device. We will instead ask ide.c to sample - * the tape status register on our behalf in the next call to do_request, - * at the point in which the other device is idle, or assume that - * DSC was received even though we won't verify it (but when we assume - * that, it will usually have a solid basis). + * Memory for those requests is pre-allocated at initialization time, and + * is limited to IDETAPE_PC_STACK requests. We assume that we have enough + * space for the maximum possible number of inter-dependent packet commands. * - * The use of cli () below is a must, as we inspect and change - * the device request list while another request is active. + * The higher level of the driver - The ioctl handler and the character + * device handling functions should queue request to the lower level part + * and wait for their completion using idetape_queue_pc_tail or + * idetape_queue_rw_tail. */ - -void idetape_poll_for_dsc (unsigned long data) - +static void idetape_queue_pc_head (ide_drive_t *drive,idetape_pc_t *pc,struct request *rq) { - ide_drive_t *drive=(ide_drive_t *) data; unsigned int major = HWIF(drive)->major; - idetape_tape_t *tape=&(drive->tape); struct blk_dev_struct *bdev = &blk_dev[major]; - struct request *next_rq; - unsigned long flags; - idetape_status_reg_t status; - -#if IDETAPE_DEBUG_LOG - printk ("%s: idetape_poll_for_dsc called\n",drive->name); -#endif /* IDETAPE_DEBUG_LOG */ - - save_flags (flags);cli (); - - /* - * Check if the other device is idle. If there are no requests, - * we can safely access the tape. - */ - - if (HWGROUP (drive)->rq == NULL) { - sti (); - idetape_poll_for_dsc_direct (data); - return; - } - - /* - * If DSC was received, re-insert our postponed request into - * the request queue (using ide_next). 
- */ - - status.all=tape->last_status; - - if (status.b.dsc) { /* DSC received */ - tape->dsc_received=1; - idetape_put_back_postponed_request (drive); - del_timer (&(tape->dsc_timer)); - restore_flags (flags); - return; - } - - /* - * At this point, DSC may have been received, but we can't - * check it. We now have two options: - * - * 1. The "simple" method - We can continue polling - * until we know the value of DSC. - * - * but we also have a more clever option :-) - * - * 2. We can sometimes more or less anticipate in - * advance how much time it will take for - * the tape to perform the request. This is the - * place to take advantage of this ! - * - * We can assume that DSC was received, put - * back our request, and hope that we will have - * a "cache hit". This will only work when - * we haven't initiated the packet command yet, - * but this is the common read/write case. As - * for the slower media access commands, fallback - * to method 1 above. - * - * When using method 2, we can also take advantage of the - * knowledge of the tape's internal buffer size - We can - * precalculate the time it will take for the tape to complete - * servicing not only one request, but rather, say, 50% of its - * internal buffer. The polling period will then be much larger, - * decreasing our load on Linux, and we will also call - * idetape_postpone_request less often, as there will usually - * be more room in the internal tape buffer while we are in - * idetape_do_request. - * - * For this method to work well, the ongoing request of the - * other device should be serviced by the time the tape is - * still working on its remaining 50% internal buffer. This - * will usually happen when the other device is much faster - * than the tape. - */ - -#if IDETAPE_ANTICIPATE_READ_WRITE_DSC - - /* - * Method 2. - * - * There is a high chance that DSC was received, even though - * we couldn't verify it. Let's hope that it's a "cache hit" - * rather than a "cache miss". 
Someday I will probably add a - * feedback loop around the number of "cache hits" which will - * fine-tune the polling period. - */ - - if (tape->postponed_rq->cmd != IDETAPE_PACKET_COMMAND_REQUEST_TYPE1) { - - /* - * We can use this method only when the packet command - * was still not initiated. - */ - - idetape_put_back_postponed_request (drive); - del_timer (&(tape->dsc_timer)); - restore_flags (flags); - return; - } -#endif /* IDETAPE_ANTICIPATE_READ_WRITE_DSC */ - - /* - * Fallback to method 1. - */ - - next_rq=bdev->current_request; - if (next_rq == HWGROUP (drive)->rq) - next_rq=next_rq->next; - - if (next_rq == NULL) { - - /* - * There will not be another request after the currently - * ongoing request, so ide.c won't be able to sample - * the status register on our behalf in do_request. - * - * In case we are waiting for DSC before the packet - * command was initiated, we will put back our postponed - * request and have another look at the status register - * in idetape_do_request, as done in method 2 above. - * - * In case we already initiated the command, we can't - * put it back, but it is anyway a slow media access - * command. We will just give up and poll again until - * we are lucky. - */ - - if (tape->postponed_rq->cmd == IDETAPE_PACKET_COMMAND_REQUEST_TYPE1) { - - /* - * Media access command - Poll again. - * - * We set tape->request_status to 1, just in case - * other requests are added while we are waiting. - */ - - tape->request_status=1; - restore_flags (flags); - tape->dsc_timer.expires = jiffies + tape->dsc_polling_frequency; - add_timer(&(tape->dsc_timer)); - return; - } - - /* - * The packet command hasn't been sent to the tape yet - - * We can safely put back the request and have another - * look at the status register in idetape_do_request. - */ - - idetape_put_back_postponed_request (drive); - del_timer (&(tape->dsc_timer)); - restore_flags (flags); - return; - } - - /* - * There will be another request after the current request. 
- * - * Request ide.c to sample for us the tape's status register - * before the next request. - */ - - tape->request_status=1; - restore_flags (flags); - if (jiffies > tape->dsc_timeout) { /* Timeout */ - tape->dsc_received=0; - /* ??? */ - idetape_put_back_postponed_request (drive); - del_timer (&(tape->dsc_timer)); - restore_flags (flags); - return; - } + bdev->current_request=HWGROUP (drive)->rq; /* Since we may have taken it out */ - /* Poll again */ - - if (jiffies - tape->dsc_polling_start > IDETAPE_FAST_SLOW_THRESHOLD) - tape->dsc_timer.expires = jiffies + IDETAPE_DSC_SLOW_MEDIA_ACCESS_FREQUENCY; - else - tape->dsc_timer.expires = jiffies + tape->dsc_polling_frequency; - add_timer(&(tape->dsc_timer)); - return; + ide_init_drive_cmd (rq); + rq->buffer = (char *) pc; + rq->cmd = IDETAPE_PC_RQ; + (void) ide_do_drive_cmd (drive, rq, ide_preempt); } /* - * idetape_put_back_postponed_request gets called when we decided to - * stop polling for DSC and continue servicing our postponed request. + * idetape_next_pc_storage returns a pointer to a place in which we can + * safely store a packet command, even though we intend to leave the + * driver. A storage space for a maximum of IDETAPE_PC_STACK packet + * commands is allocated at initialization time. 
*/ - -void idetape_put_back_postponed_request (ide_drive_t *drive) - +static idetape_pc_t *idetape_next_pc_storage (ide_drive_t *drive) { - idetape_tape_t *tape = &(drive->tape); + idetape_tape_t *tape = drive->driver_data; #if IDETAPE_DEBUG_LOG - printk ("ide-tape: Putting back postponed request\n"); + printk (KERN_INFO "ide-tape: pc_stack_index=%d\n",tape->pc_stack_index); #endif /* IDETAPE_DEBUG_LOG */ -#if IDETAPE_DEBUG_BUGS - if (tape->postponed_rq == NULL) { - printk ("tape->postponed_rq is NULL in put_back_postponed_request\n"); - return; - } -#endif /* IDETAPE_DEBUG_BUGS */ - (void) ide_do_drive_cmd (drive, tape->postponed_rq, ide_next); - - /* - * Note that the procedure done here is different than the method - * we are using in idetape_queue_pc_head - There we are putting - * request(s) before our currently called request. - * - * Here, on the other hand, HWGROUP(drive)->rq is not our - * request but rather a request to another device. Therefore, - * we will let it finish and only then service our postponed - * request --> We don't touch HWGROUP(drive)->rq. - */ + if (tape->pc_stack_index==IDETAPE_PC_STACK) + tape->pc_stack_index=0; + return (&tape->pc_stack[tape->pc_stack_index++]); } -void idetape_media_access_finished (ide_drive_t *drive) - +/* + * idetape_next_rq_storage is used along with idetape_next_pc_storage. + * Since we queue packet commands in the request queue, we need to + * allocate a request, along with the allocation of a packet command. + */ + +/************************************************************** + * * + * This should get fixed to use kmalloc(GFP_ATOMIC, ..) * + * followed later on by kfree(). 
-ml * + * * + **************************************************************/ + +static struct request *idetape_next_rq_storage (ide_drive_t *drive) { - idetape_tape_t *tape=&(drive->tape); - idetape_status_reg_t status; - idetape_packet_command_t *pc; - - pc=tape->pc; - - status.all=IN_BYTE (IDETAPE_STATUS_REG); + idetape_tape_t *tape = drive->driver_data; - if (tape->dsc_received) { #if IDETAPE_DEBUG_LOG - printk ("DSC received\n"); + printk (KERN_INFO "ide-tape: rq_stack_index=%d\n",tape->rq_stack_index); #endif /* IDETAPE_DEBUG_LOG */ - if (status.b.check) { /* Error detected */ - printk ("ide-tape: %s: I/O error, ",drive->name); - idetape_retry_pc (drive); /* Retry operation */ - return; - } - pc->error=0; - if (tape->failed_pc == pc) - tape->failed_pc=NULL; -#if IDETAPE_DEBUG_BUGS - if (pc->callback==NULL) - printk ("ide-tape: ide-tape bug - Callback function not set !\n"); - else -#endif /* IDETAPE_DEBUG_BUGS */ - (*pc->callback)(drive); - - return; - } - else { - printk ("ide-tape: %s: DSC timeout.\n",drive->name); - /* ??? */ - pc->error=1; - tape->failed_pc=NULL; -#if IDETAPE_DEBUG_BUGS - if (pc->callback==NULL) - printk ("ide-tape: ide-tape bug - Callback function not set !\n"); - else -#endif /* IDETAPE_DEBUG_BUGS */ - (*pc->callback)(drive); - return; - } + if (tape->rq_stack_index==IDETAPE_PC_STACK) + tape->rq_stack_index=0; + return (&tape->rq_stack[tape->rq_stack_index++]); } - /* - * idetape_retry_pc is called when an error was detected during the - * last packet command. We queue a request sense packet command in - * the head of the request list. 
+ * Pipeline related functions */ - -void idetape_retry_pc (ide_drive_t *drive) +static inline int idetape_pipeline_active (idetape_tape_t *tape) { - idetape_tape_t *tape = &drive->tape; - idetape_packet_command_t *pc; - struct request *new_rq; - - idetape_error_reg_t error; - error.all=IN_BYTE (IDETAPE_ERROR_REG); - pc=idetape_next_pc_storage (drive); - new_rq=idetape_next_rq_storage (drive); - idetape_create_request_sense_cmd (pc); - pc->buffer=pc->temp_buffer; - pc->buffer_size=IDETAPE_TEMP_BUFFER_SIZE; - pc->current_position=pc->temp_buffer; - tape->reset_issued = 1; - idetape_queue_pc_head (drive,pc,new_rq); + return tape->active_data_request != NULL; } /* - * General packet command callback function. + * idetape_kfree_stage calls kfree to completely free a stage, along with + * its related buffers. */ - -void idetape_pc_callback (ide_drive_t *drive) - +static void __idetape_kfree_stage (idetape_stage_t *stage) { - idetape_tape_t *tape; - struct request *rq; - - tape=&(drive->tape); - rq=HWGROUP(drive)->rq; - -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Reached idetape_pc_callback\n"); -#endif /* IDETAPE_DEBUG_LOG */ - if (!tape->pc->error) { -#if IDETAPE_DEBUG_LOG - printk ("Request completed\n"); -#endif /* IDETAPE_DEBUG_LOG */ - idetape_end_request (1,HWGROUP (drive)); - } - else { - idetape_end_request (0,HWGROUP (drive)); + struct buffer_head *prev_bh, *bh = stage->bh; + int size; + + while (bh != NULL) { + if (bh->b_data != NULL) { + size = (int) bh->b_size; + while (size > 0) { + free_page ((unsigned long) bh->b_data); + size -= PAGE_SIZE; + bh->b_data += PAGE_SIZE; + } + } + prev_bh = bh; + bh = bh->b_reqnext; + kfree (prev_bh); } - return; + kfree (stage); } +static void idetape_kfree_stage (idetape_tape_t *tape, idetape_stage_t *stage) +{ + if (tape->cache_stage == NULL) + tape->cache_stage = stage; + else + __idetape_kfree_stage (stage); +} -void idetape_read_callback (ide_drive_t *drive) - +/* + * idetape_kmalloc_stage uses __get_free_page to 
allocate a pipeline + * stage, along with all the necessary small buffers which together make + * a buffer of size tape->stage_size (or a bit more). We attempt to + * combine sequential pages as much as possible. + * + * Returns a pointer to the new allocated stage, or NULL if we + * can't (or don't want to) allocate a stage. + * + * Pipeline stages are optional and are used to increase performance. + * If we can't allocate them, we'll manage without them. + */ +static idetape_stage_t *__idetape_kmalloc_stage (idetape_tape_t *tape) { - idetape_tape_t *tape=&(drive->tape); - struct request *rq=HWGROUP(drive)->rq; - int blocks_read=tape->pc->actually_transferred/tape->tape_block_size; + idetape_stage_t *stage; + struct buffer_head *prev_bh, *bh; + int pages = tape->pages_per_stage; + char *b_data; -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Reached idetape_read_callback\n"); -#endif /* IDETAPE_DEBUG_LOG */ + if ((stage = (idetape_stage_t *) kmalloc (sizeof (idetape_stage_t),GFP_KERNEL)) == NULL) + return NULL; + stage->next = NULL; - tape->block_address+=blocks_read; - rq->current_nr_sectors-=blocks_read; + bh = stage->bh = (struct buffer_head *) kmalloc (sizeof (struct buffer_head), GFP_KERNEL); + if (bh == NULL) + goto abort; + bh->b_reqnext = NULL; + if ((bh->b_data = (char *) __get_free_page (GFP_KERNEL)) == NULL) + goto abort; + bh->b_size = PAGE_SIZE; + set_bit (BH_Lock, &bh->b_state); - if (!tape->pc->error) - idetape_end_request (1,HWGROUP (drive)); - else { - rq->errors=tape->pc->error; - switch (rq->errors) { - case IDETAPE_RQ_ERROR_FILEMARK: - case IDETAPE_RQ_ERROR_EOD: - break; + while (--pages) { + if ((b_data = (char *) __get_free_page (GFP_KERNEL)) == NULL) + goto abort; + if (bh->b_data == b_data + PAGE_SIZE && virt_to_bus (bh->b_data) == virt_to_bus (b_data) + PAGE_SIZE) { + bh->b_size += PAGE_SIZE; + bh->b_data -= PAGE_SIZE; + continue; } - idetape_end_request (0,HWGROUP (drive)); + if (b_data == bh->b_data + bh->b_size && virt_to_bus (b_data) == 
virt_to_bus (bh->b_data) + bh->b_size) { + bh->b_size += PAGE_SIZE; + continue; + } + prev_bh = bh; + if ((bh = (struct buffer_head *) kmalloc (sizeof (struct buffer_head), GFP_KERNEL)) == NULL) { + free_page ((unsigned long) b_data); + goto abort; + } + bh->b_reqnext = NULL; + bh->b_data = b_data; + bh->b_size = PAGE_SIZE; + set_bit (BH_Lock, &bh->b_state); + prev_bh->b_reqnext = bh; } - return; + bh->b_size -= tape->excess_bh_size; + return stage; +abort: + __idetape_kfree_stage (stage); + return NULL; } -void idetape_write_callback (ide_drive_t *drive) - +static idetape_stage_t *idetape_kmalloc_stage (idetape_tape_t *tape) { - idetape_tape_t *tape=&(drive->tape); - struct request *rq=HWGROUP(drive)->rq; - int blocks_written=tape->pc->actually_transferred/tape->tape_block_size; - -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Reached idetape_write_callback\n"); -#endif /* IDETAPE_DEBUG_LOG */ + idetape_stage_t *cache_stage = tape->cache_stage; - tape->block_address+=blocks_written; - rq->current_nr_sectors-=blocks_written; +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "Reached idetape_kmalloc_stage\n"); +#endif /* IDETAPE_DEBUG_LOG */ - if (!tape->pc->error) - idetape_end_request (1,HWGROUP (drive)); - else { - rq->errors=tape->pc->error; - idetape_end_request (0,HWGROUP (drive)); + if (tape->nr_stages >= tape->max_stages) + return NULL; + if (cache_stage != NULL) { + tape->cache_stage = NULL; + return cache_stage; } - return; + return __idetape_kmalloc_stage (tape); } -void idetape_inquiry_callback (ide_drive_t *drive) - +static void idetape_copy_stage_from_user (idetape_tape_t *tape, idetape_stage_t *stage, const char *buf, int n) { - idetape_tape_t *tape; - - tape=&(drive->tape); - - idetape_display_inquiry_result (tape->pc->buffer); - idetape_pc_callback (drive); - return; -} + struct buffer_head *bh = tape->bh; + int count; -/* - * idetape_input_data is called to read data from the tape's data - * register. 
We basically let ide_input_data do the job, but we also - * take care about the remaining bytes which can not be transferred - * in 32-bit data transfers. - */ - -void idetape_input_data (ide_drive_t *drive,void *buffer, unsigned long bcount) + while (n) { +#if IDETAPE_DEBUG_BUGS + if (bh == NULL) { + printk (KERN_ERR "ide-tape: bh == NULL in idetape_copy_stage_from_user\n"); + return; + } +#endif /* IDETAPE_DEBUG_BUGS */ + count = IDETAPE_MIN (bh->b_size - bh->b_count, n); + copy_from_user (bh->b_data + bh->b_count, buf, count); + n -= count; bh->b_count += count; buf += count; + if (bh->b_count == bh->b_size) { + bh = bh->b_reqnext; + if (bh) + bh->b_count = 0; + } + } + tape->bh = bh; +} +static void idetape_copy_stage_to_user (idetape_tape_t *tape, char *buf, idetape_stage_t *stage, int n) { - unsigned long wcount; - - wcount=bcount >> 2; - bcount -= 4*wcount; - - if (wcount) - ide_input_data (drive,buffer,wcount); - - if (bcount) { - ((byte *)buffer) += 4*wcount; - insb (IDETAPE_DATA_REG,buffer,bcount); + struct buffer_head *bh = tape->bh; + int count; + + while (n) { +#if IDETAPE_DEBUG_BUGS + if (bh == NULL) { + printk (KERN_ERR "ide-tape: bh == NULL in idetape_copy_stage_to_user\n"); + return; + } +#endif /* IDETAPE_DEBUG_BUGS */ + count = IDETAPE_MIN (tape->b_count, n); + copy_to_user (buf, tape->b_data, count); + n -= count; tape->b_data += count; tape->b_count -= count; buf += count; + if (!tape->b_count) { + tape->bh = bh = bh->b_reqnext; + if (bh) { + tape->b_data = bh->b_data; + tape->b_count = bh->b_count; + } + } } } -/* - * idetape_output_data is used to write data to the tape. 
- */ - -void idetape_output_data (ide_drive_t *drive,void *buffer, unsigned long bcount) - +static void idetape_init_merge_stage (idetape_tape_t *tape) { - unsigned long wcount; - - wcount=bcount >> 2; - bcount -= 4*wcount; - - if (wcount) - ide_output_data (drive,buffer,wcount); + struct buffer_head *bh = tape->merge_stage->bh; - if (bcount) { - ((byte *)buffer) += 4*wcount; - outsb (IDETAPE_DATA_REG,buffer,bcount); + tape->bh = bh; + if (tape->chrdev_direction == idetape_direction_write) + bh->b_count = 0; + else { + tape->b_data = bh->b_data; + tape->b_count = bh->b_count; } } +static void idetape_switch_buffers (idetape_tape_t *tape, idetape_stage_t *stage) +{ + struct buffer_head *tmp; + + tmp = stage->bh; + stage->bh = tape->merge_stage->bh; + tape->merge_stage->bh = tmp; + idetape_init_merge_stage (tape); +} + /* - * Too bad. The drive wants to send us data which we are not ready to accept. - * Just throw it away. + * idetape_increase_max_pipeline_stages is a part of the feedback + * loop which tries to find the optimum number of stages. In the + * feedback loop, we are starting from a minimum maximum number of + * stages, and if we sense that the pipeline is empty, we try to + * increase it, until we reach the user compile time memory limit. */ - -void idetape_discard_data (ide_drive_t *drive, unsigned long bcount) - +static void idetape_increase_max_pipeline_stages (ide_drive_t *drive) { - unsigned long i; + idetape_tape_t *tape = drive->driver_data; - for (i=0;imax_stages = IDETAPE_MIN (tape->max_stages + IDETAPE_INCREASE_STAGES_RATE, IDETAPE_MAX_PIPELINE_STAGES); } /* - * Issue an INQUIRY packet command. + * idetape_add_stage_tail adds a new stage at the end of the pipeline. 
*/ - -void idetape_create_inquiry_cmd (idetape_packet_command_t *pc) - +static void idetape_add_stage_tail (ide_drive_t *drive,idetape_stage_t *stage) { -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Creating INQUIRY packet command\n"); -#endif /* IDETAPE_DEBUG_LOG */ - pc->request_transfer=36; - pc->callback=&idetape_inquiry_callback; - pc->writing=0; + idetape_tape_t *tape = drive->driver_data; + unsigned long flags; - idetape_zero_packet_command (pc); - pc->c[0]=IDETAPE_INQUIRY_CMD; - pc->c[4]=255; +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "Reached idetape_add_stage_tail\n"); +#endif /* IDETAPE_DEBUG_LOG */ + save_flags (flags); + cli (); + stage->next=NULL; + if (tape->last_stage != NULL) + tape->last_stage->next=stage; + else + tape->first_stage=tape->next_stage=stage; + tape->last_stage=stage; + if (tape->next_stage == NULL) + tape->next_stage=tape->last_stage; + tape->nr_stages++; + tape->nr_pending_stages++; + restore_flags (flags); } /* - * Format the INQUIRY command results. + * idetape_remove_stage_head removes tape->first_stage from the pipeline. + * The caller should avoid race conditions. 
*/ - -void idetape_display_inquiry_result (byte *buffer) - +static void idetape_remove_stage_head (ide_drive_t *drive) { - idetape_inquiry_result_t *result; - - result=(idetape_inquiry_result_t *) buffer; - ide_fixstring (result->vendor_id,8,0); - ide_fixstring (result->product_id,16,0); - ide_fixstring (result->revision_level,4,0); - - if (result->response_format != 2) { - printk ("The INQUIRY Data Format is unknown to us !\n"); - printk ("Assuming QIC-157C format.\n"); - } - + idetape_tape_t *tape = drive->driver_data; + idetape_stage_t *stage; + #if IDETAPE_DEBUG_LOG - printk ("Dumping INQUIRY command results:\n"); - printk ("Response Data Format: %d - ",result->response_format); - switch (result->response_format) { - case 2: - printk ("As specified in QIC-157 Revision C\n"); - break; - default: - printk ("Unknown\n"); - break; + printk (KERN_INFO "Reached idetape_remove_stage_head\n"); +#endif /* IDETAPE_DEBUG_LOG */ +#if IDETAPE_DEBUG_BUGS + if (tape->first_stage == NULL) { + printk (KERN_ERR "ide-tape: bug: tape->first_stage is NULL\n"); + return; } - - printk ("Device Type: %x - ",result->device_type); - switch (result->device_type) { - case 0: printk ("Direct-access Device\n");break; - case 1: printk ("Streaming Tape Device\n");break; - case 2: case 3: case 4: printk ("Reserved\n");break; - case 5: printk ("CD-ROM Device\n");break; - case 6: printk ("Reserved\n"); - case 7: printk ("Optical memory Device\n");break; - case 0x1f: printk ("Unknown or no Device type\n");break; - default: printk ("Reserved\n"); + if (tape->active_stage == tape->first_stage) { + printk (KERN_ERR "ide-tape: bug: Trying to free our active pipeline stage\n"); + return; } - - printk ("Removable Medium: %s",result->rmb ? 
"Yes\n":"No\n"); - - printk ("ANSI Version: %d - ",result->ansi_version); - switch (result->ansi_version) { - case 2: - printk ("QIC-157 Revision C\n"); - break; - default: - printk ("Unknown\n"); - break; +#endif /* IDETAPE_DEBUG_BUGS */ + stage=tape->first_stage; + tape->first_stage=stage->next; + idetape_kfree_stage (tape, stage); + tape->nr_stages--; + if (tape->first_stage == NULL) { + tape->last_stage=NULL; +#if IDETAPE_DEBUG_BUGS + if (tape->next_stage != NULL) + printk (KERN_ERR "ide-tape: bug: tape->next_stage != NULL\n"); + if (tape->nr_stages) + printk (KERN_ERR "ide-tape: bug: nr_stages should be 0 now\n"); +#endif /* IDETAPE_DEBUG_BUGS */ } +} - printk ("ECMA Version: "); - if (result->ecma_version) - printk ("%d\n",result->ecma_version); - else - printk ("Not supported\n"); - - printk ("ISO Version: "); - if (result->iso_version) - printk ("%d\n",result->iso_version); - else - printk ("Not supported\n"); +/* + * idetape_active_next_stage will declare the next stage as "active". 
+ */ +static void idetape_active_next_stage (ide_drive_t *drive) +{ + idetape_tape_t *tape = drive->driver_data; + idetape_stage_t *stage=tape->next_stage; + struct request *rq = &stage->rq; - printk ("Additional Length: %d\n",result->additional_length); - printk ("Vendor Identification: %s\n",result->vendor_id); - printk ("Product Identification: %s\n",result->product_id); - printk ("Product Revision Level: %s\n",result->revision_level); +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "Reached idetape_active_next_stage\n"); #endif /* IDETAPE_DEBUG_LOG */ +#if IDETAPE_DEBUG_BUGS + if (stage == NULL) { + printk (KERN_ERR "ide-tape: bug: Trying to activate a non existing stage\n"); + return; + } +#endif /* IDETAPE_DEBUG_BUGS */ - if (result->device_type != 1) - printk ("Device type is not set to tape\n"); + rq->buffer = NULL; + rq->bh = stage->bh; + tape->active_data_request=rq; + tape->active_stage=stage; + tape->next_stage=stage->next; +} - if (!result->rmb) - printk ("The removable flag is not set\n"); +/* + * idetape_insert_pipeline_into_queue is used to start servicing the + * pipeline stages, starting from tape->next_stage. 
+ */ +static void idetape_insert_pipeline_into_queue (ide_drive_t *drive) +{ + idetape_tape_t *tape = drive->driver_data; - if (result->ansi_version != 2) { - printk ("The Ansi Version is unknown to us !\n"); - printk ("Assuming compliance with QIC-157C specification.\n"); + if (tape->next_stage == NULL) + return; + if (!idetape_pipeline_active (tape)) { + idetape_active_next_stage (drive); + (void) ide_do_drive_cmd (drive, tape->active_data_request, ide_end); } } -void idetape_create_request_sense_cmd (idetape_packet_command_t *pc) - +static void idetape_abort_pipeline (ide_drive_t *drive) { -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Creating REQUEST SENSE packet command\n"); -#endif /* IDETAPE_DEBUG_LOG */ - pc->request_transfer=18; - pc->callback=&idetape_request_sense_callback; - pc->writing=0; - - idetape_zero_packet_command (pc); - pc->c[0]=IDETAPE_REQUEST_SENSE_CMD; - pc->c[4]=255; -} + idetape_tape_t *tape = drive->driver_data; + idetape_stage_t *stage = tape->next_stage; -void idetape_request_sense_callback (ide_drive_t *drive) + while (stage) { + stage->rq.cmd = IDETAPE_ABORTED_WRITE_RQ; + stage = stage->next; + } +} +/* + * idetape_end_request is used to finish servicing a request, and to + * insert a pending pipeline request into the main device queue. 
+ */ +static void idetape_end_request (byte uptodate, ide_hwgroup_t *hwgroup) { - idetape_tape_t *tape=&(drive->tape); + ide_drive_t *drive = hwgroup->drive; + struct request *rq = hwgroup->rq; + idetape_tape_t *tape = drive->driver_data; + unsigned int major = HWIF(drive)->major; + struct blk_dev_struct *bdev = &blk_dev[major]; + int error; #if IDETAPE_DEBUG_LOG - printk ("ide-tape: Reached idetape_request_sense_callback\n"); -#endif /* IDETAPE_DEBUG_LOG */ - if (!tape->pc->error) { -#if IDETAPE_DEBUG_LOG - printk ("Request completed\n"); + printk (KERN_INFO "Reached idetape_end_request\n"); #endif /* IDETAPE_DEBUG_LOG */ - idetape_analyze_error (drive,(idetape_request_sense_result_t *) tape->pc->buffer); - idetape_end_request (1,HWGROUP (drive)); - } - else { - printk ("Error in REQUEST SENSE itself - Aborting request!\n"); - idetape_end_request (0,HWGROUP (drive)); + + bdev->current_request=rq; /* Since we may have taken it out */ + + switch (uptodate) { + case 0: error = IDETAPE_ERROR_GENERAL; break; + case 1: error = 0; break; + default: error = uptodate; + } + rq->errors = error; + if (error) + tape->failed_pc = NULL; + + if (tape->active_data_request == rq) { /* The request was a pipelined data transfer request */ + tape->active_stage = NULL; + tape->active_data_request = NULL; + tape->nr_pending_stages--; + if (rq->cmd == IDETAPE_WRITE_RQ) { + if (error) { + set_bit (IDETAPE_PIPELINE_ERROR, &tape->flags); + if (error == IDETAPE_ERROR_EOD) + idetape_abort_pipeline (drive); + } + idetape_remove_stage_head (drive); + } + if (tape->next_stage != NULL) { + idetape_active_next_stage (drive); + + /* + * Insert the next request into the request queue. + * The choice of using ide_next or ide_end is now left to the user. 
+ */ +#if IDETAPE_LOW_TAPE_PRIORITY + (void) ide_do_drive_cmd (drive, tape->active_data_request, ide_end); +#else + (void) ide_do_drive_cmd (drive, tape->active_data_request, ide_next); +#endif /* IDETAPE_LOW_TAPE_PRIORITY */ + } else if (!error) + idetape_increase_max_pipeline_stages (drive); } - return; + ide_end_drive_cmd (drive, 0, 0); } /* @@ -2449,515 +1659,603 @@ * to analyze the request sense. We currently do not utilize this * information. */ - -void idetape_analyze_error (ide_drive_t *drive,idetape_request_sense_result_t *result) - +static void idetape_analyze_error (ide_drive_t *drive,idetape_request_sense_result_t *result) { - idetape_tape_t *tape=&(drive->tape); - idetape_packet_command_t *pc=tape->failed_pc; + idetape_tape_t *tape = drive->driver_data; + idetape_pc_t *pc = tape->failed_pc; - tape->sense_key=result->sense_key; - tape->asc=result->asc; - tape->ascq=result->ascq; - -#if IDETAPE_DEBUG_LOG + tape->sense_key = result->sense_key; tape->asc = result->asc; tape->ascq = result->ascq; +#if IDETAPE_DEBUG_LOG /* * Without debugging, we only log an error if we decided to * give up retrying. */ - printk ("ide-tape: pc = %x, sense key = %x, asc = %x, ascq = %x\n",pc->c[0],result->sense_key,result->asc,result->ascq); + printk (KERN_INFO "ide-tape: pc = %x, sense key = %x, asc = %x, ascq = %x\n",pc->c[0],result->sense_key,result->asc,result->ascq); #endif /* IDETAPE_DEBUG_LOG */ - if (pc->c[0] == IDETAPE_READ_CMD) { - if (result->filemark) { - pc->error=IDETAPE_RQ_ERROR_FILEMARK; - pc->abort=1; - } - } - - if (pc->c[0] == IDETAPE_READ_CMD || pc->c[0] == IDETAPE_WRITE_CMD) { - if (result->sense_key == 8) { - pc->error=IDETAPE_RQ_ERROR_EOD; - pc->abort=1; - } - } - -#if 1 #ifdef CONFIG_BLK_DEV_TRITON /* * Correct pc->actually_transferred by asking the tape. 
*/ - - if (pc->dma_error && pc->abort) { - unsigned long *long_ptr=(unsigned long *) &(result->information1); - pc->actually_transferred=pc->request_transfer-tape->tape_block_size*idetape_swap_long (*long_ptr); - } + if (test_bit (PC_DMA_ERROR, &pc->flags)) { + pc->actually_transferred = pc->request_transfer - tape->tape_block_size * ntohl (get_unaligned (&result->information)); + idetape_update_buffers (pc); + } #endif /* CONFIG_BLK_DEV_TRITON */ -#endif -} - -void idetape_create_test_unit_ready_cmd (idetape_packet_command_t *pc) - -{ -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Creating TEST UNIT READY packet command\n"); -#endif /* IDETAPE_DEBUG_LOG */ - pc->request_transfer=0; - pc->buffer=NULL; - pc->current_position=NULL; - pc->callback=&idetape_pc_callback; - pc->writing=0; - - idetape_zero_packet_command (pc); - pc->c[0]=IDETAPE_TEST_UNIT_READY_CMD; + if (pc->c[0] == IDETAPE_READ_CMD && result->filemark) { + pc->error = IDETAPE_ERROR_FILEMARK; + set_bit (PC_ABORT, &pc->flags); + } + if (pc->c[0] == IDETAPE_WRITE_CMD) { + if (result->eom || (result->sense_key == 0xd && result->asc == 0x0 && result->ascq == 0x2)) { + pc->error = IDETAPE_ERROR_EOD; + set_bit (PC_ABORT, &pc->flags); + } + } + if (pc->c[0] == IDETAPE_READ_CMD || pc->c[0] == IDETAPE_WRITE_CMD) { + if (result->sense_key == 8) { + pc->error = IDETAPE_ERROR_EOD; + set_bit (PC_ABORT, &pc->flags); + } + if (!test_bit (PC_ABORT, &pc->flags) && pc->actually_transferred) + pc->retries = IDETAPE_MAX_PC_RETRIES + 1; + } } -void idetape_create_locate_cmd (idetape_packet_command_t *pc,unsigned long block,byte partition) - +static void idetape_request_sense_callback (ide_drive_t *drive) { - unsigned long *ptr; + idetape_tape_t *tape = drive->driver_data; #if IDETAPE_DEBUG_LOG - printk ("ide-tape: Creating LOCATE packet command\n"); + printk (KERN_INFO "ide-tape: Reached idetape_request_sense_callback\n"); #endif /* IDETAPE_DEBUG_LOG */ - pc->request_transfer=0; - pc->buffer=NULL; - pc->current_position=NULL; - 
pc->buffer_size=0; - pc->wait_for_dsc=1; - pc->callback=&idetape_pc_callback; - pc->writing=0; - - idetape_zero_packet_command (pc); - pc->c [0]=IDETAPE_LOCATE_CMD; - pc->c [1]=2; - ptr=(unsigned long *) &(pc->c[3]); - *ptr=idetape_swap_long (block); - pc->c[8]=partition; + if (!tape->pc->error) { + idetape_analyze_error (drive,(idetape_request_sense_result_t *) tape->pc->buffer); + idetape_end_request (1,HWGROUP (drive)); + } else { + printk (KERN_ERR "Error in REQUEST SENSE itself - Aborting request!\n"); + idetape_end_request (0,HWGROUP (drive)); + } } -void idetape_create_rewind_cmd (idetape_packet_command_t *pc) - +/* + * idetape_init_pc initializes a packet command. + */ +static void idetape_init_pc (idetape_pc_t *pc) { -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Creating REWIND packet command\n"); -#endif /* IDETAPE_DEBUG_LOG */ - pc->request_transfer=0; - pc->buffer=NULL; - pc->current_position=NULL; - pc->buffer_size=0; - pc->wait_for_dsc=1; - pc->callback=&idetape_pc_callback; - pc->writing=0; - - idetape_zero_packet_command (pc); - pc->c [0]=IDETAPE_REWIND_CMD; + memset (pc->c, 0, 12); + pc->retries = 0; + pc->flags = 0; + pc->request_transfer = 0; + pc->buffer = pc->pc_buffer; + pc->buffer_size = IDETAPE_PC_BUFFER_SIZE; + pc->bh = NULL; + pc->b_data = NULL; +} + +static void idetape_create_request_sense_cmd (idetape_pc_t *pc) +{ + idetape_init_pc (pc); + pc->c[0] = IDETAPE_REQUEST_SENSE_CMD; + pc->c[4] = 255; + pc->request_transfer = 18; + pc->callback = &idetape_request_sense_callback; } /* - * A mode sense command is used to "sense" tape parameters. + * idetape_retry_pc is called when an error was detected during the + * last packet command. We queue a request sense packet command in + * the head of the request list. 
*/ - -void idetape_create_mode_sense_cmd (idetape_packet_command_t *pc,byte page_code) - +static void idetape_retry_pc (ide_drive_t *drive) { -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Creating MODE SENSE packet command - Page %d\n",page_code); -#endif /* IDETAPE_DEBUG_LOG */ + idetape_tape_t *tape = drive->driver_data; + idetape_pc_t *pc; + struct request *rq; + idetape_error_reg_t error; - pc->wait_for_dsc=0; - pc->callback=&idetape_pc_callback; - pc->writing=0; - - switch (page_code) { - case IDETAPE_CAPABILITIES_PAGE: - pc->request_transfer=24; - } - - idetape_zero_packet_command (pc); - pc->c [0]=IDETAPE_MODE_SENSE_CMD; - pc->c [1]=8; /* DBD = 1 - Don't return block descriptors for now */ - pc->c [2]=page_code; - pc->c [3]=255; /* Don't limit the returned information */ - pc->c [4]=255; /* (We will just discard data in that case) */ + error.all = IN_BYTE (IDE_ERROR_REG); + pc = idetape_next_pc_storage (drive); + rq = idetape_next_rq_storage (drive); + idetape_create_request_sense_cmd (pc); + set_bit (IDETAPE_IGNORE_DSC, &tape->flags); + idetape_queue_pc_head (drive, pc, rq); } /* - * idetape_create_write_filemark_cmd will: - * - * 1. Write a filemark if write_filemark=1. - * 2. Flush the device buffers without writing a filemark - * if write_filemark=0. + * idetape_pc_intr is the usual interrupt handler which will be called + * during a packet command. We will transfer some of the data (as + * requested by the drive) and will re-point interrupt handler to us. + * When data transfer is finished, we will act according to the + * algorithm described before idetape_issue_packet_command. 
* */ - -void idetape_create_write_filemark_cmd (idetape_packet_command_t *pc,int write_filemark) - +static void idetape_pc_intr (ide_drive_t *drive) { + idetape_tape_t *tape = drive->driver_data; + idetape_status_reg_t status; + idetape_bcount_reg_t bcount; + idetape_ireason_reg_t ireason; + idetape_pc_t *pc=tape->pc; + unsigned int temp; + +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "ide-tape: Reached idetape_pc_intr interrupt handler\n"); +#endif /* IDETAPE_DEBUG_LOG */ + +#ifdef CONFIG_BLK_DEV_TRITON + if (test_bit (PC_DMA_IN_PROGRESS, &pc->flags)) { + if (HWIF(drive)->dmaproc(ide_dma_status_bad, drive)) { + set_bit (PC_DMA_ERROR, &pc->flags); + /* + * We will currently correct the following in + * idetape_analyze_error. + */ + pc->actually_transferred=HWIF(drive)->dmaproc(ide_dma_transferred, drive); + } else { + pc->actually_transferred=pc->request_transfer; + idetape_update_buffers (pc); + } + (void) (HWIF(drive)->dmaproc(ide_dma_abort, drive)); /* End DMA */ #if IDETAPE_DEBUG_LOG - printk ("Creating WRITE FILEMARK packet command\n"); - if (!write_filemark) - printk ("which will only flush buffered data\n"); + printk (KERN_INFO "ide-tape: DMA finished\n"); #endif /* IDETAPE_DEBUG_LOG */ - pc->request_transfer=0; - pc->buffer=NULL; - pc->current_position=NULL; - pc->buffer_size=0; - pc->wait_for_dsc=1; - pc->callback=&idetape_pc_callback; - pc->writing=0; - - idetape_zero_packet_command (pc); - pc->c [0]=IDETAPE_WRITE_FILEMARK_CMD; - if (write_filemark) - pc->c [4]=1; -} + } +#endif /* CONFIG_BLK_DEV_TRITON */ -void idetape_create_load_unload_cmd (idetape_packet_command_t *pc,int cmd) + status.all = GET_STAT(); /* Clear the interrupt */ -{ + if (!status.b.drq) { /* No more interrupts */ #if IDETAPE_DEBUG_LOG - printk ("Creating LOAD UNLOAD packet command, cmd=%d\n",cmd); + printk (KERN_INFO "Packet command completed, %d bytes transferred\n", pc->actually_transferred); #endif /* IDETAPE_DEBUG_LOG */ - pc->request_transfer=0; - pc->buffer=NULL; - 
pc->current_position=NULL; - pc->buffer_size=0; - pc->wait_for_dsc=1; - pc->callback=&idetape_pc_callback; - pc->writing=0; - - idetape_zero_packet_command (pc); - pc->c [0]=IDETAPE_LOAD_UNLOAD_CMD; - pc->c [4]=cmd; -} + clear_bit (PC_DMA_IN_PROGRESS, &pc->flags); -void idetape_create_erase_cmd (idetape_packet_command_t *pc) - -{ + ide_sti(); + if (status.b.check || test_bit (PC_DMA_ERROR, &pc->flags)) { /* Error detected */ #if IDETAPE_DEBUG_LOG - printk ("Creating ERASE command\n"); + printk (KERN_INFO "ide-tape: %s: I/O error, ",tape->name); #endif /* IDETAPE_DEBUG_LOG */ + if (pc->c[0] == IDETAPE_REQUEST_SENSE_CMD) { + printk (KERN_ERR "ide-tape: I/O error in request sense command\n"); + ide_do_reset (drive); + return; + } + idetape_retry_pc (drive); /* Retry operation */ + return; + } + pc->error = 0; + if (test_bit (PC_WAIT_FOR_DSC, &pc->flags) && !status.b.dsc) { /* Media access command */ + tape->dsc_polling_start = jiffies; + tape->dsc_polling_frequency = IDETAPE_DSC_MA_FAST; + tape->dsc_timeout = jiffies + IDETAPE_DSC_MA_TIMEOUT; + idetape_postpone_request (drive); /* Allow ide.c to handle other requests */ + return; + } + if (tape->failed_pc == pc) + tape->failed_pc=NULL; + pc->callback(drive); /* Command finished - Call the callback function */ + return; + } +#ifdef CONFIG_BLK_DEV_TRITON + if (clear_bit (PC_DMA_IN_PROGRESS, &pc->flags)) { + printk (KERN_ERR "ide-tape: The tape wants to issue more interrupts in DMA mode\n"); + printk (KERN_ERR "ide-tape: DMA disabled, reverting to PIO\n"); + drive->using_dma=0; + ide_do_reset (drive); + return; + } +#endif /* CONFIG_BLK_DEV_TRITON */ + bcount.b.high=IN_BYTE (IDE_BCOUNTH_REG); /* Get the number of bytes to transfer */ + bcount.b.low=IN_BYTE (IDE_BCOUNTL_REG); /* on this interrupt */ + ireason.all=IN_BYTE (IDE_IREASON_REG); - pc->request_transfer=0; - pc->buffer=NULL; - pc->current_position=NULL; - pc->buffer_size=0; - pc->wait_for_dsc=1; - pc->callback=&idetape_pc_callback; - pc->writing=0; - - 
idetape_zero_packet_command (pc); - pc->c [0]=IDETAPE_ERASE_CMD; - pc->c [1]=1; -} - -void idetape_create_read_cmd (idetape_packet_command_t *pc,unsigned long length) - -{ - union convert { - unsigned all :32; - struct { - unsigned b1 :8; - unsigned b2 :8; - unsigned b3 :8; - unsigned b4 :8; - } b; - } original; - + if (ireason.b.cod) { + printk (KERN_ERR "ide-tape: CoD != 0 in idetape_pc_intr\n"); + ide_do_reset (drive); + return; + } + if (ireason.b.io == test_bit (PC_WRITING, &pc->flags)) { /* Hopefully, we will never get here */ + printk (KERN_ERR "ide-tape: We wanted to %s, ", ireason.b.io ? "Write":"Read"); + printk (KERN_ERR "but the tape wants us to %s !\n",ireason.b.io ? "Read":"Write"); + ide_do_reset (drive); + return; + } + if (!test_bit (PC_WRITING, &pc->flags)) { /* Reading - Check that we have enough space */ + temp = pc->actually_transferred + bcount.all; + if ( temp > pc->request_transfer) { + if (temp > pc->buffer_size) { + printk (KERN_ERR "ide-tape: The tape wants to send us more data than expected - discarding data\n"); + idetape_discard_data (drive,bcount.all); + ide_set_handler (drive,&idetape_pc_intr,WAIT_CMD); + return; + } #if IDETAPE_DEBUG_LOG - printk ("ide-tape: Creating READ packet command\n"); + printk (KERN_NOTICE "ide-tape: The tape wants to send us more data than expected - allowing transfer\n"); #endif /* IDETAPE_DEBUG_LOG */ + } + } + if (test_bit (PC_WRITING, &pc->flags)) { + if (pc->bh != NULL) + idetape_output_buffers (drive, pc, bcount.all); + else + atapi_output_bytes (drive,pc->current_position,bcount.all); /* Write the current buffer */ + } else { + if (pc->bh != NULL) + idetape_input_buffers (drive, pc, bcount.all); + else + atapi_input_bytes (drive,pc->current_position,bcount.all); /* Read the current buffer */ + } + pc->actually_transferred+=bcount.all; /* Update the current position */ + pc->current_position+=bcount.all; + + ide_set_handler (drive,&idetape_pc_intr,WAIT_CMD); /* And set the interrupt handler again */ +} 
+ +/* + * Packet Command Interface + * + * The current Packet Command is available in tape->pc, and will not + * change until we finish handling it. Each packet command is associated + * with a callback function that will be called when the command is + * finished. + * + * The handling will be done in three stages: + * + * 1. idetape_issue_packet_command will send the packet command to the + * drive, and will set the interrupt handler to idetape_pc_intr. + * + * 2. On each interrupt, idetape_pc_intr will be called. This step + * will be repeated until the device signals us that no more + * interrupts will be issued. + * + * 3. ATAPI Tape media access commands have immediate status with a + * delayed process. In case of a successful initiation of a + * media access packet command, the DSC bit will be set when the + * actual execution of the command is finished. + * Since the tape drive will not issue an interrupt, we have to + * poll for this event. In this case, we define the request as + * "low priority request" by setting rq_status to + * IDETAPE_RQ_POSTPONED, set a timer to poll for DSC and exit + * the driver. + * + * ide.c will then give higher priority to requests which + * originate from the other device, until we will change rq_status + * to RQ_ACTIVE. + * + * 4. When the packet command is finished, it will be checked for errors. + * + * 5. In case an error was found, we queue a request sense packet command + * in front of the request queue and retry the operation up to + * IDETAPE_MAX_PC_RETRIES times. + * + * 6. In case no error was found, or we decided to give up and not + * to retry again, the callback function will be called and then + * we will handle the next request. 
+ * + */ +static void idetape_issue_packet_command (ide_drive_t *drive, idetape_pc_t *pc) +{ + idetape_tape_t *tape = drive->driver_data; + idetape_bcount_reg_t bcount; + idetape_ireason_reg_t ireason; + int dma_ok=0; + +#if IDETAPE_DEBUG_BUGS + if (tape->pc->c[0] == IDETAPE_REQUEST_SENSE_CMD && pc->c[0] == IDETAPE_REQUEST_SENSE_CMD) { + printk (KERN_ERR "ide-tape: possible ide-tape.c bug - Two request sense in serial were issued\n"); + } +#endif /* IDETAPE_DEBUG_BUGS */ + + if (tape->failed_pc == NULL && pc->c[0] != IDETAPE_REQUEST_SENSE_CMD) + tape->failed_pc=pc; + tape->pc=pc; /* Set the current packet command */ - original.all=length; + if (pc->retries > IDETAPE_MAX_PC_RETRIES || test_bit (PC_ABORT, &pc->flags)) { + /* + * We will "abort" retrying a packet command in case + * a legitimate error code was received (crossing a + * filemark, or DMA error in the end of media, for + * example). + */ + if (!test_bit (PC_ABORT, &pc->flags)) { + printk (KERN_ERR "ide-tape: %s: I/O error, pc = %2x, key = %2x, asc = %2x, ascq = %2x\n", + tape->name, pc->c[0], tape->sense_key, tape->asc, tape->ascq); + pc->error = IDETAPE_ERROR_GENERAL; /* Giving up */ + } + tape->failed_pc=NULL; + pc->callback(drive); + return; + } +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "Retry number - %d\n",pc->retries); +#endif /* IDETAPE_DEBUG_LOG */ - pc->wait_for_dsc=0; - pc->callback=&idetape_read_callback; - pc->writing=0; + pc->retries++; + pc->actually_transferred=0; /* We haven't transferred any data yet */ + pc->current_position=pc->buffer; + bcount.all=pc->request_transfer; /* Request to transfer the entire buffer at once */ - idetape_zero_packet_command (pc); +#ifdef CONFIG_BLK_DEV_TRITON + if (clear_bit (PC_DMA_ERROR, &pc->flags)) { + printk (KERN_WARNING "ide-tape: DMA disabled, reverting to PIO\n"); + drive->using_dma=0; + } + if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma) + dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? 
ide_dma_write : ide_dma_read, drive); +#endif /* CONFIG_BLK_DEV_TRITON */ - pc->c [0]=IDETAPE_READ_CMD; - pc->c [1]=1; - pc->c [4]=original.b.b1; - pc->c [3]=original.b.b2; - pc->c [2]=original.b.b3; + OUT_BYTE (drive->ctl,IDE_CONTROL_REG); + OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG); /* Use PIO/DMA */ + OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG); + OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG); + OUT_BYTE (drive->select.all,IDE_SELECT_REG); - if (length) - pc->dma_recommended=1; + ide_set_handler (drive, &idetape_pc_intr, WAIT_CMD); /* Set the interrupt routine */ + OUT_BYTE (WIN_PACKETCMD,IDE_COMMAND_REG); /* Issue the packet command */ - return; + if (ide_wait_stat (drive,DRQ_STAT,BUSY_STAT,WAIT_READY)) { /* Wait for DRQ to be ready - Assuming Accelerated DRQ */ + /* + * We currently only support tape drives which report + * accelerated DRQ assertion. For this case, specs + * allow up to 50us. We really shouldn't get here. + * + * ??? Still needs to think what to do if we reach + * here anyway. 
+ */ + printk (KERN_ERR "ide-tape: Strange, packet command initiated yet DRQ isn't asserted\n"); + return; + } + ireason.all=IN_BYTE (IDE_IREASON_REG); + if (!ireason.b.cod || ireason.b.io) { + printk (KERN_ERR "ide-tape: (IO,CoD) != (0,1) while issuing a packet command\n"); + ide_do_reset (drive); + return; + } + atapi_output_bytes (drive,pc->c,12); /* Send the actual packet */ +#ifdef CONFIG_BLK_DEV_TRITON + if (dma_ok) { /* Begin DMA, if necessary */ + set_bit (PC_DMA_IN_PROGRESS, &pc->flags); + (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); + } +#endif /* CONFIG_BLK_DEV_TRITON */ } -void idetape_create_space_cmd (idetape_packet_command_t *pc,long count,byte cmd) +static void idetape_media_access_finished (ide_drive_t *drive) +{ + idetape_tape_t *tape = drive->driver_data; + idetape_pc_t *pc = tape->pc; + idetape_status_reg_t status; + + status.all = GET_STAT(); + if (status.b.dsc) { + if (status.b.check) { /* Error detected */ + printk (KERN_ERR "ide-tape: %s: I/O error, ",tape->name); + idetape_retry_pc (drive); /* Retry operation */ + return; + } + pc->error = 0; + if (tape->failed_pc == pc) + tape->failed_pc = NULL; + } else { + pc->error = IDETAPE_ERROR_GENERAL; + tape->failed_pc = NULL; + } + pc->callback (drive); +} +/* + * General packet command callback function. + */ +static void idetape_pc_callback (ide_drive_t *drive) { - union convert { - unsigned all :32; - struct { - unsigned b1 :8; - unsigned b2 :8; - unsigned b3 :8; - unsigned b4 :8; - } b; - } original; + idetape_tape_t *tape = drive->driver_data; #if IDETAPE_DEBUG_LOG - printk ("ide-tape: Creating SPACE packet command\n"); + printk (KERN_INFO "ide-tape: Reached idetape_pc_callback\n"); #endif /* IDETAPE_DEBUG_LOG */ - original.all=count; + idetape_end_request (tape->pc->error ? 
0:1, HWGROUP(drive)); +} - pc->request_transfer=0; - pc->buffer=NULL; - pc->current_position=NULL; - pc->buffer_size=0; - pc->wait_for_dsc=1; - pc->callback=&idetape_pc_callback; - pc->writing=0; - - idetape_zero_packet_command (pc); - pc->c [0]=IDETAPE_SPACE_CMD; - pc->c [1]=cmd; - pc->c [4]=original.b.b1; - pc->c [3]=original.b.b2; - pc->c [2]=original.b.b3; - - return; -} - -void idetape_create_write_cmd (idetape_packet_command_t *pc,unsigned long length) - -{ - union convert { - unsigned all :32; - struct { - unsigned b1 :8; - unsigned b2 :8; - unsigned b3 :8; - unsigned b4 :8; - } b; - } original; - -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Creating WRITE packet command\n"); +static void idetape_rw_callback (ide_drive_t *drive) +{ + idetape_tape_t *tape = drive->driver_data; + struct request *rq = HWGROUP(drive)->rq; + int blocks = tape->pc->actually_transferred / tape->tape_block_size; + +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "ide-tape: Reached idetape_rw_callback\n"); #endif /* IDETAPE_DEBUG_LOG */ - original.all=length; + tape->block_address += blocks; + rq->current_nr_sectors -= blocks; - pc->wait_for_dsc=0; - pc->callback=&idetape_write_callback; - pc->writing=1; + if (!tape->pc->error) + idetape_end_request (1, HWGROUP (drive)); + else + idetape_end_request (tape->pc->error, HWGROUP (drive)); +} - idetape_zero_packet_command (pc); +static void idetape_create_locate_cmd (idetape_pc_t *pc, unsigned int block, byte partition) +{ + idetape_init_pc (pc); + pc->c[0] = IDETAPE_LOCATE_CMD; + pc->c[1] = 2; + put_unaligned (htonl (block), (unsigned int *) &pc->c[3]); + pc->c[8] = partition; + set_bit (PC_WAIT_FOR_DSC, &pc->flags); + pc->callback = &idetape_pc_callback; +} - pc->c [0]=IDETAPE_WRITE_CMD; - pc->c [1]=1; - pc->c [4]=original.b.b1; - pc->c [3]=original.b.b2; - pc->c [2]=original.b.b3; +static void idetape_create_rewind_cmd (idetape_pc_t *pc) +{ + idetape_init_pc (pc); + pc->c[0] = IDETAPE_REWIND_CMD; + set_bit (PC_WAIT_FOR_DSC, &pc->flags); + 
pc->callback = &idetape_pc_callback; +} - if (length) - pc->dma_recommended=1; +/* + * A mode sense command is used to "sense" tape parameters. + */ +static void idetape_create_mode_sense_cmd (idetape_pc_t *pc, byte page_code) +{ + idetape_init_pc (pc); + pc->c[0] = IDETAPE_MODE_SENSE_CMD; + pc->c[1] = 8; /* DBD = 1 - Don't return block descriptors for now */ + pc->c[2] = page_code; + pc->c[3] = 255; /* Don't limit the returned information */ + pc->c[4] = 255; /* (We will just discard data in that case) */ + if (page_code == IDETAPE_CAPABILITIES_PAGE) + pc->request_transfer = 24; +#if IDETAPE_DEBUG_BUGS + else + printk (KERN_ERR "ide-tape: unsupported page code in create_mode_sense_cmd\n"); +#endif /* IDETAPE_DEBUG_BUGS */ + pc->callback = &idetape_pc_callback; +} - return; +/* + * idetape_create_write_filemark_cmd will: + * + * 1. Write a filemark if write_filemark=1. + * 2. Flush the device buffers without writing a filemark + * if write_filemark=0. + * + */ +static void idetape_create_write_filemark_cmd (idetape_pc_t *pc,int write_filemark) +{ + idetape_init_pc (pc); + pc->c[0] = IDETAPE_WRITE_FILEMARK_CMD; + pc->c[4] = write_filemark; + set_bit (PC_WAIT_FOR_DSC, &pc->flags); + pc->callback = &idetape_pc_callback; } -void idetape_create_read_position_cmd (idetape_packet_command_t *pc) +static void idetape_create_load_unload_cmd (idetape_pc_t *pc,int cmd) +{ + idetape_init_pc (pc); + pc->c[0] = IDETAPE_LOAD_UNLOAD_CMD; + pc->c[4] = cmd; + set_bit (PC_WAIT_FOR_DSC, &pc->flags); + pc->callback = &idetape_pc_callback; +} +static void idetape_create_erase_cmd (idetape_pc_t *pc) { -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Creating READ POSITION packet command\n"); -#endif /* IDETAPE_DEBUG_LOG */ + idetape_init_pc (pc); + pc->c[0] = IDETAPE_ERASE_CMD; + pc->c[1] = 1; + set_bit (PC_WAIT_FOR_DSC, &pc->flags); + pc->callback = &idetape_pc_callback; +} - pc->request_transfer=20; - pc->wait_for_dsc=0; - pc->callback=&idetape_read_position_callback; - pc->writing=0; 
+static void idetape_create_read_cmd (idetape_tape_t *tape, idetape_pc_t *pc, unsigned int length, struct buffer_head *bh) +{ + idetape_init_pc (pc); + pc->c[0] = IDETAPE_READ_CMD; + put_unaligned (htonl (length), (unsigned int *) &pc->c[1]); + pc->c[1] = 1; + pc->callback = &idetape_rw_callback; + pc->bh = bh; + bh->b_count = 0; + pc->buffer = NULL; + pc->request_transfer = pc->buffer_size = length * tape->tape_block_size; + if (pc->request_transfer == tape->stage_size) + set_bit (PC_DMA_RECOMMENDED, &pc->flags); +} - idetape_zero_packet_command (pc); - pc->c [0]=IDETAPE_READ_POSITION_CMD; - pc->c [1]=0; +static void idetape_create_space_cmd (idetape_pc_t *pc,int count,byte cmd) +{ + idetape_init_pc (pc); + pc->c[0] = IDETAPE_SPACE_CMD; + put_unaligned (htonl (count), (unsigned int *) &pc->c[1]); + pc->c[1] = cmd; + set_bit (PC_WAIT_FOR_DSC, &pc->flags); + pc->callback = &idetape_pc_callback; } -void idetape_read_position_callback (ide_drive_t *drive) +static void idetape_create_write_cmd (idetape_tape_t *tape, idetape_pc_t *pc, unsigned int length, struct buffer_head *bh) +{ + idetape_init_pc (pc); + pc->c[0] = IDETAPE_WRITE_CMD; + put_unaligned (htonl (length), (unsigned int *) &pc->c[1]); + pc->c[1] = 1; + pc->callback = &idetape_rw_callback; + set_bit (PC_WRITING, &pc->flags); + pc->bh = bh; + pc->b_data = bh->b_data; + pc->b_count = bh->b_count; + pc->buffer = NULL; + pc->request_transfer = pc->buffer_size = length * tape->tape_block_size; + if (pc->request_transfer == tape->stage_size) + set_bit (PC_DMA_RECOMMENDED, &pc->flags); +} +static void idetape_read_position_callback (ide_drive_t *drive) { - idetape_tape_t *tape; - struct request *rq; + idetape_tape_t *tape = drive->driver_data; idetape_read_position_result_t *result; - tape=&(drive->tape); - #if IDETAPE_DEBUG_LOG - printk ("ide-tape: Reached idetape_read_position_callback\n"); + printk (KERN_INFO "ide-tape: Reached idetape_read_position_callback\n"); #endif /* IDETAPE_DEBUG_LOG */ - 
rq=HWGROUP(drive)->rq; - if (!tape->pc->error) { - result=(idetape_read_position_result_t *) tape->pc->buffer; + result = (idetape_read_position_result_t *) tape->pc->buffer; #if IDETAPE_DEBUG_LOG - printk ("Request completed\n"); - printk ("Dumping the results of the READ POSITION command\n"); - printk ("BOP - %s\n",result->bop ? "Yes":"No"); - printk ("EOP - %s\n",result->eop ? "Yes":"No"); + printk (KERN_INFO "BOP - %s\n",result->bop ? "Yes":"No"); + printk (KERN_INFO "EOP - %s\n",result->eop ? "Yes":"No"); #endif /* IDETAPE_DEBUG_LOG */ if (result->bpu) { - printk ("ide-tape: Block location is unknown to the tape\n"); - printk ("Aborting request\n"); - tape->block_address_valid=0; + printk (KERN_INFO "ide-tape: Block location is unknown to the tape\n"); + clear_bit (IDETAPE_ADDRESS_VALID, &tape->flags); idetape_end_request (0,HWGROUP (drive)); - } - else { + } else { #if IDETAPE_DEBUG_LOG - printk ("Block Location - %lu\n",idetape_swap_long (result->first_block)); + printk (KERN_INFO "Block Location - %lu\n", ntohl (result->first_block)); #endif /* IDETAPE_DEBUG_LOG */ - tape->block_address=idetape_swap_long (result->first_block); - tape->block_address_valid=1; + tape->partition = result->partition; + tape->block_address = ntohl (result->first_block); + set_bit (IDETAPE_ADDRESS_VALID, &tape->flags); idetape_end_request (1,HWGROUP (drive)); } - } - else { - printk ("Aborting request\n"); + } else idetape_end_request (0,HWGROUP (drive)); - } - return; -} - -/* - * Our special ide-tape ioctl's. - * - * Currently there aren't any significant ioctl's. - * mtio.h compatible commands should be issued to the character device - * interface. 
- */ - -int idetape_blkdev_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - idetape_packet_command_t pc; - - pc.buffer=pc.temp_buffer; - pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE; - pc.current_position=pc.temp_buffer; - -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: Reached idetape_blkdev_ioctl\n"); -#endif /* IDETAPE_DEBUG_LOG */ - switch (cmd) { - default: - return -EIO; - } } -/* - * Functions which handle requests. - */ - -/* - * idetape_end_request is used to end a request. - */ - -void idetape_end_request (byte uptodate, ide_hwgroup_t *hwgroup) - +static void idetape_create_read_position_cmd (idetape_pc_t *pc) { - ide_drive_t *drive = hwgroup->drive; - struct request *rq = hwgroup->rq; - idetape_tape_t *tape = &(drive->tape); - unsigned int major = HWIF(drive)->major; - struct blk_dev_struct *bdev = &blk_dev[major]; - int error; - -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_end_request\n"); -#endif /* IDETAPE_DEBUG_LOG */ - - bdev->current_request=rq; /* Since we may have taken it out */ - - if (!rq->errors) /* In case rq->errors is already set, */ - rq->errors=!uptodate; /* we won't change it. 
*/ - error=rq->errors; - - if (tape->active_data_request == rq) { /* The request was a pipelined data transfer request */ - - if (rq->cmd == IDETAPE_READ_REQUEST) { -#if IDETAPE_DEBUG_BUGS - if (tape->active_stage == NULL) - printk ("ide-tape: bug: active_stage is NULL in idetape_end_request\n"); - else -#endif /* IDETAPE_DEBUG_BUGS */ - idetape_copy_buffer_to_stage (tape->active_stage,tape->data_buffer); - } - - tape->active_stage=NULL; - tape->active_data_request=NULL; - - if (rq->cmd == IDETAPE_WRITE_REQUEST) { - if (rq->errors) - tape->error_in_pipeline_stage=rq->errors; - idetape_remove_stage_head (drive); - } - - if (tape->next_stage == NULL) { - if (!error) - idetape_increase_max_pipeline_stages (drive); - ide_end_drive_cmd (drive, 0, 0); - return; - } - - idetape_active_next_stage (drive); - - /* - * Insert the next request into the request queue. - * - * The choice of using ide_next or ide_end is now left - * to the user. - */ - -#if IDETAPE_LOW_TAPE_PRIORITY - (void) ide_do_drive_cmd (drive,tape->active_data_request,ide_end); -#else - (void) ide_do_drive_cmd (drive,tape->active_data_request,ide_next); -#endif /* IDETAPE_LOW_TAPE_PRIORITY */ - } - ide_end_drive_cmd (drive, 0, 0); + idetape_init_pc (pc); + pc->c[0] = IDETAPE_READ_POSITION_CMD; + pc->request_transfer = 20; + pc->callback = &idetape_read_position_callback; } /* * idetape_do_request is our request handling function. 
*/ - -void idetape_do_request (ide_drive_t *drive, struct request *rq, unsigned long block) - +static void idetape_do_request (ide_drive_t *drive, struct request *rq, unsigned long block) { - idetape_tape_t *tape=&(drive->tape); - idetape_packet_command_t *pc; - unsigned int major = HWIF(drive)->major; - struct blk_dev_struct *bdev = &blk_dev[major]; + idetape_tape_t *tape = drive->driver_data; + idetape_pc_t *pc; + struct blk_dev_struct *bdev = &blk_dev[HWIF(drive)->major]; + struct request *postponed_rq = tape->postponed_rq; idetape_status_reg_t status; #if IDETAPE_DEBUG_LOG - printk ("Current request:\n"); - printk ("rq_status: %d, rq_dev: %u, cmd: %d, errors: %d\n",rq->rq_status,(unsigned int) rq->rq_dev,rq->cmd,rq->errors); - printk ("sector: %ld, nr_sectors: %ld, current_nr_sectors: %ld\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors); + printk (KERN_INFO "rq_status: %d, rq_dev: %u, cmd: %d, errors: %d\n",rq->rq_status,(unsigned int) rq->rq_dev,rq->cmd,rq->errors); + printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %ld\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors); #endif /* IDETAPE_DEBUG_LOG */ - if (!IDETAPE_REQUEST_CMD (rq->cmd)) { - + if (!IDETAPE_RQ_CMD (rq->cmd)) { /* * We do not support buffer cache originated requests. */ - - printk ("ide-tape: Unsupported command in request queue\n"); - printk ("ide-tape: The block device interface should not be used for data transfers.\n"); - printk ("ide-tape: Use the character device interfaces\n"); - printk ("ide-tape: /dev/ht0 and /dev/nht0 instead.\n"); - printk ("ide-tape: (Run linux/scripts/MAKEDEV.ide to create them)\n"); - printk ("ide-tape: Aborting request.\n"); - + printk (KERN_NOTICE "ide-tape: %s: Unsupported command in request queue\n", drive->name); ide_end_request (0,HWGROUP (drive)); /* Let the common code handle it */ return; } @@ -2980,131 +2278,81 @@ * * The potential fragmentation inefficiency was pointed to me * by Mark Lord. + * + * Uhuh.. 
the following "fix" is actually not entirely correct. + * Some day we should probably move to a per device request + * queue, rather than per interface. */ - if (rq->next != NULL && rq->rq_dev != rq->next->rq_dev) bdev->current_request=rq->next; - /* Retry a failed packet command */ - + /* + * Retry a failed packet command + */ if (tape->failed_pc != NULL && tape->pc->c[0] == IDETAPE_REQUEST_SENSE_CMD) { - idetape_issue_packet_command (drive,tape->failed_pc,&idetape_pc_intr); + idetape_issue_packet_command (drive, tape->failed_pc); return; } - - /* Check if we have a postponed request */ - - if (tape->postponed_rq != NULL) { #if IDETAPE_DEBUG_BUGS - if (tape->postponed_rq->rq_status != RQ_ACTIVE || rq != tape->postponed_rq) { - printk ("ide-tape: ide-tape.c bug - Two DSC requests were queued\n"); + if (postponed_rq != NULL) + if (postponed_rq->rq_status != RQ_ACTIVE || rq != postponed_rq) { + printk (KERN_ERR "ide-tape: ide-tape.c bug - Two DSC requests were queued\n"); idetape_end_request (0,HWGROUP (drive)); return; } #endif /* IDETAPE_DEBUG_BUGS */ - if (rq->cmd == IDETAPE_PACKET_COMMAND_REQUEST_TYPE1) { - - /* Media access command */ - - tape->postponed_rq = NULL; - idetape_media_access_finished (drive); - return; - } - - /* - * Read / Write command - DSC polling was done before the - * actual command - Continue normally so that the command - * will be performed below. - */ - - tape->postponed_rq = NULL; - } - status.all=IN_BYTE (IDETAPE_STATUS_REG); + tape->postponed_rq = NULL; /* - * After a software reset, the status register is locked. We - * will ignore the DSC value for our very first packet command, - * which will restore DSC operation. + * If the tape is still busy, postpone our request and service + * the other device meanwhile. 
*/ - - if (tape->reset_issued) { - status.b.dsc=1; - tape->reset_issued=0; + status.all = GET_STAT(); + if (!clear_bit (IDETAPE_IGNORE_DSC, &tape->flags) && !status.b.dsc) { + if (postponed_rq == NULL) { + tape->dsc_polling_start = jiffies; + tape->dsc_polling_frequency = tape->best_dsc_rw_frequency; + tape->dsc_timeout = jiffies + IDETAPE_DSC_RW_TIMEOUT; + } else if ((signed long) (jiffies - tape->dsc_timeout) > 0) { + printk (KERN_ERR "ide-tape: %s: DSC timeout\n", tape->name); + if (rq->cmd == IDETAPE_PC_RQ) + idetape_media_access_finished (drive); + else + ide_do_reset (drive); + return; + } else if (jiffies - tape->dsc_polling_start > IDETAPE_DSC_MA_THRESHOLD) + tape->dsc_polling_frequency = IDETAPE_DSC_MA_SLOW; + idetape_postpone_request (drive); + return; } - switch (rq->cmd) { - case IDETAPE_READ_REQUEST: - if (!status.b.dsc) { /* Tape buffer not ready to accept r/w command */ -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: DSC != 1 - Postponing read request\n"); -#endif /* IDETAPE_DEBUG_LOG */ - tape->dsc_polling_frequency=tape->best_dsc_rw_frequency; - idetape_postpone_request (drive); /* Allow ide.c to process requests from */ - return; - } - + case IDETAPE_READ_RQ: pc=idetape_next_pc_storage (drive); - - idetape_create_read_cmd (pc,rq->current_nr_sectors); - - pc->buffer=rq->buffer; - pc->buffer_size=rq->current_nr_sectors*tape->tape_block_size; - pc->current_position=rq->buffer; - pc->request_transfer=rq->current_nr_sectors*tape->tape_block_size; - - idetape_issue_packet_command (drive,pc,&idetape_pc_intr); - return; - - case IDETAPE_WRITE_REQUEST: - if (!status.b.dsc) { /* Tape buffer not ready to accept r/w command */ -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: DSC != 1 - Postponing write request\n"); -#endif /* IDETAPE_DEBUG_LOG */ - tape->dsc_polling_frequency=tape->best_dsc_rw_frequency; - idetape_postpone_request (drive); /* Allow ide.c to process requests from */ - return; - } - + idetape_create_read_cmd (tape, pc, rq->current_nr_sectors, rq->bh); + 
break; + case IDETAPE_WRITE_RQ: pc=idetape_next_pc_storage (drive); - - idetape_create_write_cmd (pc,rq->current_nr_sectors); - - pc->buffer=rq->buffer; - pc->buffer_size=rq->current_nr_sectors*tape->tape_block_size; - pc->current_position=rq->buffer; - pc->request_transfer=rq->current_nr_sectors*tape->tape_block_size; - - idetape_issue_packet_command (drive,pc,&idetape_pc_intr); + idetape_create_write_cmd (tape, pc, rq->current_nr_sectors, rq->bh); + break; + case IDETAPE_ABORTED_WRITE_RQ: + rq->cmd = IDETAPE_WRITE_RQ; + rq->errors = IDETAPE_ERROR_EOD; + idetape_end_request (1, HWGROUP(drive)); return; - - case IDETAPE_PACKET_COMMAND_REQUEST_TYPE1: - case IDETAPE_PACKET_COMMAND_REQUEST_TYPE2: -/* - * This should be unnecessary (postponing of a general packet command), - * but I have occasionally missed DSC on a media access command otherwise. - * ??? Still have to figure it out ... - */ - if (!status.b.dsc) { /* Tape buffers are still not ready */ -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: DSC != 1 - Postponing packet command request\n"); -#endif /* IDETAPE_DEBUG_LOG */ - rq->cmd=IDETAPE_PACKET_COMMAND_REQUEST_TYPE2; /* Note that we are waiting for DSC *before* we */ - /* even issued the command */ - tape->dsc_polling_frequency=IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY; - idetape_postpone_request (drive); /* Allow ide.c to process requests from */ + case IDETAPE_PC_RQ: + if (postponed_rq != NULL) { + idetape_media_access_finished (drive); return; } - rq->cmd=IDETAPE_PACKET_COMMAND_REQUEST_TYPE1; - pc=(idetape_packet_command_t *) rq->buffer; - idetape_issue_packet_command (drive,pc,&idetape_pc_intr); - return; -#if IDETAPE_DEBUG_BUGS + pc=(idetape_pc_t *) rq->buffer; + break; default: - printk ("ide-tape: bug in IDETAPE_REQUEST_CMD macro\n"); + printk (KERN_ERR "ide-tape: bug in IDETAPE_RQ_CMD macro\n"); idetape_end_request (0,HWGROUP (drive)); -#endif /* IDETAPE_DEBUG_BUGS */ - } + return; + } + idetape_issue_packet_command (drive, pc); } /* @@ -3127,72 +2375,34 
@@ * the request to the request list without waiting for it to be serviced ! * In that case, we usually use idetape_queue_pc_head. */ - -int idetape_queue_pc_tail (ide_drive_t *drive,idetape_packet_command_t *pc) +static int idetape_queue_pc_tail (ide_drive_t *drive,idetape_pc_t *pc) { struct request rq; ide_init_drive_cmd (&rq); rq.buffer = (char *) pc; - rq.cmd = IDETAPE_PACKET_COMMAND_REQUEST_TYPE1; + rq.cmd = IDETAPE_PC_RQ; return ide_do_drive_cmd (drive, &rq, ide_wait); } /* - * idetape_queue_pc_head generates a new packet command request in front - * of the request queue, before the current request, so that it will be - * processed immediately, on the next pass through the driver. - * - * idetape_queue_pc_head is called from the request handling part of - * the driver (the "bottom" part). Safe storage for the request should - * be allocated with idetape_next_pc_storage and idetape_next_rq_storage - * before calling idetape_queue_pc_head. - * - * Memory for those requests is pre-allocated at initialization time, and - * is limited to IDETAPE_PC_STACK requests. We assume that we have enough - * space for the maximum possible number of inter-dependent packet commands. - * - * The higher level of the driver - The ioctl handler and the character - * device handling functions should queue request to the lower level part - * and wait for their completion using idetape_queue_pc_tail or - * idetape_queue_rw_tail. - */ - -void idetape_queue_pc_head (ide_drive_t *drive,idetape_packet_command_t *pc,struct request *rq) - -{ - unsigned int major = HWIF(drive)->major; - struct blk_dev_struct *bdev = &blk_dev[major]; - - bdev->current_request=HWGROUP (drive)->rq; /* Since we may have taken it out */ - - ide_init_drive_cmd (rq); - rq->buffer = (char *) pc; - rq->cmd = IDETAPE_PACKET_COMMAND_REQUEST_TYPE1; - (void) ide_do_drive_cmd (drive, rq, ide_preempt); -} - -/* * idetape_wait_for_request installs a semaphore in a pending request * and sleeps until it is serviced. 
* * The caller should ensure that the request will not be serviced * before we install the semaphore (usually by disabling interrupts). */ - -void idetape_wait_for_request (struct request *rq) - +static void idetape_wait_for_request (struct request *rq) { struct semaphore sem = MUTEX_LOCKED; #if IDETAPE_DEBUG_BUGS - if (rq == NULL || !IDETAPE_REQUEST_CMD (rq->cmd)) { - printk ("ide-tape: bug: Trying to sleep on non-valid request\n"); - return; + if (rq == NULL || !IDETAPE_RQ_CMD (rq->cmd)) { + printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n"); + return; } #endif /* IDETAPE_DEBUG_BUGS */ - - rq->sem=&sem; + rq->sem = &sem; down (&sem); } @@ -3200,93 +2410,93 @@ * idetape_queue_rw_tail generates a read/write request for the block * device interface and wait for it to be serviced. */ - -int idetape_queue_rw_tail (ide_drive_t *drive,int cmd,int blocks,char *buffer) - +static int idetape_queue_rw_tail (ide_drive_t *drive, int cmd, int blocks, struct buffer_head *bh) { - idetape_tape_t *tape = &(drive->tape); + idetape_tape_t *tape = drive->driver_data; struct request rq; #if IDETAPE_DEBUG_LOG - printk ("idetape_queue_rw_tail: cmd=%d\n",cmd); + printk (KERN_INFO "idetape_queue_rw_tail: cmd=%d\n",cmd); #endif /* IDETAPE_DEBUG_LOG */ #if IDETAPE_DEBUG_BUGS - if (tape->active_data_request != NULL) { - printk ("ide-tape: bug: the pipeline is active in idetape_queue_rw_tail\n"); + if (idetape_pipeline_active (tape)) { + printk (KERN_ERR "ide-tape: bug: the pipeline is active in idetape_queue_rw_tail\n"); return (0); } #endif /* IDETAPE_DEBUG_BUGS */ ide_init_drive_cmd (&rq); - rq.buffer = buffer; + rq.bh = bh; rq.cmd = cmd; rq.sector = tape->block_address; rq.nr_sectors = rq.current_nr_sectors = blocks; (void) ide_do_drive_cmd (drive, &rq, ide_wait); - return (tape->tape_block_size*(blocks-rq.current_nr_sectors)); + idetape_init_merge_stage (tape); + if (rq.errors == IDETAPE_ERROR_GENERAL) + return -EIO; + return (tape->tape_block_size * 
(blocks-rq.current_nr_sectors)); } /* - * idetape_add_chrdev_read_request handles character device read requests - * when operating in the pipelined operation mode. + * idetape_add_chrdev_read_request is called from idetape_chrdev_read + * to service a character device read request and add read-ahead + * requests to our pipeline. */ - -int idetape_add_chrdev_read_request (ide_drive_t *drive,int blocks,char *buffer) - +static int idetape_add_chrdev_read_request (ide_drive_t *drive,int blocks) { - idetape_tape_t *tape = &(drive->tape); - idetape_pipeline_stage_t *new_stage; + idetape_tape_t *tape = drive->driver_data; + idetape_stage_t *new_stage; unsigned long flags; struct request rq,*rq_ptr; int bytes_read; #if IDETAPE_DEBUG_LOG - printk ("Reached idetape_add_chrdev_read_request\n"); + printk (KERN_INFO "Reached idetape_add_chrdev_read_request\n"); #endif /* IDETAPE_DEBUG_LOG */ ide_init_drive_cmd (&rq); - rq.cmd = IDETAPE_READ_REQUEST; + rq.cmd = IDETAPE_READ_RQ; rq.sector = tape->block_address; rq.nr_sectors = rq.current_nr_sectors = blocks; - if (tape->active_data_request != NULL || tape->current_number_of_stages <= tape->max_number_of_stages / 4) { - new_stage=idetape_kmalloc_stage (drive); + if (idetape_pipeline_active (tape) || tape->nr_stages <= tape->max_stages / 4) { + new_stage=idetape_kmalloc_stage (tape); while (new_stage != NULL) { new_stage->rq=rq; - save_flags (flags);cli (); idetape_add_stage_tail (drive,new_stage); - restore_flags (flags); - new_stage=idetape_kmalloc_stage (drive); + new_stage=idetape_kmalloc_stage (tape); } - if (tape->active_data_request == NULL) + if (!idetape_pipeline_active (tape)) idetape_insert_pipeline_into_queue (drive); } - if (tape->first_stage == NULL) { - /* * Linux is short on memory. Revert to non-pipelined * operation mode for this request. 
*/ - - return (idetape_queue_rw_tail (drive,IDETAPE_READ_REQUEST,blocks,buffer)); - } - - save_flags (flags);cli (); - if (tape->active_data_request == &(tape->first_stage->rq)) + return (idetape_queue_rw_tail (drive, IDETAPE_READ_RQ, blocks, tape->merge_stage->bh)); + } + save_flags (flags); + cli (); + if (tape->active_stage == tape->first_stage) idetape_wait_for_request (tape->active_data_request); restore_flags (flags); - rq_ptr=&(tape->first_stage->rq); - bytes_read=tape->tape_block_size*(rq_ptr->nr_sectors-rq_ptr->current_nr_sectors); - rq_ptr->nr_sectors=rq_ptr->current_nr_sectors=0; - idetape_copy_buffer_from_stage (tape->first_stage,buffer); - if (rq_ptr->errors != IDETAPE_RQ_ERROR_FILEMARK) + rq_ptr = &tape->first_stage->rq; + bytes_read = tape->tape_block_size * (rq_ptr->nr_sectors - rq_ptr->current_nr_sectors); + rq_ptr->nr_sectors = rq_ptr->current_nr_sectors = 0; + + idetape_switch_buffers (tape, tape->first_stage); + + if (rq_ptr->errors != IDETAPE_ERROR_FILEMARK) { + clear_bit (IDETAPE_FILEMARK, &tape->flags); idetape_remove_stage_head (drive); + } else + set_bit (IDETAPE_FILEMARK, &tape->flags); #if IDETAPE_DEBUG_BUGS if (bytes_read > blocks*tape->tape_block_size) { - printk ("ide-tape: bug: trying to return more bytes than requested\n"); + printk (KERN_ERR "ide-tape: bug: trying to return more bytes than requested\n"); bytes_read=blocks*tape->tape_block_size; } #endif /* IDETAPE_DEBUG_BUGS */ @@ -3304,60 +2514,47 @@ * 3. If we still can't allocate a stage, fallback to * non-pipelined operation mode for this request. 
*/ - -int idetape_add_chrdev_write_request (ide_drive_t *drive,int blocks,char *buffer) - +static int idetape_add_chrdev_write_request (ide_drive_t *drive, int blocks) { - idetape_tape_t *tape = &(drive->tape); - idetape_pipeline_stage_t *new_stage; + idetape_tape_t *tape = drive->driver_data; + idetape_stage_t *new_stage; unsigned long flags; struct request *rq; #if IDETAPE_DEBUG_LOG - printk ("Reached idetape_add_chrdev_write_request\n"); + printk (KERN_INFO "Reached idetape_add_chrdev_write_request\n"); #endif /* IDETAPE_DEBUG_LOG */ - - - new_stage=idetape_kmalloc_stage (drive); - /* - * If we don't have a new stage, wait for more and more requests - * to finish, and try to allocate after each one. - * + /* + * Attempt to allocate a new stage. * Pay special attention to possible race conditions. */ - - while (new_stage == NULL) { - save_flags (flags);cli (); - if (tape->active_data_request != NULL) { + while ((new_stage = idetape_kmalloc_stage (tape)) == NULL) { + save_flags (flags); + cli (); + if (idetape_pipeline_active (tape)) { idetape_wait_for_request (tape->active_data_request); restore_flags (flags); - new_stage=idetape_kmalloc_stage (drive); - } - else { + } else { + restore_flags (flags); + idetape_insert_pipeline_into_queue (drive); + if (idetape_pipeline_active (tape)) + continue; /* * Linux is short on memory. Fallback to * non-pipelined operation mode for this request. 
*/ - - restore_flags (flags); - return (idetape_queue_rw_tail (drive,IDETAPE_WRITE_REQUEST,blocks,buffer)); + return idetape_queue_rw_tail (drive, IDETAPE_WRITE_RQ, blocks, tape->merge_stage->bh); } } - - rq=&(new_stage->rq); - + rq = &new_stage->rq; ide_init_drive_cmd (rq); - rq->cmd = IDETAPE_WRITE_REQUEST; + rq->cmd = IDETAPE_WRITE_RQ; rq->sector = tape->block_address; /* Doesn't actually matter - We always assume sequential access */ - rq->nr_sectors = blocks; - rq->current_nr_sectors = blocks; + rq->nr_sectors = rq->current_nr_sectors = blocks; - idetape_copy_buffer_to_stage (new_stage,buffer); - - save_flags (flags);cli (); + idetape_switch_buffers (tape, new_stage); idetape_add_stage_tail (drive,new_stage); - restore_flags (flags); /* * Check if we are currently servicing requests in the bottom @@ -3367,111 +2564,127 @@ * starting to service requests, so that we will be able to * keep up with the higher speeds of the tape. */ + if (!idetape_pipeline_active (tape) && tape->nr_stages >= (3 * tape->max_stages) / 4) + idetape_insert_pipeline_into_queue (drive); - if (tape->active_data_request == NULL && tape->current_number_of_stages >= (3 * tape->max_number_of_stages) / 4) - idetape_insert_pipeline_into_queue (drive); - - if (tape->error_in_pipeline_stage) { /* Return a deferred error */ - tape->error_in_pipeline_stage=0; - return (-EIO); - } - - return (blocks); + if (clear_bit (IDETAPE_PIPELINE_ERROR, &tape->flags)) /* Return a deferred error */ + return -EIO; + return blocks; } -void idetape_discard_read_pipeline (ide_drive_t *drive) - +static void idetape_discard_read_pipeline (ide_drive_t *drive) { - idetape_tape_t *tape = &(drive->tape); + idetape_tape_t *tape = drive->driver_data; unsigned long flags; #if IDETAPE_DEBUG_BUGS if (tape->chrdev_direction != idetape_direction_read) { - printk ("ide-tape: bug: Trying to discard read pipeline, but we are not reading.\n"); + printk (KERN_ERR "ide-tape: bug: Trying to discard read pipeline, but we are not 
reading.\n"); return; } #endif /* IDETAPE_DEBUG_BUGS */ - - tape->merge_buffer_size=tape->merge_buffer_offset=0; - tape->chrdev_direction=idetape_direction_none; + tape->merge_stage_size = 0; + if (tape->merge_stage != NULL) { + __idetape_kfree_stage (tape->merge_stage); + tape->merge_stage = NULL; + } + tape->chrdev_direction = idetape_direction_none; if (tape->first_stage == NULL) return; - save_flags (flags);cli (); - tape->next_stage=NULL; - if (tape->active_data_request != NULL) + save_flags (flags); + cli (); + tape->next_stage = NULL; + if (idetape_pipeline_active (tape)) idetape_wait_for_request (tape->active_data_request); restore_flags (flags); while (tape->first_stage != NULL) idetape_remove_stage_head (drive); - -#if IDETAPE_PIPELINE - tape->max_number_of_stages=IDETAPE_MIN_PIPELINE_STAGES; -#else - tape->max_number_of_stages=0; -#endif /* IDETAPE_PIPELINE */ + tape->nr_pending_stages = 0; + tape->max_stages = IDETAPE_MIN_PIPELINE_STAGES; } /* * idetape_wait_for_pipeline will wait until all pending pipeline * requests are serviced. Typically called on device close. 
*/ - -void idetape_wait_for_pipeline (ide_drive_t *drive) - +static void idetape_wait_for_pipeline (ide_drive_t *drive) { - idetape_tape_t *tape = &(drive->tape); + idetape_tape_t *tape = drive->driver_data; unsigned long flags; - if (tape->active_data_request == NULL) - idetape_insert_pipeline_into_queue (drive); - - save_flags (flags);cli (); - if (tape->active_data_request == NULL) { - restore_flags (flags); - return; - } - - if (tape->last_stage != NULL) - idetape_wait_for_request (&(tape->last_stage->rq)); + if (!idetape_pipeline_active (tape)) + idetape_insert_pipeline_into_queue (drive); - else if (tape->active_data_request != NULL) - idetape_wait_for_request (tape->active_data_request); + save_flags (flags); + cli (); + if (!idetape_pipeline_active (tape)) + goto abort; +#if IDETAPE_DEBUG_BUGS + if (tape->last_stage == NULL) + printk ("ide-tape: tape->last_stage == NULL\n"); + else +#endif /* IDETAPE_DEBUG_BUGS */ + idetape_wait_for_request (&tape->last_stage->rq); +abort: restore_flags (flags); } -void idetape_empty_write_pipeline (ide_drive_t *drive) +static void idetape_pad_zeros (ide_drive_t *drive, int bcount) +{ + idetape_tape_t *tape = drive->driver_data; + struct buffer_head *bh; + int count, blocks; + + while (bcount) { + bh = tape->merge_stage->bh; + count = IDETAPE_MIN (tape->stage_size, bcount); + bcount -= count; + blocks = count / tape->tape_block_size; + while (count) { + bh->b_count = IDETAPE_MIN (count, bh->b_size); + memset (bh->b_data, 0, bh->b_count); + count -= bh->b_count; + bh = bh->b_reqnext; + } + idetape_queue_rw_tail (drive, IDETAPE_WRITE_RQ, blocks, tape->merge_stage->bh); + } +} +static void idetape_empty_write_pipeline (ide_drive_t *drive) { - idetape_tape_t *tape = &(drive->tape); - int blocks; + idetape_tape_t *tape = drive->driver_data; + int blocks, i; #if IDETAPE_DEBUG_BUGS if (tape->chrdev_direction != idetape_direction_write) { - printk ("ide-tape: bug: Trying to empty write pipeline, but we are not writing.\n"); + 
printk (KERN_ERR "ide-tape: bug: Trying to empty write pipeline, but we are not writing.\n"); return; } - if (tape->merge_buffer_size > tape->data_buffer_size) { - printk ("ide-tape: bug: merge_buffer too big\n"); - tape->merge_buffer_size = tape->data_buffer_size; + if (tape->merge_stage_size > tape->stage_size) { + printk (KERN_ERR "ide-tape: bug: merge_buffer too big\n"); + tape->merge_stage_size = tape->stage_size; } #endif /* IDETAPE_DEBUG_BUGS */ - - if (tape->merge_buffer_size) { - blocks=tape->merge_buffer_size/tape->tape_block_size; - if (tape->merge_buffer_size % tape->tape_block_size) { + if (tape->merge_stage_size) { + blocks=tape->merge_stage_size/tape->tape_block_size; + if (tape->merge_stage_size % tape->tape_block_size) { blocks++; - memset (tape->merge_buffer+tape->merge_buffer_size,0,tape->data_buffer_size-tape->merge_buffer_size); + i = tape->tape_block_size - tape->merge_stage_size % tape->tape_block_size; + memset (tape->merge_stage->bh->b_data + tape->merge_stage->bh->b_count, 0, i); + tape->merge_stage->bh->b_count += i; } - (void) idetape_add_chrdev_write_request (drive,blocks,tape->merge_buffer); - tape->merge_buffer_size=0; + (void) idetape_add_chrdev_write_request (drive, blocks); + tape->merge_stage_size = 0; } - idetape_wait_for_pipeline (drive); - - tape->error_in_pipeline_stage=0; + if (tape->merge_stage != NULL) { + __idetape_kfree_stage (tape->merge_stage); + tape->merge_stage = NULL; + } + clear_bit (IDETAPE_PIPELINE_ERROR, &tape->flags); tape->chrdev_direction=idetape_direction_none; /* @@ -3480,185 +2693,241 @@ * as some systems are constantly on, and the system load * can be totally different on the next backup). 
*/ + tape->max_stages = IDETAPE_MIN_PIPELINE_STAGES; +#if IDETAPE_DEBUG_BUGS + if (tape->first_stage != NULL || tape->next_stage != NULL || tape->last_stage != NULL || tape->nr_stages != 0) { + printk (KERN_ERR "ide-tape: ide-tape pipeline bug\n"); + } +#endif /* IDETAPE_DEBUG_BUGS */ +} + +static int idetape_pipeline_size (ide_drive_t *drive) +{ + idetape_tape_t *tape = drive->driver_data; + idetape_stage_t *stage; + struct request *rq; + int size = 0; -#if IDETAPE_PIPELINE - tape->max_number_of_stages=IDETAPE_MIN_PIPELINE_STAGES; -#else - tape->max_number_of_stages=0; -#endif /* IDETAPE_PIPELINE */ -#if IDETAPE_DEBUG_BUGS - if (tape->first_stage != NULL || tape->next_stage != NULL || tape->last_stage != NULL || tape->current_number_of_stages != 0) { - printk ("ide-tape: ide-tape pipeline bug\n"); + idetape_wait_for_pipeline (drive); + stage = tape->first_stage; + while (stage != NULL) { + rq = &stage->rq; + size += tape->tape_block_size * (rq->nr_sectors-rq->current_nr_sectors); + if (rq->errors == IDETAPE_ERROR_FILEMARK) + size += tape->tape_block_size; + stage = stage->next; } -#endif /* IDETAPE_DEBUG_BUGS */ + size += tape->merge_stage_size; + return size; } /* - * idetape_zero_packet_command just zeros a packet command and - * sets the number of retries to 0, as we haven't retried it yet. + * idetape_position_tape positions the tape to the requested block + * using the LOCATE packet command. A READ POSITION command is then + * issued to check where we are positioned. + * + * Like all higher level operations, we queue the commands at the tail + * of the request queue and wait for their completion. 
+ * */ - -void idetape_zero_packet_command (idetape_packet_command_t *pc) +static int idetape_position_tape (ide_drive_t *drive, unsigned int block, byte partition) +{ + int retval; + idetape_pc_t pc; + + idetape_create_locate_cmd (&pc, block, partition); + retval=idetape_queue_pc_tail (drive,&pc); + if (retval) return (retval); + + idetape_create_read_position_cmd (&pc); + return (idetape_queue_pc_tail (drive,&pc)); +} +/* + * Rewinds the tape to the Beginning Of the current Partition (BOP). + * + * We currently support only one partition. + */ +static int idetape_rewind_tape (ide_drive_t *drive) { - int i; + int retval; + idetape_pc_t pc; +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "Reached idetape_rewind_tape\n"); +#endif /* IDETAPE_DEBUG_LOG */ - for (i=0;i<12;i++) - pc->c[i]=0; - pc->retries=0; - pc->abort=0; - pc->dma_recommended=0; - pc->dma_error=0; + idetape_create_rewind_cmd (&pc); + retval=idetape_queue_pc_tail (drive,&pc); + if (retval) return (retval); + + idetape_create_read_position_cmd (&pc); + return (idetape_queue_pc_tail (drive,&pc)); +} + +static int idetape_flush_tape_buffers (ide_drive_t *drive) +{ + idetape_pc_t pc; + + idetape_create_write_filemark_cmd (&pc,0); + return (idetape_queue_pc_tail (drive,&pc)); } /* - * idetape_swap_shorts converts a 16 bit number from little endian - * to big endian format. + * Our special ide-tape ioctl's. + * + * Currently there aren't any ioctl's. + * mtio.h compatible commands should be issued to the character device + * interface. 
*/ - -unsigned short idetape_swap_short (unsigned short temp) - +static int idetape_blkdev_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) { - union convert { - unsigned all :16; - struct { - unsigned b1 :8; - unsigned b2 :8; - } b; - } original,converted; - - original.all=temp; - converted.b.b1=original.b.b2; - converted.b.b2=original.b.b1; - return (converted.all); + idetape_tape_t *tape = drive->driver_data; + idetape_config_t config; + +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "ide-tape: Reached idetape_blkdev_ioctl\n"); +#endif /* IDETAPE_DEBUG_LOG */ + switch (cmd) { + case 0x0340: + if (copy_from_user ((char *) &config, (char *) arg, sizeof (idetape_config_t))) + return -EFAULT; + tape->best_dsc_rw_frequency = config.dsc_rw_frequency; + tape->max_stages = config.nr_stages; + break; + case 0x0350: + config.dsc_rw_frequency = (int) tape->best_dsc_rw_frequency; + config.nr_stages = tape->max_stages; + if (copy_to_user ((char *) arg, (char *) &config, sizeof (idetape_config_t))) + return -EFAULT; + break; + default: + return -EIO; + } + return 0; } /* - * idetape_swap_long converts from little endian to big endian format. + * The block device interface should not be used for data transfers. + * However, we still allow opening it so that we can issue general + * ide driver configuration ioctl's, such as the interrupt unmask feature. 
*/ - -unsigned long idetape_swap_long (unsigned long temp) - +static int idetape_blkdev_open (struct inode *inode, struct file *filp, ide_drive_t *drive) { - union convert { - unsigned all :32; - struct { - unsigned b1 :8; - unsigned b2 :8; - unsigned b3 :8; - unsigned b4 :8; - } b; - } original,converted; - - original.all=temp; - converted.b.b1=original.b.b4; - converted.b.b2=original.b.b3; - converted.b.b3=original.b.b2; - converted.b.b4=original.b.b1; - return (converted.all); + MOD_INC_USE_COUNT; + return 0; } +static void idetape_blkdev_release (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ + MOD_DEC_USE_COUNT; +} /* - * idetape_next_pc_storage returns a pointer to a place in which we can - * safely store a packet command, even though we intend to leave the - * driver. A storage space for a maximum of IDETAPE_PC_STACK packet - * commands is allocated at initialization time. + * idetape_pre_reset is called before an ATAPI/ATA software reset. */ - -idetape_packet_command_t *idetape_next_pc_storage (ide_drive_t *drive) - +static void idetape_pre_reset (ide_drive_t *drive) { - idetape_tape_t *tape; - - tape=&(drive->tape); -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: pc_stack_index=%d\n",tape->pc_stack_index); -#endif /* IDETAPE_DEBUG_LOG */ - if (tape->pc_stack_index==IDETAPE_PC_STACK) - tape->pc_stack_index=0; - return (&(tape->pc_stack [tape->pc_stack_index++])); + idetape_tape_t *tape = drive->driver_data; + if (tape != NULL) + set_bit (IDETAPE_IGNORE_DSC, &tape->flags); } /* - * idetape_next_rq_storage is used along with idetape_next_pc_storage. - * Since we queue packet commands in the request queue, we need to - * allocate a request, along with the allocation of a packet command. + * Character device interface functions */ - -/************************************************************** - * * - * This should get fixed to use kmalloc(GFP_ATOMIC, ..) * - * followed later on by kfree(). 
-ml * - * * - **************************************************************/ - -struct request *idetape_next_rq_storage (ide_drive_t *drive) - +static ide_drive_t *get_drive_ptr (kdev_t i_rdev) { - idetape_tape_t *tape; + unsigned int i = MINOR(i_rdev) & ~0x80; - tape=&(drive->tape); - -#if IDETAPE_DEBUG_LOG - printk ("ide-tape: rq_stack_index=%d\n",tape->rq_stack_index); -#endif /* IDETAPE_DEBUG_LOG */ - if (tape->rq_stack_index==IDETAPE_PC_STACK) - tape->rq_stack_index=0; - return (&(tape->rq_stack [tape->rq_stack_index++])); + if (i >= MAX_HWIFS * MAX_DRIVES) + return NULL; + return (idetape_chrdevs[i].drive); } /* - * Block device interface functions + * idetape_space_over_filemarks is now a bit more complicated than just + * passing the command to the tape since we may have crossed some + * filemarks during our pipelined read-ahead mode. * - * The block device interface should not be used for data transfers. - * However, we still allow opening it so that we can issue general - * ide driver configuration ioctl's, such as the interrupt unmask feature. + * As a minor side effect, the pipeline enables us to support MTFSFM when + * the filemark is in our internal pipeline even if the tape doesn't + * support spacing over filemarks in the reverse direction. 
*/ - -int idetape_blkdev_open (struct inode *inode, struct file *filp, ide_drive_t *drive) - +static int idetape_space_over_filemarks (ide_drive_t *drive,short mt_op,int mt_count) { - idetape_tape_t *tape=&(drive->tape); + idetape_tape_t *tape = drive->driver_data; + idetape_pc_t pc; unsigned long flags; - - save_flags (flags);cli (); - -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_blkdev_open\n"); -#endif /* IDETAPE_DEBUG_LOG */ - - if (tape->busy) { - restore_flags (flags); /* Allowing access only through one */ - return (-EBUSY); /* one file descriptor */ - } - - tape->busy=1; - restore_flags (flags); - - return (0); -} + int retval,count=0; -void idetape_blkdev_release (struct inode *inode, struct file *filp, ide_drive_t *drive) + if (tape->chrdev_direction == idetape_direction_read) { -{ - idetape_tape_t *tape=&(drive->tape); - unsigned long flags; - -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_blkdev_release\n"); -#endif /* IDETAPE_DEBUG_LOG */ + /* + * We have a read-ahead buffer. Scan it for crossed + * filemarks. + */ + tape->merge_stage_size = 0; + clear_bit (IDETAPE_FILEMARK, &tape->flags); + while (tape->first_stage != NULL) { + /* + * Wait until the first read-ahead request + * is serviced. + */ + save_flags (flags); + cli (); + if (tape->active_stage == tape->first_stage) + idetape_wait_for_request (tape->active_data_request); + restore_flags (flags); - save_flags (flags);cli (); - tape->busy=0; - restore_flags (flags); + if (tape->first_stage->rq.errors == IDETAPE_ERROR_FILEMARK) + count++; + if (count == mt_count) { + switch (mt_op) { + case MTFSF: + idetape_remove_stage_head (drive); + case MTFSFM: + return (0); + default: + break; + } + } + idetape_remove_stage_head (drive); + } + idetape_discard_read_pipeline (drive); + } - return; + /* + * The filemark was not found in our internal pipeline. + * Now we can issue the space command. 
+ */ + switch (mt_op) { + case MTFSF: + idetape_create_space_cmd (&pc,mt_count-count,IDETAPE_SPACE_OVER_FILEMARK); + return (idetape_queue_pc_tail (drive,&pc)); + case MTFSFM: + if (!tape->capabilities.sprev) + return (-EIO); + retval = idetape_space_over_filemarks (drive, MTFSF, mt_count-count); + if (retval) return (retval); + return (idetape_space_over_filemarks (drive, MTBSF, 1)); + case MTBSF: + if (!tape->capabilities.sprev) + return (-EIO); + idetape_create_space_cmd (&pc,-(mt_count+count),IDETAPE_SPACE_OVER_FILEMARK); + return (idetape_queue_pc_tail (drive,&pc)); + case MTBSFM: + if (!tape->capabilities.sprev) + return (-EIO); + retval = idetape_space_over_filemarks (drive, MTBSF, mt_count+count); + if (retval) return (retval); + return (idetape_space_over_filemarks (drive, MTFSF, 1)); + default: + printk (KERN_ERR "ide-tape: MTIO operation %d not supported\n",mt_op); + return (-EIO); + } } -/* - * Character device interface functions - */ /* * Our character device read / write functions. @@ -3667,10 +2936,6 @@ * an integral number of the "continuous transfer limit", which is * a parameter of the specific tape (26 KB on my particular tape). * - * For best results use an integral number of the tape's parameter - * (which is displayed in the driver installation stage and is returned - * by the MTIOCGET ioctl). - * * As of version 1.3 of the driver, the character device provides an * abstract continuous view of the media - any mix of block sizes (even 1 * byte) on the same backup/restore procedure is supported. The driver @@ -3679,226 +2944,150 @@ * size will only result in a (slightly) increased driver overhead, but * will no longer hit performance. 
*/ - -int idetape_chrdev_read (struct inode *inode, struct file *file, char *buf, int count) - +static long idetape_chrdev_read (struct inode *inode, struct file *file, char *buf, unsigned long count) { - ide_drive_t *drive=idetape_chrdev.drive; - idetape_tape_t *tape=&(drive->tape); - char *buf_ptr=buf; - int bytes_read,temp,actually_read=0; + ide_drive_t *drive = get_drive_ptr (inode->i_rdev); + idetape_tape_t *tape = drive->driver_data; + int bytes_read,temp,actually_read=0, original_count = count; #if IDETAPE_DEBUG_LOG - printk ("Reached idetape_chrdev_read\n"); + printk (KERN_INFO "Reached idetape_chrdev_read\n"); #endif /* IDETAPE_DEBUG_LOG */ - + if (tape->chrdev_direction != idetape_direction_read) { /* Initialize read operation */ if (tape->chrdev_direction == idetape_direction_write) { idetape_empty_write_pipeline (drive); idetape_flush_tape_buffers (drive); } - +#if IDETAPE_DEBUG_BUGS + if (tape->merge_stage || tape->merge_stage_size) { + printk (KERN_ERR "ide-tape: merge_stage_size should be 0 now\n"); + tape->merge_stage_size = 0; + } +#endif /* IDETAPE_DEBUG_BUGS */ + if ((tape->merge_stage = __idetape_kmalloc_stage (tape)) == NULL) + return -ENOMEM; + tape->chrdev_direction = idetape_direction_read; + /* * Issue a read 0 command to ensure that DSC handshake * is switched from completion mode to buffer available * mode. 
*/ - - bytes_read=idetape_queue_rw_tail (drive,IDETAPE_READ_REQUEST,0,tape->merge_buffer); - if (bytes_read < 0) - return (bytes_read); - - tape->chrdev_direction=idetape_direction_read; + bytes_read = idetape_queue_rw_tail (drive, IDETAPE_READ_RQ, 0, tape->merge_stage->bh); + if (bytes_read < 0) { + kfree (tape->merge_stage); + tape->merge_stage = NULL; + tape->chrdev_direction = idetape_direction_none; + return bytes_read; + } + if (test_bit (IDETAPE_DETECT_BS, &tape->flags)) + if (count > tape->tape_block_size && (count % tape->tape_block_size) == 0) + tape->user_bs_factor = count / tape->tape_block_size; } - if (count==0) return (0); - - if (tape->merge_buffer_size) { -#if IDETAPE_DEBUG_BUGS - if (tape->merge_buffer_offset+tape->merge_buffer_size > tape->data_buffer_size) { - printk ("ide-tape: bug: merge buffer too big\n"); - tape->merge_buffer_offset=0;tape->merge_buffer_size=tape->data_buffer_size-1; - } -#endif /* IDETAPE_DEBUG_BUGS */ - actually_read=IDETAPE_MIN (tape->merge_buffer_size,count); - copy_to_user (buf_ptr,tape->merge_buffer+tape->merge_buffer_offset,actually_read); - buf_ptr+=actually_read;tape->merge_buffer_size-=actually_read; - count-=actually_read;tape->merge_buffer_offset+=actually_read; + if (tape->merge_stage_size) { + actually_read=IDETAPE_MIN (tape->merge_stage_size,count); + idetape_copy_stage_to_user (tape, buf, tape->merge_stage, actually_read); + buf += actually_read; tape->merge_stage_size -= actually_read; count-=actually_read; } - - while (count >= tape->data_buffer_size) { - bytes_read=idetape_add_chrdev_read_request (drive,tape->capabilities.ctl,tape->merge_buffer); + while (count >= tape->stage_size) { + bytes_read=idetape_add_chrdev_read_request (drive, tape->capabilities.ctl); if (bytes_read <= 0) - return (actually_read); - copy_to_user (buf_ptr,tape->merge_buffer,bytes_read); - buf_ptr+=bytes_read;count-=bytes_read;actually_read+=bytes_read; + goto finish; + idetape_copy_stage_to_user (tape, buf, tape->merge_stage, 
bytes_read); + buf += bytes_read; count -= bytes_read; actually_read += bytes_read; } - if (count) { - bytes_read=idetape_add_chrdev_read_request (drive,tape->capabilities.ctl,tape->merge_buffer); + bytes_read=idetape_add_chrdev_read_request (drive, tape->capabilities.ctl); if (bytes_read <= 0) - return (actually_read); + goto finish; temp=IDETAPE_MIN (count,bytes_read); - copy_to_user (buf_ptr,tape->merge_buffer,temp); + idetape_copy_stage_to_user (tape, buf, tape->merge_stage, temp); actually_read+=temp; - tape->merge_buffer_offset=temp; - tape->merge_buffer_size=bytes_read-temp; + tape->merge_stage_size=bytes_read-temp; } +finish: + if (actually_read < original_count && test_bit (IDETAPE_FILEMARK, &tape->flags)) + idetape_space_over_filemarks (drive, MTFSF, 1); return (actually_read); } -int idetape_chrdev_write (struct inode *inode, struct file *file, const char *buf, int count) - +static long idetape_chrdev_write (struct inode *inode, struct file *file, const char *buf, unsigned long count) { - ide_drive_t *drive=idetape_chrdev.drive; - idetape_tape_t *tape=&(drive->tape); - const char *buf_ptr=buf; + ide_drive_t *drive = get_drive_ptr (inode->i_rdev); + idetape_tape_t *tape = drive->driver_data; int retval,actually_written=0; #if IDETAPE_DEBUG_LOG - printk ("Reached idetape_chrdev_write\n"); -#endif /* IDETAPE_DEBUG_LOG */ - - if (tape->chrdev_direction != idetape_direction_write) { /* Initialize write operation */ - if (tape->chrdev_direction == idetape_direction_read) - idetape_discard_read_pipeline (drive); - - /* - * Issue a write 0 command to ensure that DSC handshake - * is switched from completion mode to buffer available - * mode. 
- */ - - retval=idetape_queue_rw_tail (drive,IDETAPE_WRITE_REQUEST,0,tape->merge_buffer); - if (retval < 0) - return (retval); - - tape->chrdev_direction=idetape_direction_write; - } - - if (count==0) - return (0); - - if (tape->merge_buffer_size) { -#if IDETAPE_DEBUG_BUGS - if (tape->merge_buffer_size >= tape->data_buffer_size) { - printk ("ide-tape: bug: merge buffer too big\n"); - tape->merge_buffer_size=0; - } -#endif /* IDETAPE_DEBUG_BUGS */ - - actually_written=IDETAPE_MIN (tape->data_buffer_size-tape->merge_buffer_size,count); - copy_from_user (tape->merge_buffer+tape->merge_buffer_size,buf_ptr,actually_written); - buf_ptr+=actually_written;tape->merge_buffer_size+=actually_written;count-=actually_written; - - if (tape->merge_buffer_size == tape->data_buffer_size) { - tape->merge_buffer_size=0; - retval=idetape_add_chrdev_write_request (drive,tape->capabilities.ctl,tape->merge_buffer); - if (retval <= 0) - return (retval); - } - } - - while (count >= tape->data_buffer_size) { - copy_from_user (tape->merge_buffer,buf_ptr,tape->data_buffer_size); - buf_ptr+=tape->data_buffer_size;count-=tape->data_buffer_size; - retval=idetape_add_chrdev_write_request (drive,tape->capabilities.ctl,tape->merge_buffer); - actually_written+=tape->data_buffer_size; - if (retval <= 0) - return (retval); - } - - if (count) { - actually_written+=count; - copy_from_user (tape->merge_buffer,buf_ptr,count); - tape->merge_buffer_size+=count; - } - return (actually_written); -} - -/* - * Our character device ioctls. - * - * General mtio.h magnetic io commands are supported here, and not in - * the corresponding block interface. - * - * The following ioctls are supported: - * - * MTIOCTOP - Refer to idetape_mtioctop for detailed description. - * - * MTIOCGET - The mt_dsreg field in the returned mtget structure - * will be set to (recommended block size << - * MT_ST_BLKSIZE_SHIFT) & MT_ST_BLKSIZE_MASK, which - * is currently equal to the size itself. 
- * The other mtget fields are not supported. - * - * Note that we do not actually return the tape's - * block size. Rather, we provide the recommended - * number of bytes which should be used as a "user - * block size" with the character device read/write - * functions to maximize throughput. - * - * MTIOCPOS - The current tape "position" is returned. - * (A unique number which can be used with the MTSEEK - * operation to return to this position in some - * future time, provided this place was not overwritten - * meanwhile). - * - * Our own ide-tape ioctls are supported on both interfaces. - */ - -int idetape_chrdev_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) - -{ - ide_drive_t *drive=idetape_chrdev.drive; - idetape_tape_t *tape=&(drive->tape); - idetape_packet_command_t pc; - struct mtop mtop; - struct mtget mtget; - struct mtpos mtpos; - int retval; - -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_chrdev_ioctl, cmd=%u\n",cmd); + printk (KERN_INFO "Reached idetape_chrdev_write\n"); #endif /* IDETAPE_DEBUG_LOG */ - if (tape->chrdev_direction == idetape_direction_write) { - idetape_empty_write_pipeline (drive); - idetape_flush_tape_buffers (drive); - } + if (tape->chrdev_direction != idetape_direction_write) { /* Initialize write operation */ + if (tape->chrdev_direction == idetape_direction_read) + idetape_discard_read_pipeline (drive); +#if IDETAPE_DEBUG_BUGS + if (tape->merge_stage || tape->merge_stage_size) { + printk (KERN_ERR "ide-tape: merge_stage_size should be 0 now\n"); + tape->merge_stage_size = 0; + } +#endif /* IDETAPE_DEBUG_BUGS */ + if ((tape->merge_stage = __idetape_kmalloc_stage (tape)) == NULL) + return -ENOMEM; + tape->chrdev_direction = idetape_direction_write; + idetape_init_merge_stage (tape); - if (tape->chrdev_direction == idetape_direction_read && cmd != MTIOCTOP) - idetape_discard_read_pipeline (drive); - - pc.buffer=pc.temp_buffer; - pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE; - 
pc.current_position=pc.temp_buffer; + /* + * Issue a write 0 command to ensure that DSC handshake + * is switched from completion mode to buffer available + * mode. + */ + retval = idetape_queue_rw_tail (drive, IDETAPE_WRITE_RQ, 0, tape->merge_stage->bh); + if (retval < 0) { + kfree (tape->merge_stage); + tape->merge_stage = NULL; + tape->chrdev_direction = idetape_direction_none; + return retval; + } + if (test_bit (IDETAPE_DETECT_BS, &tape->flags)) + if (count > tape->tape_block_size && (count % tape->tape_block_size) == 0) + tape->user_bs_factor = count / tape->tape_block_size; + } + if (count==0) + return (0); + if (tape->merge_stage_size) { +#if IDETAPE_DEBUG_BUGS + if (tape->merge_stage_size >= tape->stage_size) { + printk (KERN_ERR "ide-tape: bug: merge buffer too big\n"); + tape->merge_stage_size=0; + } +#endif /* IDETAPE_DEBUG_BUGS */ + actually_written=IDETAPE_MIN (tape->stage_size-tape->merge_stage_size,count); + idetape_copy_stage_from_user (tape, tape->merge_stage, buf, actually_written); + buf+=actually_written;tape->merge_stage_size+=actually_written;count-=actually_written; - switch (cmd) { - case MTIOCTOP: - retval=verify_area (VERIFY_READ,(char *) arg,sizeof (struct mtop)); - if (retval) return (retval); - copy_from_user ((char *) &mtop, (char *) arg, sizeof (struct mtop)); - return (idetape_mtioctop (drive,mtop.mt_op,mtop.mt_count)); - case MTIOCGET: - mtget.mt_dsreg=(tape->data_buffer_size << MT_ST_BLKSIZE_SHIFT) & MT_ST_BLKSIZE_MASK; - retval=verify_area (VERIFY_WRITE,(char *) arg,sizeof (struct mtget)); - if (retval) return (retval); - copy_to_user ((char *) arg,(char *) &mtget, sizeof (struct mtget)); - return (0); - case MTIOCPOS: - idetape_create_read_position_cmd (&pc); - retval=idetape_queue_pc_tail (drive,&pc); - if (retval) return (retval); - mtpos.mt_blkno=tape->block_address; - retval=verify_area (VERIFY_WRITE,(char *) arg,sizeof (struct mtpos)); - if (retval) return (retval); - copy_to_user ((char *) arg,(char *) &mtpos, sizeof 
(struct mtpos)); - return (0); - default: - return (idetape_blkdev_ioctl (drive,inode,file,cmd,arg)); + if (tape->merge_stage_size == tape->stage_size) { + tape->merge_stage_size = 0; + retval=idetape_add_chrdev_write_request (drive, tape->capabilities.ctl); + if (retval <= 0) + return (retval); + } + } + while (count >= tape->stage_size) { + idetape_copy_stage_from_user (tape, tape->merge_stage, buf, tape->stage_size); + buf+=tape->stage_size;count-=tape->stage_size; + retval=idetape_add_chrdev_write_request (drive, tape->capabilities.ctl); + actually_written+=tape->stage_size; + if (retval <= 0) + return (retval); + } + if (count) { + actually_written+=count; + idetape_copy_stage_from_user (tape, tape->merge_stage, buf, count); + tape->merge_stage_size+=count; } + return (actually_written); } /* @@ -3918,7 +3107,6 @@ * * MTBSFM - Like MTBSF, only tape is positioned after the last filemark. * - * * Note: * * MTBSF and MTBSFM are not supported when the tape doesn't @@ -3932,8 +3120,10 @@ * * MTREW - Rewinds tape. * + * MTLOAD - Loads the tape. + * * MTOFFL - Puts the tape drive "Offline": Rewinds the tape and - * prevents further access until the media is replaced. + * MTUNLOAD prevents further access until the media is replaced. * * MTNOP - Flushes tape buffers. * @@ -3944,35 +3134,33 @@ * * MTERASE - Erases tape. * - * MTSEEK - Positions the tape in a specific block number, which - * was previously received using the MTIOCPOS ioctl, - * assuming this place was not overwritten meanwhile. + * MTSETBLK - Sets the user block size to mt_count bytes. If + * mt_count is 0, we will attempt to autodetect + * the block size. + * + * MTSEEK - Positions the tape in a specific block number, where + * each block is assumed to contain which user_block_size + * bytes. + * + * MTSETPART - Switches to another tape partition. 
* * The following commands are currently not supported: * - * MTFSR, MTBSR, MTFSS, MTBSS, MTWSM, MTSETBLK, MTSETDENSITY, + * MTFSR, MTBSR, MTFSS, MTBSS, MTWSM, MTSETDENSITY, * MTSETDRVBUFFER, MT_ST_BOOLEANS, MT_ST_WRITE_THRESHOLD. */ - -int idetape_mtioctop (ide_drive_t *drive,short mt_op,int mt_count) - +static int idetape_mtioctop (ide_drive_t *drive,short mt_op,int mt_count) { - idetape_tape_t *tape=&(drive->tape); - idetape_packet_command_t pc; + idetape_tape_t *tape = drive->driver_data; + idetape_pc_t pc; int i,retval; - pc.buffer=pc.temp_buffer; - pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE; - pc.current_position=pc.temp_buffer; - #if IDETAPE_DEBUG_LOG - printk ("Handling MTIOCTOP ioctl: mt_op=%d, mt_count=%d\n",mt_op,mt_count); + printk (KERN_INFO "Handling MTIOCTOP ioctl: mt_op=%d, mt_count=%d\n",mt_op,mt_count); #endif /* IDETAPE_DEBUG_LOG */ - /* * Commands which need our pipelined read-ahead stages. */ - switch (mt_op) { case MTFSF: case MTFSFM: @@ -3988,7 +3176,6 @@ /* * Empty the pipeline. 
*/ - if (tape->chrdev_direction == idetape_direction_read) idetape_discard_read_pipeline (drive); @@ -4002,6 +3189,10 @@ return (0); case MTREW: return (idetape_rewind_tape (drive)); + case MTLOAD: + idetape_create_load_unload_cmd (&pc, IDETAPE_LU_LOAD_MASK); + return (idetape_queue_pc_tail (drive,&pc)); + case MTUNLOAD: case MTOFFL: idetape_create_load_unload_cmd (&pc,!IDETAPE_LU_LOAD_MASK); return (idetape_queue_pc_tail (drive,&pc)); @@ -4014,607 +3205,582 @@ idetape_create_space_cmd (&pc,0,IDETAPE_SPACE_TO_EOD); return (idetape_queue_pc_tail (drive,&pc)); case MTERASE: - retval=idetape_rewind_tape (drive); - if (retval) return (retval); + (void) idetape_rewind_tape (drive); idetape_create_erase_cmd (&pc); return (idetape_queue_pc_tail (drive,&pc)); + case MTSETBLK: + if (mt_count) { + if (mt_count < tape->tape_block_size || mt_count % tape->tape_block_size) + return -EIO; + tape->user_bs_factor = mt_count / tape->tape_block_size; + clear_bit (IDETAPE_DETECT_BS, &tape->flags); + } else + set_bit (IDETAPE_DETECT_BS, &tape->flags); + return 0; case MTSEEK: - return (idetape_position_tape (drive,mt_count)); + return (idetape_position_tape (drive, mt_count * tape->user_bs_factor, tape->partition)); + case MTSETPART: + return (idetape_position_tape (drive, 0, mt_count)); default: - printk ("ide-tape: MTIO operation %d not supported\n",mt_op); + printk (KERN_ERR "ide-tape: MTIO operation %d not supported\n",mt_op); return (-EIO); } } /* - * idetape_space_over_filemarks is now a bit more complicated than just - * passing the command to the tape since we may have crossed some - * filemarks during our pipelined read-ahead mode. + * Our character device ioctls. * - * As a minor side effect, the pipeline enables us to support MTFSFM when - * the filemark is in our internal pipeline even if the tape doesn't - * support spacing over filemarks in the reverse direction. 
- */ - -int idetape_space_over_filemarks (ide_drive_t *drive,short mt_op,int mt_count) - -{ - idetape_tape_t *tape=&(drive->tape); - idetape_packet_command_t pc; - unsigned long flags; - int retval,count=0,errors; - - if (tape->chrdev_direction == idetape_direction_read) { - - /* - * We have a read-ahead buffer. Scan it for crossed - * filemarks. - */ - - tape->merge_buffer_size=tape->merge_buffer_offset=0; - while (tape->first_stage != NULL) { - - /* - * Wait until the first read-ahead request - * is serviced. - */ - - save_flags (flags);cli (); - if (tape->active_data_request == &(tape->first_stage->rq)) - idetape_wait_for_request (tape->active_data_request); - restore_flags (flags); - - errors=tape->first_stage->rq.errors; - if (errors == IDETAPE_RQ_ERROR_FILEMARK) - count++; - - if (count == mt_count) { - switch (mt_op) { - case MTFSF: - idetape_remove_stage_head (drive); - case MTFSFM: - return (0); - } - } - idetape_remove_stage_head (drive); - } - idetape_discard_read_pipeline (drive); - } - - /* - * The filemark was not found in our internal pipeline. - * Now we can issue the space command. 
- */ - - pc.buffer=pc.temp_buffer; - pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE; - pc.current_position=pc.temp_buffer; - - switch (mt_op) { - case MTFSF: - idetape_create_space_cmd (&pc,mt_count-count,IDETAPE_SPACE_OVER_FILEMARK); - return (idetape_queue_pc_tail (drive,&pc)); - case MTFSFM: - if (!tape->capabilities.sprev) - return (-EIO); - retval=idetape_mtioctop (drive,MTFSF,mt_count-count); - if (retval) return (retval); - return (idetape_mtioctop (drive,MTBSF,1)); - case MTBSF: - if (!tape->capabilities.sprev) - return (-EIO); - idetape_create_space_cmd (&pc,-(mt_count+count),IDETAPE_SPACE_OVER_FILEMARK); - return (idetape_queue_pc_tail (drive,&pc)); - case MTBSFM: - if (!tape->capabilities.sprev) - return (-EIO); - retval=idetape_mtioctop (drive,MTBSF,mt_count+count); - if (retval) return (retval); - return (idetape_mtioctop (drive,MTFSF,1)); - default: - printk ("ide-tape: MTIO operation %d not supported\n",mt_op); - return (-EIO); - } -} - -/* - * Our character device open function. 
- */ - -int idetape_chrdev_open (struct inode *inode, struct file *filp) - -{ - ide_drive_t *drive=idetape_chrdev.drive; - idetape_tape_t *tape=&(drive->tape); - unsigned long flags; - unsigned int minor=MINOR (inode->i_rdev),allocation_length; - - save_flags (flags);cli (); - -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_chrdev_open\n"); -#endif /* IDETAPE_DEBUG_LOG */ - - if (minor!=0 && minor!=128) { /* Currently supporting only one */ - restore_flags (flags); /* tape drive */ - return (-ENXIO); - } - - if (tape->busy) { - restore_flags (flags); /* Allowing access only through one */ - return (-EBUSY); /* one file descriptor */ - } - - tape->busy=1; - restore_flags (flags); - - allocation_length=tape->data_buffer_size; - if (tape->data_buffer_size % IDETAPE_ALLOCATION_BLOCK) - allocation_length+=IDETAPE_ALLOCATION_BLOCK; - -#if IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE - if (tape->data_buffer == NULL) - tape->data_buffer=kmalloc (allocation_length,GFP_KERNEL); - if (tape->data_buffer == NULL) - goto sorry; - if (tape->merge_buffer == NULL) - tape->merge_buffer=kmalloc (allocation_length,GFP_KERNEL); - if (tape->merge_buffer == NULL) { - kfree (tape->data_buffer); - sorry: - printk ("ide-tape: FATAL - Can not allocate continuous buffer of %d bytes\n",allocation_length); - tape->busy=0; - return (-EIO); - } -#endif /* IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE */ - - if (!tape->block_address_valid) { - if (idetape_rewind_tape (drive)) { - printk ("ide-tape: Rewinding tape failed\n"); - tape->busy=0; - return (-EIO); - } - } - - return (0); -} - -/* - * Our character device release function. 
- */ - -void idetape_chrdev_release (struct inode *inode, struct file *filp) - -{ - ide_drive_t *drive=idetape_chrdev.drive; - idetape_tape_t *tape=&(drive->tape); - unsigned int minor=MINOR (inode->i_rdev); - idetape_packet_command_t pc; - unsigned long flags; - -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_chrdev_release\n"); -#endif /* IDETAPE_DEBUG_LOG */ - - if (tape->chrdev_direction == idetape_direction_write) { - idetape_empty_write_pipeline (drive); - idetape_create_write_filemark_cmd (&pc,1); /* Write a filemark */ - if (idetape_queue_pc_tail (drive,&pc)) - printk ("ide-tape: Couldn't write a filemark\n"); - } - - if (tape->chrdev_direction == idetape_direction_read) { - if (minor < 128) - idetape_discard_read_pipeline (drive); - else - idetape_wait_for_pipeline (drive); - } - - if (minor < 128) - if (idetape_rewind_tape (drive)) - printk ("ide-tape: Rewinding tape failed\n"); - -#if IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE - kfree (tape->data_buffer); - tape->data_buffer=NULL; - if (!tape->merge_buffer_size) { - kfree (tape->merge_buffer); - tape->merge_buffer=NULL; - } -#endif /* IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE */ - - save_flags (flags);cli (); - tape->busy=0; - restore_flags (flags); - - return; -} - -/* - * idetape_position_tape positions the tape to the requested block - * using the LOCATE packet command. A READ POSITION command is then - * issued to check where we are positioned. + * General mtio.h magnetic io commands are supported here, and not in + * the corresponding block interface. * - * Like all higher level operations, we queue the commands at the tail - * of the request queue and wait for their completion. 
- * - */ - -int idetape_position_tape (ide_drive_t *drive,unsigned long block) - -{ - int retval; - idetape_packet_command_t pc; - - idetape_create_locate_cmd (&pc,block,0); - retval=idetape_queue_pc_tail (drive,&pc); - if (retval!=0) return (retval); - - idetape_create_read_position_cmd (&pc); - pc.buffer=pc.temp_buffer; - pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE; - pc.current_position=pc.temp_buffer; - return (idetape_queue_pc_tail (drive,&pc)); -} - -/* - * Rewinds the tape to the Beginning Of the current Partition (BOP). + * The following ioctls are supported: * - * We currently support only one partition. - */ - -int idetape_rewind_tape (ide_drive_t *drive) - -{ - int retval; - idetape_packet_command_t pc; -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_rewind_tape\n"); -#endif /* IDETAPE_DEBUG_LOG */ - - idetape_create_rewind_cmd (&pc); - retval=idetape_queue_pc_tail (drive,&pc); - if (retval) return (retval); - - idetape_create_read_position_cmd (&pc); - pc.buffer=pc.temp_buffer; - pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE; - pc.current_position=pc.temp_buffer; - return (idetape_queue_pc_tail (drive,&pc)); -} - -int idetape_flush_tape_buffers (ide_drive_t *drive) - -{ - idetape_packet_command_t pc; - - idetape_create_write_filemark_cmd (&pc,0); - return (idetape_queue_pc_tail (drive,&pc)); -} - -/* - * Pipeline related functions - */ - -/* - * idetape_kmalloc_stage uses kmalloc to allocate a pipeline stage, - * along with all the necessary small buffers which together make - * a buffer of size tape->data_buffer_size or a bit more, in case - * it is not a multiply of IDETAPE_ALLOCATION_BLOCK (it isn't ...). + * MTIOCTOP - Refer to idetape_mtioctop for detailed description. * - * Returns a pointer to the new allocated stage, or NULL if we - * can't (or don't want to, in case we already have too many stages) - * allocate a stage. 
+ * MTIOCGET - The mt_dsreg field in the returned mtget structure + * will be set to (user block size in bytes << + * MT_ST_BLKSIZE_SHIFT) & MT_ST_BLKSIZE_MASK. * - * Pipeline stages are optional and are used to increase performance. - * If we can't allocate them, we'll manage without them. + * The mt_blkno is set to the current user block number. + * The other mtget fields are not supported. + * + * MTIOCPOS - The current tape "block position" is returned. We + * assume that each block contains user_block_size + * bytes. + * + * Our own ide-tape ioctls are supported on both interfaces. */ - -idetape_pipeline_stage_t *idetape_kmalloc_stage (ide_drive_t *drive) - +static int idetape_chrdev_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - idetape_tape_t *tape=&(drive->tape); - idetape_pipeline_stage_t *new_stage; - idetape_buffer_head_t *prev_bh,*bh; - int buffers_num,i; - + ide_drive_t *drive = get_drive_ptr (inode->i_rdev); + idetape_tape_t *tape = drive->driver_data; + idetape_pc_t pc; + struct mtop mtop; + struct mtget mtget; + struct mtpos mtpos; + int retval, block_offset = 0; + #if IDETAPE_DEBUG_LOG - printk ("Reached idetape_kmalloc_stage\n"); + printk (KERN_INFO "Reached idetape_chrdev_ioctl, cmd=%u\n",cmd); #endif /* IDETAPE_DEBUG_LOG */ - if (tape->current_number_of_stages>=tape->max_number_of_stages) { - return (NULL); + if (tape->chrdev_direction == idetape_direction_write) { + idetape_empty_write_pipeline (drive); + idetape_flush_tape_buffers (drive); } - - new_stage=(idetape_pipeline_stage_t *) kmalloc (sizeof (idetape_pipeline_stage_t),GFP_KERNEL); - if (new_stage==NULL) - return (NULL); - - new_stage->next=new_stage->prev=NULL; - - buffers_num=tape->data_buffer_size / IDETAPE_ALLOCATION_BLOCK; - if (tape->data_buffer_size % IDETAPE_ALLOCATION_BLOCK) - buffers_num++; - - prev_bh=new_stage->bh=(idetape_buffer_head_t *) kmalloc (sizeof (idetape_buffer_head_t),GFP_KERNEL); - if (new_stage->bh==NULL) { - 
idetape_kfree_stage (new_stage); - return (NULL); - } - new_stage->bh->next=NULL; - - new_stage->bh->data=kmalloc (IDETAPE_ALLOCATION_BLOCK,GFP_KERNEL); - if (new_stage->bh->data==NULL) { - idetape_kfree_stage (new_stage); - return (NULL); + if (cmd == MTIOCGET || cmd == MTIOCPOS) { + block_offset = idetape_pipeline_size (drive) / (tape->tape_block_size * tape->user_bs_factor); + idetape_create_read_position_cmd (&pc); + retval=idetape_queue_pc_tail (drive,&pc); + if (retval) return (retval); } - - for (i=1;inext=NULL; - prev_bh->next=bh; - bh->data=kmalloc (IDETAPE_ALLOCATION_BLOCK,GFP_KERNEL); - if (bh->data == NULL) { - idetape_kfree_stage (new_stage); - return (NULL); - } - prev_bh=bh; + switch (cmd) { + case MTIOCTOP: + if (copy_from_user ((char *) &mtop, (char *) arg, sizeof (struct mtop))) + return -EFAULT; + return (idetape_mtioctop (drive,mtop.mt_op,mtop.mt_count)); + case MTIOCGET: + memset (&mtget, 0, sizeof (struct mtget)); + mtget.mt_blkno = tape->block_address / tape->user_bs_factor - block_offset; + mtget.mt_dsreg = ((tape->tape_block_size * tape->user_bs_factor) << MT_ST_BLKSIZE_SHIFT) & MT_ST_BLKSIZE_MASK; + if (copy_to_user ((char *) arg,(char *) &mtget, sizeof (struct mtget))) + return -EFAULT; + return 0; + case MTIOCPOS: + mtpos.mt_blkno = tape->block_address / tape->user_bs_factor - block_offset; + if (copy_to_user ((char *) arg,(char *) &mtpos, sizeof (struct mtpos))) + return -EFAULT; + return 0; + default: + if (tape->chrdev_direction == idetape_direction_read) + idetape_discard_read_pipeline (drive); + return (idetape_blkdev_ioctl (drive,inode,file,cmd,arg)); } - return (new_stage); } /* - * idetape_kfree_stage calls kfree to completely free a stage, along with - * its related buffers. + * Our character device open function. 
*/ - -void idetape_kfree_stage (idetape_pipeline_stage_t *stage) - +static int idetape_chrdev_open (struct inode *inode, struct file *filp) { - idetape_buffer_head_t *prev_bh,*bh; - - if (stage == NULL) - return; - + ide_drive_t *drive; + idetape_tape_t *tape; + idetape_pc_t pc; + #if IDETAPE_DEBUG_LOG - printk ("Reached idetape_kfree_stage\n"); + printk (KERN_INFO "Reached idetape_chrdev_open\n"); #endif /* IDETAPE_DEBUG_LOG */ - bh=stage->bh; - - while (bh != NULL) { - prev_bh=bh; - if (bh->data != NULL) - kfree (bh->data); - bh=bh->next; - kfree (prev_bh); - } - - kfree (stage); - return; + if ((drive = get_drive_ptr (inode->i_rdev)) == NULL) + return -ENXIO; + tape = drive->driver_data; + + if (set_bit (IDETAPE_BUSY, &tape->flags)) + return -EBUSY; + MOD_INC_USE_COUNT; + idetape_create_read_position_cmd (&pc); + (void) idetape_queue_pc_tail (drive,&pc); + if (!test_bit (IDETAPE_ADDRESS_VALID, &tape->flags)) + (void) idetape_rewind_tape (drive); + MOD_DEC_USE_COUNT; + + if (tape->chrdev_direction == idetape_direction_none) + MOD_INC_USE_COUNT; + return 0; } /* - * idetape_copy_buffer_from_stage and idetape_copy_buffer_to_stage - * copy data from/to the small buffers into/from a continuous buffer. + * Our character device release function. 
*/ - -void idetape_copy_buffer_from_stage (idetape_pipeline_stage_t *stage,char *buffer) - +static void idetape_chrdev_release (struct inode *inode, struct file *filp) { - idetape_buffer_head_t *bh; - char *ptr; - + ide_drive_t *drive = get_drive_ptr (inode->i_rdev); + idetape_tape_t *tape = drive->driver_data; + unsigned int minor=MINOR (inode->i_rdev); + idetape_pc_t pc; + #if IDETAPE_DEBUG_LOG - printk ("Reached idetape_copy_buffer_from_stage\n"); + printk (KERN_INFO "Reached idetape_chrdev_release\n"); #endif /* IDETAPE_DEBUG_LOG */ -#if IDETAPE_DEBUG_BUGS - if (buffer == NULL) { - printk ("ide-tape: bug: buffer is null in copy_buffer_from_stage\n"); - return; - } -#endif /* IDETAPE_DEBUG_BUGS */ - - ptr=buffer; - bh=stage->bh; - - while (bh != NULL) { -#if IDETAPE_DEBUG_BUGS - if (bh->data == NULL) { - printk ("ide-tape: bug: bh->data is null\n"); - return; + + if (tape->chrdev_direction == idetape_direction_write) { + idetape_empty_write_pipeline (drive); + tape->merge_stage = __idetape_kmalloc_stage (tape); + if (tape->merge_stage != NULL) { + idetape_pad_zeros (drive, tape->tape_block_size * (tape->user_bs_factor - 1)); + __idetape_kfree_stage (tape->merge_stage); + tape->merge_stage = NULL; } -#endif /* IDETAPE_DEBUG_BUGS */ - memcpy (ptr,bh->data,IDETAPE_ALLOCATION_BLOCK); - bh=bh->next; - ptr=ptr+IDETAPE_ALLOCATION_BLOCK; + idetape_create_write_filemark_cmd (&pc,1); /* Write a filemark */ + if (idetape_queue_pc_tail (drive,&pc)) + printk (KERN_ERR "ide-tape: Couldn't write a filemark\n"); + } + if (tape->chrdev_direction == idetape_direction_read) { + if (minor < 128) + idetape_discard_read_pipeline (drive); + else + idetape_wait_for_pipeline (drive); } - return; + if (tape->cache_stage != NULL) { + __idetape_kfree_stage (tape->cache_stage); + tape->cache_stage = NULL; + } + if (minor < 128) + (void) idetape_rewind_tape (drive); + + clear_bit (IDETAPE_BUSY, &tape->flags); + if (tape->chrdev_direction == idetape_direction_none) + MOD_DEC_USE_COUNT; } /* - 
* Here we copy a continuous data buffer to the various small buffers - * in the pipeline stage. + * idetape_identify_device is called to check the contents of the + * ATAPI IDENTIFY command results. We return: + * + * 1 If the tape can be supported by us, based on the information + * we have so far. + * + * 0 If this tape driver is not currently supported by us. */ - -void idetape_copy_buffer_to_stage (idetape_pipeline_stage_t *stage,char *buffer) - +static int idetape_identify_device (ide_drive_t *drive,struct hd_driveid *id) { - idetape_buffer_head_t *bh; - char *ptr; - + struct idetape_id_gcw gcw; #if IDETAPE_DEBUG_LOG - printk ("Reached idetape_copy_buffer_to_stage\n"); + unsigned short mask,i; #endif /* IDETAPE_DEBUG_LOG */ -#if IDETAPE_DEBUG_BUGS - if (buffer == NULL) { - printk ("ide-tape: bug: buffer is null in copy_buffer_to_stage\n"); - return; - } -#endif /* IDETAPE_DEBUG_BUGS */ - - ptr=buffer; - bh=stage->bh; - - while (bh != NULL) { -#if IDETAPE_DEBUG_BUGS - if (bh->data == NULL) { - printk ("ide-tape: bug: bh->data is null\n"); - return; - } -#endif /* IDETAPE_DEBUG_BUGS */ - memcpy (bh->data,ptr,IDETAPE_ALLOCATION_BLOCK); - bh=bh->next; - ptr=ptr+IDETAPE_ALLOCATION_BLOCK; - } - return; -} - -/* - * idetape_increase_max_pipeline_stages is a part of the feedback - * loop which tries to find the optimum number of stages. In the - * feedback loop, we are starting from a minimum maximum number of - * stages, and if we sense that the pipeline is empty, we try to - * increase it, until we reach the user compile time memory limit. 
- */ -void idetape_increase_max_pipeline_stages (ide_drive_t *drive) + *((unsigned short *) &gcw) = id->config; -{ - idetape_tape_t *tape=&(drive->tape); - #if IDETAPE_DEBUG_LOG - printk ("Reached idetape_increase_max_pipeline_stages\n"); -#endif /* IDETAPE_DEBUG_LOG */ + printk (KERN_INFO "Dumping ATAPI Identify Device tape parameters\n"); + printk (KERN_INFO "Protocol Type: "); + switch (gcw.protocol) { + case 0: case 1: printk (KERN_INFO "ATA\n");break; + case 2: printk (KERN_INFO "ATAPI\n");break; + case 3: printk (KERN_INFO "Reserved (Unknown to ide-tape)\n");break; + } + printk (KERN_INFO "Device Type: %x - ",gcw.device_type); + switch (gcw.device_type) { + case 0: printk (KERN_INFO "Direct-access Device\n");break; + case 1: printk (KERN_INFO "Streaming Tape Device\n");break; + case 2: case 3: case 4: printk (KERN_INFO "Reserved\n");break; + case 5: printk (KERN_INFO "CD-ROM Device\n");break; + case 6: printk (KERN_INFO "Reserved\n"); + case 7: printk (KERN_INFO "Optical memory Device\n");break; + case 0x1f: printk (KERN_INFO "Unknown or no Device type\n");break; + default: printk (KERN_INFO "Reserved\n"); + } + printk (KERN_INFO "Removable: %s",gcw.removable ? 
"Yes\n":"No\n"); + printk (KERN_INFO "Command Packet DRQ Type: "); + switch (gcw.drq_type) { + case 0: printk (KERN_INFO "Microprocessor DRQ\n");break; + case 1: printk (KERN_INFO "Interrupt DRQ\n");break; + case 2: printk (KERN_INFO "Accelerated DRQ\n");break; + case 3: printk (KERN_INFO "Reserved\n");break; + } + printk (KERN_INFO "Command Packet Size: "); + switch (gcw.packet_size) { + case 0: printk (KERN_INFO "12 bytes\n");break; + case 1: printk (KERN_INFO "16 bytes\n");break; + default: printk (KERN_INFO "Reserved\n");break; + } + printk (KERN_INFO "Model: %s\n",id->model); + printk (KERN_INFO "Firmware Revision: %s\n",id->fw_rev); + printk (KERN_INFO "Serial Number: %s\n",id->serial_no); + printk (KERN_INFO "Write buffer size: %d bytes\n",id->buf_size*512); + printk (KERN_INFO "DMA: %s",id->capability & 0x01 ? "Yes\n":"No\n"); + printk (KERN_INFO "LBA: %s",id->capability & 0x02 ? "Yes\n":"No\n"); + printk (KERN_INFO "IORDY can be disabled: %s",id->capability & 0x04 ? "Yes\n":"No\n"); + printk (KERN_INFO "IORDY supported: %s",id->capability & 0x08 ? "Yes\n":"Unknown\n"); + printk (KERN_INFO "ATAPI overlap supported: %s",id->capability & 0x20 ? "Yes\n":"No\n"); + printk (KERN_INFO "PIO Cycle Timing Category: %d\n",id->tPIO); + printk (KERN_INFO "DMA Cycle Timing Category: %d\n",id->tDMA); + printk (KERN_INFO "Single Word DMA supported modes: "); + for (i=0,mask=1;i<8;i++,mask=mask << 1) { + if (id->dma_1word & mask) + printk (KERN_INFO "%d ",i); + if (id->dma_1word & (mask << 8)) + printk (KERN_INFO "(active) "); + } + printk (KERN_INFO "\n"); + printk (KERN_INFO "Multi Word DMA supported modes: "); + for (i=0,mask=1;i<8;i++,mask=mask << 1) { + if (id->dma_mword & mask) + printk (KERN_INFO "%d ",i); + if (id->dma_mword & (mask << 8)) + printk (KERN_INFO "(active) "); + } + printk (KERN_INFO "\n"); + if (id->field_valid & 0x0002) { + printk (KERN_INFO "Enhanced PIO Modes: %s\n",id->eide_pio_modes & 1 ? 
"Mode 3":"None"); + printk (KERN_INFO "Minimum Multi-word DMA cycle per word: "); + if (id->eide_dma_min == 0) + printk (KERN_INFO "Not supported\n"); + else + printk (KERN_INFO "%d ns\n",id->eide_dma_min); - tape->max_number_of_stages+=IDETAPE_INCREASE_STAGES_RATE; + printk (KERN_INFO "Manufacturer\'s Recommended Multi-word cycle: "); + if (id->eide_dma_time == 0) + printk (KERN_INFO "Not supported\n"); + else + printk (KERN_INFO "%d ns\n",id->eide_dma_time); - if (tape->max_number_of_stages >= IDETAPE_MAX_PIPELINE_STAGES) - tape->max_number_of_stages = IDETAPE_MAX_PIPELINE_STAGES; + printk (KERN_INFO "Minimum PIO cycle without IORDY: "); + if (id->eide_pio == 0) + printk (KERN_INFO "Not supported\n"); + else + printk (KERN_INFO "%d ns\n",id->eide_pio); -#if IDETAPE_DEBUG_LOG - printk ("Maximum number of stages: %d\n",tape->max_number_of_stages); + printk (KERN_INFO "Minimum PIO cycle with IORDY: "); + if (id->eide_pio_iordy == 0) + printk (KERN_INFO "Not supported\n"); + else + printk (KERN_INFO "%d ns\n",id->eide_pio_iordy); + + } else + printk (KERN_INFO "According to the device, fields 64-70 are not valid.\n"); #endif /* IDETAPE_DEBUG_LOG */ - return; + /* Check that we can support this device */ + + if (gcw.protocol !=2 ) + printk (KERN_ERR "ide-tape: Protocol is not ATAPI\n"); + else if (gcw.device_type != 1) + printk (KERN_ERR "ide-tape: Device type is not set to tape\n"); + else if (!gcw.removable) + printk (KERN_ERR "ide-tape: The removable flag is not set\n"); + else if (gcw.drq_type != 2) { + printk (KERN_ERR "ide-tape: Sorry, DRQ types other than Accelerated DRQ\n"); + printk (KERN_ERR "ide-tape: are still not supported by the driver\n"); + } else if (gcw.packet_size != 0) { + printk (KERN_ERR "ide-tape: Packet size is not 12 bytes long\n"); + if (gcw.packet_size == 1) + printk (KERN_ERR "ide-tape: Sorry, padding to 16 bytes is still not supported\n"); + } else + return 1; + return 0; } /* - * idetape_add_stage_tail adds a new stage at the end of the 
pipeline. - * - * Caller should disable interrupts, if necessary. + * idetape_get_mode_sense_results asks the tape about its various + * parameters. In particular, we will adjust our data transfer buffer + * size to the recommended value as returned by the tape. */ - -void idetape_add_stage_tail (ide_drive_t *drive,idetape_pipeline_stage_t *stage) - +static void idetape_get_mode_sense_results (ide_drive_t *drive) { - idetape_tape_t *tape=&(drive->tape); + idetape_tape_t *tape = drive->driver_data; + idetape_pc_t pc; + idetape_mode_parameter_header_t *header; + idetape_capabilities_page_t *capabilities; -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_add_stage_tail\n"); + idetape_create_mode_sense_cmd (&pc,IDETAPE_CAPABILITIES_PAGE); + if (idetape_queue_pc_tail (drive,&pc)) { + printk (KERN_ERR "ide-tape: Can't get tape parameters - assuming some default values\n"); + tape->tape_block_size = 512; tape->capabilities.ctl = 52; + tape->capabilities.speed = 450; tape->capabilities.buffer_size = 6 * 52; + return; + } + header = (idetape_mode_parameter_header_t *) pc.buffer; + capabilities = (idetape_capabilities_page_t *) (header + 1); + + capabilities->max_speed = ntohs (capabilities->max_speed); + capabilities->ctl = ntohs (capabilities->ctl); + capabilities->speed = ntohs (capabilities->speed); + capabilities->buffer_size = ntohs (capabilities->buffer_size); + + tape->capabilities = *capabilities; /* Save us a copy */ + tape->tape_block_size = capabilities->blk512 ? 
512:1024; +#if IDETAPE_DEBUG_LOG + printk (KERN_INFO "Dumping the results of the MODE SENSE packet command\n"); + printk (KERN_INFO "Mode Parameter Header:\n"); + printk (KERN_INFO "Mode Data Length - %d\n",header->mode_data_length); + printk (KERN_INFO "Medium Type - %d\n",header->medium_type); + printk (KERN_INFO "Device Specific Parameter - %d\n",header->dsp); + printk (KERN_INFO "Block Descriptor Length - %d\n",header->bdl); + + printk (KERN_INFO "Capabilities and Mechanical Status Page:\n"); + printk (KERN_INFO "Page code - %d\n",capabilities->page_code); + printk (KERN_INFO "Page length - %d\n",capabilities->page_length); + printk (KERN_INFO "Read only - %s\n",capabilities->ro ? "Yes":"No"); + printk (KERN_INFO "Supports reverse space - %s\n",capabilities->sprev ? "Yes":"No"); + printk (KERN_INFO "Supports erase initiated formatting - %s\n",capabilities->efmt ? "Yes":"No"); + printk (KERN_INFO "Supports QFA two Partition format - %s\n",capabilities->qfa ? "Yes":"No"); + printk (KERN_INFO "Supports locking the medium - %s\n",capabilities->lock ? "Yes":"No"); + printk (KERN_INFO "The volume is currently locked - %s\n",capabilities->locked ? "Yes":"No"); + printk (KERN_INFO "The device defaults in the prevent state - %s\n",capabilities->prevent ? "Yes":"No"); + printk (KERN_INFO "Supports ejecting the medium - %s\n",capabilities->eject ? "Yes":"No"); + printk (KERN_INFO "Supports error correction - %s\n",capabilities->ecc ? "Yes":"No"); + printk (KERN_INFO "Supports data compression - %s\n",capabilities->cmprs ? "Yes":"No"); + printk (KERN_INFO "Supports 512 bytes block size - %s\n",capabilities->blk512 ? "Yes":"No"); + printk (KERN_INFO "Supports 1024 bytes block size - %s\n",capabilities->blk1024 ? "Yes":"No"); + printk (KERN_INFO "Restricted byte count for PIO transfers - %s\n",capabilities->slowb ? 
"Yes":"No"); + printk (KERN_INFO "Maximum supported speed in KBps - %d\n",capabilities->max_speed); + printk (KERN_INFO "Continuous transfer limits in blocks - %d\n",capabilities->ctl); + printk (KERN_INFO "Current speed in KBps - %d\n",capabilities->speed); + printk (KERN_INFO "Buffer size - %d\n",capabilities->buffer_size*512); #endif /* IDETAPE_DEBUG_LOG */ - - stage->next=NULL; - stage->prev=tape->last_stage; - if (tape->last_stage != NULL) - tape->last_stage->next=stage; - else - tape->first_stage=tape->next_stage=stage; - tape->last_stage=stage; - if (tape->next_stage == NULL) - tape->next_stage=tape->last_stage; - tape->current_number_of_stages++; } /* - * idetape_remove_stage_head removes tape->first_stage from the pipeline. + * ide_setup is called to: + * + * 1. Initialize our various state variables. + * 2. Ask the tape for its capabilities. + * 3. Allocate a buffer which will be used for data + * transfer. The buffer size is chosen based on + * the recommendation which we received in step (2). * - * Again, caller should avoid race conditions. + * Note that at this point ide.c already assigned us an irq, so that + * we can queue requests here and wait for their completion. 
*/ - -void idetape_remove_stage_head (ide_drive_t *drive) - +static void idetape_setup (ide_drive_t *drive, idetape_tape_t *tape, int minor) { - idetape_tape_t *tape=&(drive->tape); - idetape_pipeline_stage_t *stage; - -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_remove_stage_head\n"); -#endif /* IDETAPE_DEBUG_LOG */ -#if IDETAPE_DEBUG_BUGS - if (tape->first_stage == NULL) { - printk ("ide-tape: bug: tape->first_stage is NULL\n"); - return; + ide_hwif_t *hwif = HWIF(drive); + unsigned long t1, tmid, tn, t; + + drive->driver_data = tape; + drive->ready_stat = 0; /* An ATAPI device ignores DRDY */ + memset (tape, 0, sizeof (idetape_tape_t)); + tape->drive = drive; + tape->minor = minor; + tape->name[0] = 'h'; tape->name[1] = 't'; tape->name[2] = '0' + minor; + tape->chrdev_direction = idetape_direction_none; + tape->pc = tape->pc_stack; + tape->max_stages = IDETAPE_MIN_PIPELINE_STAGES; + + idetape_get_mode_sense_results (drive); + + tape->user_bs_factor = 1; + tape->stage_size = tape->capabilities.ctl * tape->tape_block_size; + while (tape->stage_size > 0xffff) { + printk (KERN_NOTICE "ide-tape: decreasing stage size\n"); + tape->capabilities.ctl /= 2; + tape->stage_size = tape->capabilities.ctl * tape->tape_block_size; } - if (tape->active_stage == tape->first_stage) { - printk ("ide-tape: bug: Trying to free our active pipeline stage\n"); - return; + tape->pages_per_stage = tape->stage_size / PAGE_SIZE; + if (tape->stage_size % PAGE_SIZE) { + tape->pages_per_stage++; + tape->excess_bh_size = PAGE_SIZE - tape->stage_size % PAGE_SIZE; } -#endif /* IDETAPE_DEBUG_BUGS */ - stage=tape->first_stage; - tape->first_stage=stage->next; - idetape_kfree_stage (stage); - tape->current_number_of_stages--; - if (tape->first_stage == NULL) { - tape->last_stage=NULL; -#if IDETAPE_DEBUG_BUGS - if (tape->next_stage != NULL) - printk ("ide-tape: bug: tape->next_stage != NULL\n"); - if (tape->current_number_of_stages) - printk ("ide-tape: bug: current_number_of_stages should be 0 
now\n"); -#endif /* IDETAPE_DEBUG_BUGS */ + + /* + * Select the "best" DSC read/write polling frequency. + * The following algorithm attempts to find a balance between + * good latency and good system throughput. It will be nice to + * have all this configurable in run time at some point. + */ + t1 = (tape->stage_size * HZ) / (tape->capabilities.speed * 1000); + tmid = (tape->capabilities.buffer_size * 32 * HZ) / (tape->capabilities.speed * 125); + tn = (IDETAPE_FIFO_THRESHOLD * tape->stage_size * HZ) / (tape->capabilities.speed * 1000); + + if (tape->max_stages) { + if (drive->using_dma) + t = tmid; + else { + if (hwif->drives[drive->select.b.unit ^ 1].present || hwif->next != hwif) + t = (tn + tmid) / 2; + else + t = tn; + } + } else + t = t1; + t = IDETAPE_MIN (t, tmid); + + /* + * Ensure that the number we got makes sense. + */ + tape->best_dsc_rw_frequency = IDETAPE_MAX (IDETAPE_MIN (t, IDETAPE_DSC_RW_MAX), IDETAPE_DSC_RW_MIN); + if (tape->best_dsc_rw_frequency != t) { + printk (KERN_NOTICE "ide-tape: Although the recommended polling period is %lu jiffies\n", t); + printk (KERN_NOTICE "ide-tape: we will use %lu jiffies\n", tape->best_dsc_rw_frequency); } + printk (KERN_INFO "ide-tape: %s <-> %s, %dKBps, %d*%dkB buffer, %dkB pipeline, %lums tDSC%s\n", + drive->name, tape->name, tape->capabilities.speed, (tape->capabilities.buffer_size * 512) / tape->stage_size, + tape->stage_size / 1024, tape->max_stages * tape->stage_size / 1024, + tape->best_dsc_rw_frequency * 1000 / HZ, drive->using_dma ? 
", DMA":""); } +static int idetape_cleanup (ide_drive_t *drive) +{ + idetape_tape_t *tape = drive->driver_data; + int minor = tape->minor; + unsigned long flags; + + save_flags (flags); + cli (); + if (test_bit (IDETAPE_BUSY, &tape->flags) || tape->first_stage != NULL || tape->merge_stage_size || drive->usage) { + restore_flags(flags); + return 1; + } + idetape_chrdevs[minor].drive = NULL; + restore_flags (flags); + DRIVER(drive)->busy = 0; + (void) ide_unregister_subdriver (drive); + drive->driver_data = NULL; + kfree (tape); + for (minor = 0; minor < MAX_HWIFS * MAX_DRIVES; minor++) + if (idetape_chrdevs[minor].drive != NULL) + return 0; + unregister_chrdev (IDETAPE_MAJOR, "ht"); + idetape_chrdev_present = 0; + return 0; +} + +int idetape_init (void); + +static ide_module_t idetape_module = { + IDE_DRIVER_MODULE, + idetape_init, + NULL +}; + /* - * idetape_insert_pipeline_into_queue is used to start servicing the - * pipeline stages, starting from tape->next_stage. + * IDE subdriver functions, registered with ide.c */ - -void idetape_insert_pipeline_into_queue (ide_drive_t *drive) +static ide_driver_t idetape_driver = { + ide_tape, /* media */ + 1, /* busy */ + 1, /* supports_dma */ + idetape_cleanup, /* cleanup */ + idetape_do_request, /* do_request */ + idetape_end_request, /* end_request */ + idetape_blkdev_ioctl, /* ioctl */ + idetape_blkdev_open, /* open */ + idetape_blkdev_release, /* release */ + NULL, /* media_change */ + idetape_pre_reset, /* pre_reset */ + NULL, /* capacity */ + NULL /* special */ +}; -{ - idetape_tape_t *tape=&(drive->tape); +/* + * Our character device supporting functions, passed to register_chrdev. 
+ */ +static struct file_operations idetape_fops = { + NULL, /* lseek - default */ + idetape_chrdev_read, /* read */ + idetape_chrdev_write, /* write */ + NULL, /* readdir - bad */ + NULL, /* select */ + idetape_chrdev_ioctl, /* ioctl */ + NULL, /* mmap */ + idetape_chrdev_open, /* open */ + idetape_chrdev_release, /* release */ + NULL, /* fsync */ + NULL, /* fasync */ + NULL, /* check_media_change */ + NULL /* revalidate */ +}; - if (tape->next_stage == NULL) - return; +/* + * idetape_init will register the driver for each tape. + */ +int idetape_init (void) +{ + ide_drive_t *drive; + idetape_tape_t *tape; + int minor, failed = 0, supported = 0; - if (tape->active_data_request == NULL) { - idetape_active_next_stage (drive); - (void) (ide_do_drive_cmd (drive,tape->active_data_request,ide_end)); - return; - } + MOD_INC_USE_COUNT; + if (!idetape_chrdev_present) + for (minor = 0; minor < MAX_HWIFS * MAX_DRIVES; minor++ ) + idetape_chrdevs[minor].drive = NULL; + + if ((drive = ide_scan_devices (ide_tape, NULL, failed++)) == NULL) { + ide_register_module (&idetape_module); + MOD_DEC_USE_COUNT; + return 0; + } + if (!idetape_chrdev_present && register_chrdev (IDETAPE_MAJOR, "ht", &idetape_fops)) { + printk (KERN_ERR "ide-tape: Failed to register character device interface\n"); + MOD_DEC_USE_COUNT; + return -EBUSY; + } + do { + if (!idetape_identify_device (drive, drive->id)) { + printk (KERN_ERR "ide-tape: %s: not supported by this version of ide-tape\n", drive->name); + continue; + } + tape = (idetape_tape_t *) kmalloc (sizeof (idetape_tape_t), GFP_KERNEL); + if (tape == NULL) { + printk (KERN_ERR "ide-tape: %s: Can't allocate a tape structure\n", drive->name); + continue; + } + if (ide_register_subdriver (drive, &idetape_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-tape: %s: Failed to register the driver with ide.c\n", drive->name); + kfree (tape); + continue; + } + for (minor = 0; idetape_chrdevs[minor].drive != NULL; minor++); + idetape_setup (drive, 
tape, minor); + idetape_chrdevs[minor].drive = drive; + supported++; failed--; + } while ((drive = ide_scan_devices (ide_tape, NULL, failed++)) != NULL); + if (!idetape_chrdev_present && !supported) { + unregister_chrdev (IDETAPE_MAJOR, "ht"); + } else + idetape_chrdev_present = 1; + ide_register_module (&idetape_module); + MOD_DEC_USE_COUNT; + return 0; } -/* - * idetape_active_next_stage will declare the next stage as "active". - */ - -void idetape_active_next_stage (ide_drive_t *drive) +#ifdef MODULE +int init_module (void) +{ + return idetape_init (); +} +void cleanup_module (void) { - idetape_tape_t *tape=&(drive->tape); - idetape_pipeline_stage_t *stage=tape->next_stage; - struct request *rq=&(stage->rq); + ide_drive_t *drive; + int minor; -#if IDETAPE_DEBUG_LOG - printk ("Reached idetape_active_next_stage\n"); -#endif /* IDETAPE_DEBUG_LOG */ -#if IDETAPE_DEBUG_BUGS - if (stage == NULL) { - printk ("ide-tape: bug: Trying to activate a non existing stage\n"); - return; + for (minor = 0; minor < MAX_HWIFS * MAX_DRIVES; minor++) { + drive = idetape_chrdevs[minor].drive; + if (drive != NULL && idetape_cleanup (drive)) + printk (KERN_ERR "ide-tape: %s: cleanup_module() called while still busy\n", drive->name); } -#endif /* IDETAPE_DEBUG_BUGS */ - if (rq->cmd == IDETAPE_WRITE_REQUEST) - idetape_copy_buffer_from_stage (stage,tape->data_buffer); - - rq->buffer=tape->data_buffer; - tape->active_data_request=rq; - tape->active_stage=stage; - tape->next_stage=stage->next; + ide_unregister_module(&idetape_module); } +#endif /* MODULE */ diff -u --recursive --new-file v2.1.7/linux/drivers/block/ide-tape.h linux/drivers/block/ide-tape.h --- v2.1.7/linux/drivers/block/ide-tape.h Fri Sep 27 08:27:12 1996 +++ linux/drivers/block/ide-tape.h Wed Nov 6 14:49:31 1996 @@ -1,529 +0,0 @@ -/* - * linux/drivers/block/ide-tape.h Version 1.8 - ALPHA Sep 26, 1996 - * - * Copyright (C) 1995, 1996 Gadi Oxman - */ - -/* - * Include file for the IDE ATAPI streaming tape driver. 
- * - * This file contains various ide-tape related structures and function - * prototypes which are already used in ide.h. - * - * The various compile time options are described below. - */ - -#ifndef IDETAPE_H -#define IDETAPE_H - -/**************************** Tunable parameters *****************************/ - -/* - * This is probably the most important configuration option. - * - * Pipelined operation mode has the potential to maximize the - * performance of the driver and thus to saturate the throughput - * to the maximum value supported by the tape. - * - * In pipelined mode we are servicing requests without blocking the - * user backup program. For example, on a write request, we will add it - * to the pipeline and return without waiting for it to complete. The - * user program will then have enough time to prepare the next blocks - * while the tape is still busy working on the previous requests. - * - * Pipelined operation mode is enabled by default, but since it has a - * few downfalls as well, you may wish to disable it. - * Further explanation of pipelined mode is available in ide-tape.c . - */ - -#define IDETAPE_PIPELINE 1 - -/* - * Pipelined mode parameters. - * - * We try to use the minimum number of stages which is enough to - * keep the tape constantly streaming. To accomplish that, we implement - * a feedback loop around the maximum number of stages: - * - * We start from MIN maximum stages (we will not even use MIN stages - * if we don't need them), increment it by RATE*(MAX-MIN) - * whenever we sense that the pipeline is empty, until we reach - * the optimum value or until we reach MAX. 
- */ - -#define IDETAPE_MIN_PIPELINE_STAGES 100 -#define IDETAPE_MAX_PIPELINE_STAGES 200 -#define IDETAPE_INCREASE_STAGES_RATE 20 - -/* - * Assuming the tape shares an interface with another device, the default - * behavior is to service our pending pipeline requests as soon as - * possible, but to gracefully postpone them in favor of the other device - * when the tape is busy. This has the potential to maximize our - * throughput and in the same time, to make efficient use of the IDE bus. - * - * Note that when we transfer data to / from the tape, we co-operate with - * the relatively fast tape buffers and the tape will perform the - * actual media access in the background, without blocking the IDE - * bus. This means that as long as the maximum IDE bus throughput is much - * higher than the sum of our maximum throughput and the maximum - * throughput of the other device, we should probably leave the default - * behavior. - * - * However, if it is still desired to give the other device a share even - * in our own (small) bus bandwidth, you can set IDETAPE_LOW_TAPE_PRIORITY - * to 1. This will let the other device finish *all* its pending requests - * before we even check if we can service our next pending request. - */ - -#define IDETAPE_LOW_TAPE_PRIORITY 0 - -/* - * It seems that dynamically allocating buffers of about 32KB - * each is doomed to fail, unless we are in or very near the - * initialization stage. Take care when changing this value, as it - * is now optimized with the design of kmalloc, so that we will not - * allocate parts of a page. Setting the size to 512 bytes, for example, - * would cause kmalloc to allocate for us 1024 bytes, and to - * unnecessarily waste double amount of memory. - */ - -#if PAGE_SIZE == 4096 - #define IDETAPE_ALLOCATION_BLOCK 500 -#elif PAGE_SIZE == 8192 - #define IDETAPE_ALLOCATION_BLOCK 496 -#else /* ??? 
Not defined by linux/mm/kmalloc.c */ - #define IDETAPE_ALLOCATION_BLOCK 512 -#endif - -/* - * ide-tape currently uses two continuous buffers, each of the size of - * one stage. By default, those buffers are allocated at initialization - * time and never released, since dynamic allocation of pages bigger - * than PAGE_SIZE may fail as memory becomes fragmented. - * - * This results in about 100 KB memory usage when the tape is idle. - * Setting IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE to 1 will let ide-tape - * to dynamically allocate those buffers, resulting in about 20 KB idle - * memory usage. - */ - -#define IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE 0 - -/* - * The following are used to debug the driver: - * - * Setting IDETAPE_DEBUG_LOG to 1 will log driver flow control. - * Setting IDETAPE_DEBUG_BUGS to 1 will enable self-sanity checks in - * some places. - * - * Setting them to 0 will restore normal operation mode: - * - * 1. Disable logging normal successful operations. - * 2. Disable self-sanity checks. - * 3. Errors will still be logged, of course. - * - * All the #if DEBUG code will be removed some day, when the driver - * is verified to be stable enough. This will make it much more - * esthetic. - */ - -#define IDETAPE_DEBUG_LOG 0 -#define IDETAPE_DEBUG_BUGS 1 - -/* - * After each failed packet command we issue a request sense command - * and retry the packet command IDETAPE_MAX_PC_RETRIES times. - * - * Setting IDETAPE_MAX_PC_RETRIES to 0 will disable retries. - */ - -#define IDETAPE_MAX_PC_RETRIES 3 - -/* - * With each packet command, we allocate a buffer of - * IDETAPE_TEMP_BUFFER_SIZE bytes. This is used for several packet - * commands (Not for READ/WRITE commands). - * - * The default below is too high - We should be using around 100 bytes - * typically, but I didn't check all the cases, so I rather be on the - * safe size. 
- */ - -#define IDETAPE_TEMP_BUFFER_SIZE 256 - -/* - * In various places in the driver, we need to allocate storage - * for packet commands and requests, which will remain valid while - * we leave the driver to wait for an interrupt or a timeout event. - * - * In the corresponding ide_drive_t structure, we pre-allocate storage - * for IDETAPE_PC_STACK packet commands and requests. This storage is - * used as a circular array - Each time we reach the last entry, we - * warp around to the first. - * - * It is crucial that we have enough entries for the maximum number - * of packet commands / sub-requests which we need to allocate during - * the handling of a specific request. - * - * Follows a worse case calculation of the required storage, with a - * large safety margin. - */ - -#define IDETAPE_PC_STACK 20+IDETAPE_MAX_PC_RETRIES - -/* - * DSC polling parameters. - * - * Polling for DSC (a single bit in the status register) is a very - * important function in ide-tape. There are two cases in which we - * poll for DSC: - * - * 1. Before a read/write packet command, to ensure that we - * can transfer data from/to the tape's data buffers, without - * causing an actual media access. In case the tape is not - * ready yet, we take out our request from the device - * request queue, so that ide.c will service requests from - * the other device on the same interface meanwhile. - * - * We can now automatically select the "best" polling frequency. - * Have a look at IDETAPE_ANTICIPATE_READ_WRITE_DSC below. - * - * In case you don't want to use the automatic selection, - * choose it to be relatively fast. The default fallback - * frequency is 1/50 msec. - * - * 2. After the successful initialization of a "media access - * packet command", which is a command which can take a long - * time to complete (it can be several seconds or even an hour). - * - * Again, we postpone our request in the middle to free the bus - * for the other device. 
The polling frequency here should be - * lower than the read/write frequency since those media access - * commands are slow. We start from a "fast" frequency - - * IDETAPE_DSC_FAST_MEDIA_ACCESS_FREQUENCY (one second), and - * if we don't receive DSC after IDETAPE_FAST_SLOW_THRESHOLD - * (5 minutes), we switch it to a lower frequency - - * IDETAPE_DSC_SLOW_MEDIA_ACCESS_FREQUENCY (1 minute). - * - * We also set a timeout for the timer, in case something goes wrong. - * The timeout should be longer then the maximum execution time of a - * tape operation. I still have to measure exactly how much time does - * it take to space over a far filemark, etc. It seemed that 15 minutes - * was way too low, so I am meanwhile setting it to a rather large - * timeout - 2 Hours ... - * - */ - -/* - * Setting IDETAPE_ANTICIPATE_READ_WRITE_DSC to 1 will allow ide-tape - * to cleverly select the lowest possible frequency which will - * not affect performance, based on the tape parameters and our operation - * mode. This has potential to dramatically decrease our polling load - * on Linux. - * - * However, for the cases in which our calculation fails, setting - * the following option to 0 will force the use of the "fallback" - * polling period defined below (defaults to 50 msec). - * - * In any case, the frequency will be between the "lowest" value - * to the "fallback" value, to ensure that our selected "best" frequency - * is reasonable. - */ - -#define IDETAPE_ANTICIPATE_READ_WRITE_DSC 1 - -/* - * The following parameter is used to select the point in the internal - * tape fifo in which we will start to refill the buffer. Decreasing - * the following parameter will improve the system's latency and - * interactive response, while using a high value might improve sytem - * throughput. - */ -#define IDETAPE_FIFO_THRESHOLD 2 - -/* - * DSC timings. 
- */ - -#define IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY 5*HZ/100 /* 50 msec */ -#define IDETAPE_DSC_READ_WRITE_LOWEST_FREQUENCY 40*HZ/100 /* 400 msec */ -#define IDETAPE_DSC_FAST_MEDIA_ACCESS_FREQUENCY 1*HZ /* 1 second */ -#define IDETAPE_FAST_SLOW_THRESHOLD 5*60*HZ /* 5 minutes */ -#define IDETAPE_DSC_SLOW_MEDIA_ACCESS_FREQUENCY 60*HZ /* 1 minute */ -#define IDETAPE_DSC_TIMEOUT 2*60*60*HZ /* 2 hours */ - -/*************************** End of tunable parameters ***********************/ - -/* - * Definitions which are already needed in ide.h - */ - -/* - * Current character device data transfer direction. - */ - -typedef enum {idetape_direction_none,idetape_direction_read,idetape_direction_write} chrdev_direction_t; - -struct ide_drive_s; /* Forward declaration - Will be defined later in ide.h */ -typedef void (idetape_pc_completed_t)(struct ide_drive_s *); - -/* - * Our view of a packet command. - */ - -typedef struct idetape_packet_command_s { - byte c [12]; /* Actual packet bytes */ - - byte retries; /* On each retry, we increment retries */ - byte error; /* Error code */ - byte abort; /* Set when an error is considered normal - We won't retry */ - byte wait_for_dsc; /* 1 When polling for DSC on a media access command */ - byte dma_recommended; /* 1 when we prefer to use DMA if possible */ - byte dma_in_progress; /* 1 while DMA in progress */ - byte dma_error; /* 1 when encountered problem during DMA */ - unsigned long request_transfer; /* Bytes to transfer */ - unsigned long actually_transferred; /* Bytes actually transferred */ - unsigned long buffer_size; /* Size of our data buffer */ - byte *buffer; /* Data buffer */ - byte *current_position; /* Pointer into the above buffer */ - byte writing; /* Data direction */ - idetape_pc_completed_t *callback; /* Called when this packet command is completed */ - byte temp_buffer [IDETAPE_TEMP_BUFFER_SIZE]; /* Temporary buffer */ -} idetape_packet_command_t; - -/* - * Capabilities and Mechanical Status Page - */ - 
-typedef struct { - unsigned page_code :6; /* Page code - Should be 0x2a */ - unsigned reserved1_67 :2; - byte page_length; /* Page Length - Should be 0x12 */ - byte reserved2; - byte reserved3; - unsigned ro :1; /* Read Only Mode */ - unsigned reserved4_1234 :4; - unsigned sprev :1; /* Supports SPACE in the reverse direction */ - unsigned reserved4_67 :2; - unsigned reserved5_012 :3; - unsigned efmt :1; /* Supports ERASE command initiated formatting */ - unsigned reserved5_4 :1; - unsigned qfa :1; /* Supports the QFA two partition formats */ - unsigned reserved5_67 :2; - unsigned lock :1; /* Supports locking the volume */ - unsigned locked :1; /* The volume is locked */ - unsigned prevent :1; /* The device defaults in the prevent state after power up */ - unsigned eject :1; /* The device can eject the volume */ - unsigned reserved6_45 :2; /* Reserved */ - unsigned ecc :1; /* Supports error correction */ - unsigned cmprs :1; /* Supports data compression */ - unsigned reserved7_0 :1; - unsigned blk512 :1; /* Supports 512 bytes block size */ - unsigned blk1024 :1; /* Supports 1024 bytes block size */ - unsigned reserved7_3_6 :4; - unsigned slowb :1; /* The device restricts the byte count for PIO */ - /* transfers for slow buffer memory ??? */ - unsigned short max_speed; /* Maximum speed supported in KBps */ - byte reserved10; - byte reserved11; - unsigned short ctl; /* Continuous Transfer Limit in blocks */ - unsigned short speed; /* Current Speed, in KBps */ - unsigned short buffer_size; /* Buffer Size, in 512 bytes */ - byte reserved18; - byte reserved19; -} idetape_capabilities_page_t; - -/* - * A pipeline stage contains several small buffers of type - * idetape_buffer_head_t. This is necessary since dynamical allocation - * of large (32 KB or so) continuous memory blocks will usually fail. 
- */ - -typedef struct idetape_buffer_head_s { - char *data; /* Pointer to data (512 bytes by default) */ - struct idetape_buffer_head_s *next; -} idetape_buffer_head_t; - -/* - * A pipeline stage. - * - * In a pipeline stage we have a request, pointer to a list of small - * buffers, and pointers to the near stages. - */ - -typedef struct idetape_pipeline_stage_s { - struct request rq; /* The corresponding request */ - idetape_buffer_head_t *bh; /* The data buffers */ - struct idetape_pipeline_stage_s *next,*prev; /* Pointers to the next and previous stages */ -} idetape_pipeline_stage_t; - -/* - * Most of our global data which we need to save even as we leave the - * driver due to an interrupt or a timer event is stored in a variable - * of type tape_info, defined below. - * - * Additional global variables which provide the link between the - * character device interface to this structure are defined in - * ide-tape.c - */ - -typedef struct { - - /* - * Since a typical character device operation requires more - * than one packet command, we provide here enough memory - * for the maximum of interconnected packet commands. - * The packet commands are stored in the circular array pc_stack. - * pc_stack_index points to the last used entry, and warps around - * to the start when we get to the last array entry. - * - * pc points to the current processed packet command. - * - * failed_pc points to the last failed packet command, or contains - * NULL if we do not need to retry any packet command. This is - * required since an additional packet command is needed before the - * retry, to get detailed information on what went wrong. 
- */ - - idetape_packet_command_t *pc; /* Current packet command */ - idetape_packet_command_t *failed_pc; /* Last failed packet command */ - idetape_packet_command_t pc_stack [IDETAPE_PC_STACK]; /* Packet command stack */ - byte pc_stack_index; /* Next free packet command storage space */ - - /* - * The Linux ide driver basically traverses the request lists - * of the ide block devices, finds the next request, completes - * it, and passes to the next one. This is done in ide_do_request. - * - * In this regard, ide-tape.c is fully compatible with the rest of - * the ide driver - From the point of view of ide.c, we are just - * another ide block device which receives requests and completes - * them. - * - * However, our requests don't originate in the buffer cache but - * rather in ide-tape.c itself. Here we provide safe storage for - * such requests. - */ - - struct request rq_stack [IDETAPE_PC_STACK]; - byte rq_stack_index; /* We implement a circular array */ - - /* - * While polling for DSC we use postponed_rq to postpone the - * current request so that ide.c will be able to service - * pending requests on the other device. Note that at most - * we will have only one DSC (usually data transfer) request - * in the device request queue. Additional request can be - * queued in our internal pipeline, but they will be visible - * to ide.c only one at a time. - */ - - struct request *postponed_rq; - - /* - * DSC polling variables. - */ - - byte dsc_count; /* We received DSC dsc_count times in a row */ - unsigned long dsc_polling_start; /* The time in which we started polling for DSC */ - struct timer_list dsc_timer; /* Timer used to poll for dsc */ - - /* - * We can now be much more clever in our selection of the - * read/write polling frequency. This is used along with - * the compile time option IDETAPE_ANTICIPATE_DSC. 
- */ - - unsigned long best_dsc_rw_frequency; /* Read/Write dsc polling frequency */ - - unsigned long dsc_polling_frequency; /* The current polling frequency */ - unsigned long dsc_timeout; /* Maximum waiting time */ - byte dsc_received; /* Set when we receive DSC */ - - byte request_status; - byte last_status; /* Contents of the tape status register */ - /* before the current request (saved for us */ - /* by ide.c) */ - /* - * After an ATAPI software reset, the status register will be - * locked, and thus we need to ignore it when checking DSC for - * the first time. - */ - - byte reset_issued; - - /* Position information */ - - byte partition_num; /* Currently not used */ - unsigned long block_address; /* Current block */ - byte block_address_valid; /* 0 When the tape position is unknown */ - /* (To the tape or to us) */ - /* Last error information */ - - byte sense_key,asc,ascq; - - /* Character device operation */ - - chrdev_direction_t chrdev_direction; /* Current character device data transfer direction */ - byte busy; /* Device already opened */ - - /* Device information */ - - unsigned short tape_block_size; /* Usually 512 or 1024 bytes */ - idetape_capabilities_page_t capabilities; /* Copy of the tape's Capabilities and Mechanical Page */ - - /* - * Active data transfer request parameters. - * - * At most, there is only one ide-tape originated data transfer - * request in the device request queue. This allows ide.c to - * easily service requests from the other device when we - * postpone our active request. In the pipelined operation - * mode, we use our internal pipeline structure to hold - * more data requests. - * - * The data buffer size is chosen based on the tape's - * recommendation. 
- */ - - struct request *active_data_request; /* Pointer to the request which is waiting in the device request queue */ - char *data_buffer; /* The corresponding data buffer (for read/write requests) */ - int data_buffer_size; /* Data buffer size (chosen based on the tape's recommendation */ - - char *merge_buffer; /* Temporary buffer for user <-> kernel space data transfer */ - int merge_buffer_offset; - int merge_buffer_size; - - /* - * Pipeline parameters. - * - * To accomplish non-pipelined mode, we simply set the following - * variables to zero (or NULL, where appropriate). - */ - - int current_number_of_stages; /* Number of currently used stages */ - int max_number_of_stages; /* We will not allocate more than this number of stages */ - idetape_pipeline_stage_t *first_stage; /* The first stage which will be removed from the pipeline */ - idetape_pipeline_stage_t *active_stage; /* The currently active stage */ - idetape_pipeline_stage_t *next_stage; /* Will be serviced after the currently active request */ - idetape_pipeline_stage_t *last_stage; /* New requests will be added to the pipeline here */ - int error_in_pipeline_stage; /* Set when an error was detected in one of the pipeline stages */ - -} idetape_tape_t; - -/* - * The following is used to have a quick look at the tape's status - * register between requests of the other device. 
- */ - -#define POLL_HWIF_TAPE_DRIVE \ - if (hwif->tape_drive != NULL) { \ - if (hwif->tape_drive->tape.request_status) { \ - SELECT_DRIVE(hwif,hwif->tape_drive); \ - hwif->tape_drive->tape.last_status=GET_STAT(); \ - hwif->tape_drive->tape.request_status=0; \ - } \ - } - -#endif /* IDETAPE_H */ diff -u --recursive --new-file v2.1.7/linux/drivers/block/ide.c linux/drivers/block/ide.c --- v2.1.7/linux/drivers/block/ide.c Tue Oct 29 19:58:03 1996 +++ linux/drivers/block/ide.c Wed Nov 6 14:49:32 1996 @@ -1,5 +1,5 @@ /* - * linux/drivers/block/ide.c Version 5.52 Sep 24, 1996 + * linux/drivers/block/ide.c Version 5.60 Nov 5, 1996 * * Copyright (C) 1994-1996 Linus Torvalds & authors (see below) */ @@ -261,6 +261,14 @@ * change delay_10ms() to delay_50ms() to fix problems * Version 5.52 fix incorrect invalidation of removable devices * add "hdx=slow" command line option + * Version 5.60 start to modularize the driver; the disk and ATAPI + * drivers can be compiled as loadable modules. + * move IDE probe code to ide-probe.c + * move IDE disk code to ide-disk.c + * add support for generic IDE device subdrivers + * add m68k code from Geert Uytterhoeven + * probe all interfaces by default + * add ioctl to (re)probe an interface * * Some additional driver compile-time options are in ide.h * @@ -271,6 +279,7 @@ #undef REALLY_SLOW_IO /* most systems can safely undef this */ #include +#include #include #include #include @@ -290,6 +299,7 @@ #include #include #include +#include #ifdef CONFIG_PCI #include @@ -299,18 +309,26 @@ #include "ide.h" #include "ide_modes.h" -#ifdef CONFIG_BLK_DEV_PROMISE -#include "promise.h" -#define IS_PROMISE_DRIVE (HWIF(drive)->chipset == ide_promise) -#else -#define IS_PROMISE_DRIVE (0) /* auto-NULLs out Promise code */ -#endif /* CONFIG_BLK_DEV_PROMISE */ +#ifdef CONFIG_KERNELD +#include +#endif /* CONFIG_KERNELD */ + +static const byte ide_hwif_to_major[] = {IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR}; -static const byte 
ide_hwif_to_major[MAX_HWIFS] = {IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR}; -static const unsigned short default_io_base[MAX_HWIFS] = {0x1f0, 0x170, 0x1e8, 0x168}; -static const byte default_irqs[MAX_HWIFS] = {14, 15, 11, 10}; static int idebus_parameter; /* holds the "idebus=" parameter */ static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */ +static int initializing; /* set while initializing built-in drivers */ + +/* + * ide_lock is used by the Atari code to obtain access to the IDE interrupt, + * which is shared between several drivers. + */ +static int ide_lock = 0; + +/* + * ide_modules keeps track of the available IDE chipset/probe/driver modules. + */ +static ide_module_t *ide_modules = NULL; /* * This is declared extern in ide.h, for access by other IDE modules: @@ -349,7 +367,6 @@ #endif /* DISK_RECOVERY_TIME */ - /* * Do not even *think* about calling this! */ @@ -367,11 +384,10 @@ /* fill in any non-zero initial values */ hwif->index = index; - hwif->noprobe = (index > 1); - hwif->io_base = default_io_base[index]; - hwif->ctl_port = hwif->io_base ? hwif->io_base+0x206 : 0x000; + ide_init_hwif_ports(hwif->io_ports, ide_default_io_base(index), &hwif->irq); + hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET]; #ifdef CONFIG_BLK_DEV_HD - if (hwif->io_base == HD_DATA) + if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) hwif->noprobe = 1; /* may be overridden by ide_setup() */ #endif /* CONFIG_BLK_DEV_HD */ hwif->major = ide_hwif_to_major[index]; @@ -379,12 +395,10 @@ hwif->name[1] = 'd'; hwif->name[2] = 'e'; hwif->name[3] = '0' + index; -#ifdef CONFIG_BLK_DEV_IDETAPE - hwif->tape_drive = NULL; -#endif /* CONFIG_BLK_DEV_IDETAPE */ for (unit = 0; unit < MAX_DRIVES; ++unit) { ide_drive_t *drive = &hwif->drives[unit]; + drive->media = ide_disk; drive->select.all = (unit<<4)|0xa0; drive->hwif = hwif; drive->ctl = 0x08; @@ -431,7 +445,7 @@ /* * ide_system_bus_speed() returns what we think is the system VESA/PCI - * bus speed (in Mhz). 
This is used for calculating interface PIO timings. + * bus speed (in MHz). This is used for calculating interface PIO timings. * The default is 40 for known PCI systems, 50 otherwise. * The "idebus=xx" parameter can be used to override this value. * The actual value to be used is computed/displayed the first time through. @@ -447,7 +461,8 @@ #endif /* CONFIG_PCI */ else system_bus_speed = 50; /* safe default value for VESA and PCI */ - printk("ide: Assuming %dMhz system bus speed for PIO modes; override with idebus=xx\n", system_bus_speed); + printk("ide: Assuming %dMHz system bus speed for PIO modes%s\n", system_bus_speed, + idebus_parameter ? "" : "; override with idebus=xx"); } return system_bus_speed; } @@ -460,7 +475,7 @@ * of the sector count register location, with interrupts disabled * to ensure that the reads all happen together. */ -static inline void do_vlb_sync (unsigned short port) { +static inline void do_vlb_sync (ide_ioreg_t port) { (void) inb (port); (void) inb (port); (void) inb (port); @@ -472,32 +487,30 @@ */ void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) { - unsigned short io_base = HWIF(drive)->io_base; - unsigned short data_reg = io_base+IDE_DATA_OFFSET; byte io_32bit = drive->io_32bit; if (io_32bit) { #if SUPPORT_VLB_SYNC if (io_32bit & 2) { cli(); - do_vlb_sync(io_base+IDE_NSECTOR_OFFSET); - insl(data_reg, buffer, wcount); + do_vlb_sync(IDE_NSECTOR_REG); + insl(IDE_DATA_REG, buffer, wcount); if (drive->unmask) sti(); } else #endif /* SUPPORT_VLB_SYNC */ - insl(data_reg, buffer, wcount); + insl(IDE_DATA_REG, buffer, wcount); } else { #if SUPPORT_SLOW_DATA_PORTS if (drive->slow) { unsigned short *ptr = (unsigned short *) buffer; while (wcount--) { - *ptr++ = inw_p(data_reg); - *ptr++ = inw_p(data_reg); + *ptr++ = inw_p(IDE_DATA_REG); + *ptr++ = inw_p(IDE_DATA_REG); } } else #endif /* SUPPORT_SLOW_DATA_PORTS */ - insw(data_reg, buffer, wcount<<1); + insw(IDE_DATA_REG, buffer, wcount<<1); } } @@ -506,33 +519,68 @@ 
*/ void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount) { - unsigned short io_base = HWIF(drive)->io_base; - unsigned short data_reg = io_base+IDE_DATA_OFFSET; byte io_32bit = drive->io_32bit; if (io_32bit) { #if SUPPORT_VLB_SYNC if (io_32bit & 2) { cli(); - do_vlb_sync(io_base+IDE_NSECTOR_OFFSET); - outsl(data_reg, buffer, wcount); + do_vlb_sync(IDE_NSECTOR_REG); + outsl(IDE_DATA_REG, buffer, wcount); if (drive->unmask) sti(); } else #endif /* SUPPORT_VLB_SYNC */ - outsl(data_reg, buffer, wcount); + outsl(IDE_DATA_REG, buffer, wcount); } else { #if SUPPORT_SLOW_DATA_PORTS if (drive->slow) { unsigned short *ptr = (unsigned short *) buffer; while (wcount--) { - outw_p(*ptr++, data_reg); - outw_p(*ptr++, data_reg); + outw_p(*ptr++, IDE_DATA_REG); + outw_p(*ptr++, IDE_DATA_REG); } } else #endif /* SUPPORT_SLOW_DATA_PORTS */ - outsw(data_reg, buffer, wcount<<1); + outsw(IDE_DATA_REG, buffer, wcount<<1); + } +} + +/* + * The following routines are mainly used by the ATAPI drivers. + * + * These routines will round up any request for an odd number of bytes, + * so if an odd bytecount is specified, be sure that there's at least one + * extra byte allocated for the buffer. 
+ */ +void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount) +{ + ++bytecount; +#ifdef CONFIG_ATARI + if (MACH_IS_ATARI) { + /* Atari has a byte-swapped IDE interface */ + insw_swapw(IDE_DATA_REG, buffer, bytecount / 2); + return; + } +#endif /* CONFIG_ATARI */ + ide_input_data (drive, buffer, bytecount / 4); + if ((bytecount & 0x03) >= 2) + insw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1); +} + +void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount) +{ + ++bytecount; +#ifdef CONFIG_ATARI + if (MACH_IS_ATARI) { + /* Atari has a byte-swapped IDE interface */ + outsw_swapw(IDE_DATA_REG, buffer, bytecount / 2); + return; } +#endif /* CONFIG_ATARI */ + ide_output_data (drive, buffer, bytecount / 4); + if ((bytecount & 0x03) >= 2) + outsw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1); } /* @@ -540,7 +588,7 @@ * wait for an interrupt response from a drive. handler() points * at the appropriate code to handle the next interrupt, and a * timer is started to prevent us from waiting forever in case - * something goes wrong (see the timer_expiry() handler later on). + * something goes wrong (see the ide_timer_expiry() handler later on). */ void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout) { @@ -557,131 +605,38 @@ } /* - * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity" - * value for this drive (from its reported identification information). 
- * - * Returns: 1 if lba_capacity looks sensible - * 0 otherwise - */ -static int lba_capacity_is_ok (struct hd_driveid *id) -{ - unsigned long lba_sects = id->lba_capacity; - unsigned long chs_sects = id->cyls * id->heads * id->sectors; - unsigned long _10_percent = chs_sects / 10; - - /* perform a rough sanity check on lba_sects: within 10% is "okay" */ - if ((lba_sects - chs_sects) < _10_percent) - return 1; /* lba_capacity is good */ - - /* some drives have the word order reversed */ - lba_sects = (lba_sects << 16) | (lba_sects >> 16); - if ((lba_sects - chs_sects) < _10_percent) { - id->lba_capacity = lba_sects; /* fix it */ - return 1; /* lba_capacity is (now) good */ - } - return 0; /* lba_capacity value is bad */ -} - -/* * current_capacity() returns the capacity (in sectors) of a drive * according to its current geometry/LBA settings. */ -static unsigned long current_capacity (ide_drive_t *drive) +static unsigned long current_capacity (ide_drive_t *drive) { - struct hd_driveid *id = drive->id; - unsigned long capacity = drive->cyl * drive->head * drive->sect; - if (!drive->present) return 0; - if (drive->media != ide_disk) - return 0x7fffffff; /* cdrom or tape */ - drive->select.b.lba = 0; - /* Determine capacity, and use LBA if the drive properly supports it */ - if (id != NULL && (id->capability & 2) && lba_capacity_is_ok(id)) { - if (id->lba_capacity >= capacity) { - capacity = id->lba_capacity; - drive->select.b.lba = 1; - } - } - return (capacity - drive->sect0); + if (drive->driver != NULL) + return DRIVER(drive)->capacity(drive); + return 0; } /* * ide_geninit() is called exactly *once* for each major, from genhd.c, * at the beginning of the initial partition check for the drives. 
*/ -static void ide_geninit (struct gendisk *gd) +void ide_geninit (struct gendisk *gd) { unsigned int unit; ide_hwif_t *hwif = gd->real_devices; for (unit = 0; unit < gd->nr_real; ++unit) { ide_drive_t *drive = &hwif->drives[unit]; -#ifdef CONFIG_BLK_DEV_IDECD - if (drive->present && drive->media == ide_cdrom) - ide_cdrom_setup(drive); -#endif /* CONFIG_BLK_DEV_IDECD */ -#ifdef CONFIG_BLK_DEV_IDETAPE - if (drive->present && drive->media == ide_tape) - idetape_setup(drive); -#endif /* CONFIG_BLK_DEV_IDETAPE */ + drive->part[0].nr_sects = current_capacity(drive); - if (!drive->present || drive->media != ide_disk) { + if (!drive->present || drive->media != ide_disk || drive->driver == NULL) drive->part[0].start_sect = -1; /* skip partition check */ - } - } -} - -/* - * init_gendisk() (as opposed to ide_geninit) is called for each major device, - * after probing for drives, to allocate partition tables and other data - * structures needed for the routines in genhd.c. ide_geninit() gets called - * somewhat later, during the partition check. 
- */ -static void init_gendisk (ide_hwif_t *hwif) -{ - struct gendisk *gd, **gdp; - unsigned int unit, units, minors; - int *bs; - - /* figure out maximum drive number on the interface */ - for (units = MAX_DRIVES; units > 0; --units) { - if (hwif->drives[units-1].present) - break; } - minors = units * (1<sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); - gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); - bs = kmalloc (minors*sizeof(int), GFP_KERNEL); - - memset(gd->part, 0, minors * sizeof(struct hd_struct)); - - /* cdroms and msdos f/s are examples of non-1024 blocksizes */ - blksize_size[hwif->major] = bs; - for (unit = 0; unit < minors; ++unit) - *bs++ = BLOCK_SIZE; - - for (unit = 0; unit < units; ++unit) - hwif->drives[unit].part = &gd->part[unit << PARTN_BITS]; - - gd->major = hwif->major; /* our major device number */ - gd->major_name = IDE_MAJOR_NAME; /* treated special in genhd.c */ - gd->minor_shift = PARTN_BITS; /* num bits for partitions */ - gd->max_p = 1<max_nr = units; /* max num real drives */ - gd->nr_real = units; /* current num real drives */ - gd->init = ide_geninit; /* initialization function */ - gd->real_devices= hwif; /* ptr to internal data */ - gd->next = NULL; /* linked list of major devs */ - - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) ; - hwif->gd = *gdp = gd; /* link onto tail of list */ } static void do_reset1 (ide_drive_t *, int); /* needed below */ -#ifdef CONFIG_BLK_DEV_IDEATAPI /* * atapi_reset_pollfunc() gets invoked to poll the interface for completion every 50ms * during an atapi drive reset operation. 
If the drive has not yet responded, @@ -710,7 +665,6 @@ } hwgroup->poll_timeout = 0; /* done polling */ } -#endif /* CONFIG_BLK_DEV_IDEATAPI */ /* * reset_pollfunc() gets invoked to poll the interface for completion every 50ms @@ -761,6 +715,20 @@ hwgroup->poll_timeout = 0; /* done polling */ } +static void pre_reset (ide_drive_t *drive) +{ + if (!drive->keep_settings) { + drive->unmask = 0; + drive->io_32bit = 0; + if (drive->using_dma) { + drive->using_dma = 0; + printk("%s: disabled DMA\n", drive->name); + } + } + if (drive->driver != NULL) + DRIVER(drive)->pre_reset(drive); +} + /* * do_reset1() attempts to recover a confused drive by resetting it. * Unfortunately, resetting a disk drive actually resets all devices on @@ -786,48 +754,24 @@ save_flags(flags); cli(); /* Why ? */ -#ifdef CONFIG_BLK_DEV_IDEATAPI /* For an ATAPI device, first try an ATAPI SRST. */ - if (drive->media != ide_disk) { - if (!do_not_try_atapi) { - if (!drive->keep_settings) { - drive->unmask = 0; - drive->io_32bit = 0; - } - OUT_BYTE (drive->select.all, IDE_SELECT_REG); - udelay (20); - OUT_BYTE (WIN_SRST, IDE_COMMAND_REG); - hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; - ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20); - restore_flags (flags); - return; - } + if (drive->media != ide_disk && !do_not_try_atapi) { + pre_reset(drive); + OUT_BYTE (drive->select.all, IDE_SELECT_REG); + udelay (20); + OUT_BYTE (WIN_SRST, IDE_COMMAND_REG); + hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; + ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20); + restore_flags (flags); + return; } -#endif /* CONFIG_BLK_DEV_IDEATAPI */ /* * First, reset any device state data we were maintaining * for any of the drives on this interface. 
*/ - for (unit = 0; unit < MAX_DRIVES; ++unit) { - ide_drive_t *rdrive = &hwif->drives[unit]; - rdrive->special.all = 0; - rdrive->special.b.set_geometry = 1; - rdrive->special.b.recalibrate = 1; - if (OK_TO_RESET_CONTROLLER) - rdrive->mult_count = 0; - if (!rdrive->keep_settings) { - rdrive->mult_req = 0; - rdrive->unmask = 0; - rdrive->io_32bit = 0; - if (rdrive->using_dma) { - rdrive->using_dma = 0; - printk("%s: disabled DMA\n", rdrive->name); - } - } - if (rdrive->mult_req != rdrive->mult_count) - rdrive->special.b.set_multmode = 1; - } + for (unit = 0; unit < MAX_DRIVES; ++unit) + pre_reset(&hwif->drives[unit]); #if OK_TO_RESET_CONTROLLER /* @@ -855,10 +799,6 @@ void ide_do_reset (ide_drive_t *drive) { do_reset1 (drive, 0); -#ifdef CONFIG_BLK_DEV_IDETAPE - if (drive->media == ide_tape) - drive->tape.reset_issued=1; -#endif /* CONFIG_BLK_DEV_IDETAPE */ } /* @@ -897,7 +837,7 @@ byte err = 0; save_flags (flags); - sti(); + ide_sti(); printk("%s: %s: status=0x%02x", drive->name, msg, stat); #if FANCY_STATUS_DUMPS if (drive->media == ide_disk) { @@ -1009,16 +949,10 @@ rq->errors |= ERROR_RESET; /* Mmmm.. 
timing problem */ if (rq->errors >= ERROR_MAX) { -#ifdef CONFIG_BLK_DEV_IDETAPE - if (drive->media == ide_tape) { - rq->errors = 0; - idetape_end_request(0, HWGROUP(drive)); - } - else -#endif /* CONFIG_BLK_DEV_IDETAPE */ + if (drive->driver != NULL) + DRIVER(drive)->end_request(0, HWGROUP(drive)); ide_end_request(0, HWGROUP(drive)); - } - else { + } else { if ((rq->errors & ERROR_RESET) == ERROR_RESET) { ++rq->errors; ide_do_reset(drive); @@ -1030,153 +964,10 @@ } /* - * read_intr() is the handler for disk read/multread interrupts - */ -static void read_intr (ide_drive_t *drive) -{ - byte stat; - int i; - unsigned int msect, nsect; - struct request *rq; - - if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { - ide_error(drive, "read_intr", stat); - return; - } - msect = drive->mult_count; -read_next: - rq = HWGROUP(drive)->rq; - if (msect) { - if ((nsect = rq->current_nr_sectors) > msect) - nsect = msect; - msect -= nsect; - } else - nsect = 1; - ide_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); -#ifdef DEBUG - printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", - drive->name, rq->sector, rq->sector+nsect-1, - (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); -#endif - rq->sector += nsect; - rq->buffer += nsect<<9; - rq->errors = 0; - i = (rq->nr_sectors -= nsect); - if ((rq->current_nr_sectors -= nsect) <= 0) - ide_end_request(1, HWGROUP(drive)); - if (i > 0) { - if (msect) - goto read_next; - ide_set_handler (drive, &read_intr, WAIT_CMD); - } -} - -/* - * write_intr() is the handler for disk write interrupts - */ -static void write_intr (ide_drive_t *drive) -{ - byte stat; - int i; - ide_hwgroup_t *hwgroup = HWGROUP(drive); - struct request *rq = hwgroup->rq; - - if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) { -#ifdef DEBUG - printk("%s: write: sector %ld, buffer=0x%08lx, remaining=%ld\n", - drive->name, rq->sector, (unsigned long) rq->buffer, - rq->nr_sectors-1); -#endif - if ((rq->nr_sectors == 1) ^ ((stat & 
DRQ_STAT) != 0)) { - rq->sector++; - rq->buffer += 512; - rq->errors = 0; - i = --rq->nr_sectors; - --rq->current_nr_sectors; - if (rq->current_nr_sectors <= 0) - ide_end_request(1, hwgroup); - if (i > 0) { - ide_output_data (drive, rq->buffer, SECTOR_WORDS); - ide_set_handler (drive, &write_intr, WAIT_CMD); - } - return; - } - } - ide_error(drive, "write_intr", stat); -} - -/* - * ide_multwrite() transfers a block of up to mcount sectors of data - * to a drive as part of a disk multiple-sector write operation. - */ -void ide_multwrite (ide_drive_t *drive, unsigned int mcount) -{ - struct request *rq = &HWGROUP(drive)->wrq; - - do { - unsigned int nsect = rq->current_nr_sectors; - if (nsect > mcount) - nsect = mcount; - mcount -= nsect; - - ide_output_data(drive, rq->buffer, nsect<<7); -#ifdef DEBUG - printk("%s: multwrite: sector %ld, buffer=0x%08lx, count=%d, remaining=%ld\n", - drive->name, rq->sector, (unsigned long) rq->buffer, - nsect, rq->nr_sectors - nsect); -#endif - if ((rq->nr_sectors -= nsect) <= 0) - break; - if ((rq->current_nr_sectors -= nsect) == 0) { - if ((rq->bh = rq->bh->b_reqnext) != NULL) { - rq->current_nr_sectors = rq->bh->b_size>>9; - rq->buffer = rq->bh->b_data; - } else { - panic("%s: buffer list corrupted\n", drive->name); - break; - } - } else { - rq->buffer += nsect << 9; - } - } while (mcount); -} - -/* - * multwrite_intr() is the handler for disk multwrite interrupts - */ -static void multwrite_intr (ide_drive_t *drive) -{ - byte stat; - int i; - ide_hwgroup_t *hwgroup = HWGROUP(drive); - struct request *rq = &hwgroup->wrq; - - if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) { - if (stat & DRQ_STAT) { - if (rq->nr_sectors) { - ide_multwrite(drive, drive->mult_count); - ide_set_handler (drive, &multwrite_intr, WAIT_CMD); - return; - } - } else { - if (!rq->nr_sectors) { /* all done? 
*/ - rq = hwgroup->rq; - for (i = rq->nr_sectors; i > 0;){ - i -= rq->current_nr_sectors; - ide_end_request(1, hwgroup); - } - return; - } - } - } - ide_error(drive, "multwrite_intr", stat); -} - -/* * Issue a simple drive command * The drive must be selected beforehand. */ -static void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler) +void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler) { ide_set_handler (drive, handler, WAIT_CMD); OUT_BYTE(drive->ctl,IDE_CONTROL_REG); @@ -1185,47 +976,6 @@ } /* - * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd. - */ -static void set_multmode_intr (ide_drive_t *drive) -{ - byte stat = GET_STAT(); - - sti(); - if (OK_STAT(stat,READY_STAT,BAD_STAT)) { - drive->mult_count = drive->mult_req; - } else { - drive->mult_req = drive->mult_count = 0; - drive->special.b.recalibrate = 1; - (void) ide_dump_status(drive, "set_multmode", stat); - } -} - -/* - * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd. - */ -static void set_geometry_intr (ide_drive_t *drive) -{ - byte stat = GET_STAT(); - - sti(); - if (!OK_STAT(stat,READY_STAT,BAD_STAT)) - ide_error(drive, "set_geometry_intr", stat); -} - -/* - * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd. - */ -static void recal_intr (ide_drive_t *drive) -{ - byte stat = GET_STAT(); - - sti(); - if (!OK_STAT(stat,READY_STAT,BAD_STAT)) - ide_error(drive, "recal_intr", stat); -} - -/* * drive_cmd_intr() is invoked on completion of a special DRIVE_CMD. 
*/ static void drive_cmd_intr (ide_drive_t *drive) @@ -1234,7 +984,7 @@ byte *args = (byte *) rq->buffer; byte stat = GET_STAT(); - sti(); + ide_sti(); if ((stat & DRQ_STAT) && args && args[3]) { byte io_32bit = drive->io_32bit; drive->io_32bit = 0; @@ -1259,38 +1009,16 @@ #ifdef DEBUG printk("%s: do_special: 0x%02x\n", drive->name, s->all); #endif - if (s->b.set_geometry) { - s->b.set_geometry = 0; - if (drive->media == ide_disk) { - OUT_BYTE(drive->sect,IDE_SECTOR_REG); - OUT_BYTE(drive->cyl,IDE_LCYL_REG); - OUT_BYTE(drive->cyl>>8,IDE_HCYL_REG); - OUT_BYTE(((drive->head-1)|drive->select.all)&0xBF,IDE_SELECT_REG); - if (!IS_PROMISE_DRIVE) - ide_cmd(drive, WIN_SPECIFY, drive->sect, &set_geometry_intr); - } - } else if (s->b.recalibrate) { - s->b.recalibrate = 0; - if (drive->media == ide_disk && !IS_PROMISE_DRIVE) - ide_cmd(drive, WIN_RESTORE, drive->sect, &recal_intr); - } else if (s->b.set_tune) { + if (s->b.set_tune) { ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc; s->b.set_tune = 0; if (tuneproc != NULL) tuneproc(drive, drive->tune_req); - } else if (s->b.set_multmode) { - s->b.set_multmode = 0; - if (drive->media == ide_disk) { - if (drive->id && drive->mult_req > drive->id->max_multsect) - drive->mult_req = drive->id->max_multsect; - if (!IS_PROMISE_DRIVE) - ide_cmd(drive, WIN_SETMULT, drive->mult_req, &set_multmode_intr); - } else - drive->mult_req = 0; + } else if (drive->driver != NULL) { + DRIVER(drive)->special(drive); } else if (s->all) { - int special = s->all; + printk("%s: bad special flag: 0x%02x\n", drive->name, s->all); s->all = 0; - printk("%s: bad special flag: 0x%02x\n", drive->name, special); } } @@ -1320,7 +1048,7 @@ } save_flags(flags); - sti(); + ide_sti(); timeout += jiffies; do { if (!((stat = GET_STAT()) & BUSY_STAT)) { @@ -1335,98 +1063,6 @@ } /* - * do_rw_disk() issues READ and WRITE commands to a disk, - * using LBA if supported, or CHS otherwise, to address sectors. - * It also takes care of issuing special DRIVE_CMDs. 
- */ -static inline void do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) -{ - ide_hwif_t *hwif = HWIF(drive); - unsigned short io_base = hwif->io_base; -#ifdef CONFIG_BLK_DEV_PROMISE - int use_promise_io = 0; -#endif /* CONFIG_BLK_DEV_PROMISE */ - - OUT_BYTE(drive->ctl,IDE_CONTROL_REG); - OUT_BYTE(rq->nr_sectors,io_base+IDE_NSECTOR_OFFSET); -#ifdef CONFIG_BLK_DEV_PROMISE - if (IS_PROMISE_DRIVE) { - if (hwif->is_promise2 || rq->cmd == READ) { - use_promise_io = 1; - } - } - if (drive->select.b.lba || use_promise_io) { -#else /* !CONFIG_BLK_DEV_PROMISE */ - if (drive->select.b.lba) { -#endif /* CONFIG_BLK_DEV_PROMISE */ -#ifdef DEBUG - printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n", - drive->name, (rq->cmd==READ)?"read":"writ", - block, rq->nr_sectors, (unsigned long) rq->buffer); -#endif - OUT_BYTE(block,io_base+IDE_SECTOR_OFFSET); - OUT_BYTE(block>>=8,io_base+IDE_LCYL_OFFSET); - OUT_BYTE(block>>=8,io_base+IDE_HCYL_OFFSET); - OUT_BYTE(((block>>8)&0x0f)|drive->select.all,io_base+IDE_SELECT_OFFSET); - } else { - unsigned int sect,head,cyl,track; - track = block / drive->sect; - sect = block % drive->sect + 1; - OUT_BYTE(sect,io_base+IDE_SECTOR_OFFSET); - head = track % drive->head; - cyl = track / drive->head; - OUT_BYTE(cyl,io_base+IDE_LCYL_OFFSET); - OUT_BYTE(cyl>>8,io_base+IDE_HCYL_OFFSET); - OUT_BYTE(head|drive->select.all,io_base+IDE_SELECT_OFFSET); -#ifdef DEBUG - printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n", - drive->name, (rq->cmd==READ)?"read":"writ", cyl, - head, sect, rq->nr_sectors, (unsigned long) rq->buffer); -#endif - } -#ifdef CONFIG_BLK_DEV_PROMISE - if (use_promise_io) { - do_promise_io (drive, rq); - return; - } -#endif /* CONFIG_BLK_DEV_PROMISE */ - if (rq->cmd == READ) { -#ifdef CONFIG_BLK_DEV_TRITON - if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive))) - return; -#endif /* CONFIG_BLK_DEV_TRITON */ - ide_set_handler(drive, &read_intr, WAIT_CMD); - 
OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, io_base+IDE_COMMAND_OFFSET); - return; - } - if (rq->cmd == WRITE) { -#ifdef CONFIG_BLK_DEV_TRITON - if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive))) - return; -#endif /* CONFIG_BLK_DEV_TRITON */ - OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, io_base+IDE_COMMAND_OFFSET); - if (ide_wait_stat(drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { - printk("%s: no DRQ after issuing %s\n", drive->name, - drive->mult_count ? "MULTWRITE" : "WRITE"); - return; - } - if (!drive->unmask) - cli(); - if (drive->mult_count) { - HWGROUP(drive)->wrq = *rq; /* scratchpad */ - ide_set_handler (drive, &multwrite_intr, WAIT_CMD); - ide_multwrite(drive, drive->mult_count); - } else { - ide_set_handler (drive, &write_intr, WAIT_CMD); - ide_output_data(drive, rq->buffer, SECTOR_WORDS); - } - return; - } - printk("%s: bad command: %d\n", drive->name, rq->cmd); - ide_end_request(0, HWGROUP(drive)); -} - -/* * execute_drive_cmd() issues a special drive command, * usually initiated by ioctl() from the external hdparm program. 
*/ @@ -1461,9 +1097,9 @@ { unsigned int minor, unit; unsigned long block, blockend; - ide_drive_t *drive; + ide_drive_t *drive = NULL; - sti(); + ide_sti(); #ifdef DEBUG printk("%s: do_request: current=0x%08lx\n", hwif->name, (unsigned long) rq); #endif @@ -1498,10 +1134,6 @@ while ((read_timer() - hwif->last_time) < DISK_RECOVERY_TIME); #endif -#ifdef CONFIG_BLK_DEV_IDETAPE - POLL_HWIF_TAPE_DRIVE; /* macro from ide-tape.h */ -#endif /* CONFIG_BLK_DEV_IDETAPE */ - SELECT_DRIVE(hwif,drive); if (ide_wait_stat(drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { printk("%s: drive not ready for command\n", drive->name); @@ -1513,36 +1145,20 @@ execute_drive_cmd(drive, rq); return; } -#ifdef CONFIG_BLK_DEV_IDEATAPI - switch (drive->media) { - case ide_disk: - do_rw_disk (drive, rq, block); - return; -#ifdef CONFIG_BLK_DEV_IDECD - case ide_cdrom: - ide_do_rw_cdrom (drive, block); - return; -#endif /* CONFIG_BLK_DEV_IDECD */ -#ifdef CONFIG_BLK_DEV_IDETAPE - case ide_tape: - idetape_do_request (drive, rq, block); - return; -#endif /* CONFIG_BLK_DEV_IDETAPE */ - - default: - printk("%s: media type %d not supported\n", - drive->name, drive->media); - goto kill_rq; + if (drive->driver != NULL) { + DRIVER(drive)->do_request(drive, rq, block); + return; } -#else - do_rw_disk (drive, rq, block); /* simpler and faster */ - return; -#endif /* CONFIG_BLK_DEV_IDEATAPI */; + printk("%s: media type %d not supported\n", drive->name, drive->media); + goto kill_rq; } do_special(drive); return; kill_rq: - ide_end_request(0, hwif->hwgroup); + if (drive != NULL && drive->driver != NULL) + DRIVER(drive)->end_request(0, HWGROUP(drive)); + else + ide_end_request(0, hwif->hwgroup); } /* @@ -1572,7 +1188,7 @@ struct request *rq; if ((rq = hwgroup->rq) == NULL) { if (hwif->sharing_irq && hwgroup->drive) /* set nIEN */ - OUT_BYTE(hwgroup->drive->ctl|2,hwif->ctl_port); + OUT_BYTE(hwgroup->drive->ctl|2,hwif->io_ports[IDE_CONTROL_OFFSET]); /* * hwgroup->next_hwif is different from 
hwgroup->hwif * only when a request is inserted using "ide_next". @@ -1584,6 +1200,7 @@ if (rq != NULL && rq->rq_status != RQ_INACTIVE) goto got_rq; } while ((hwif = hwif->next) != hwgroup->next_hwif); + ide_release_lock(&ide_lock); return; /* no work left for this hwgroup */ } got_rq: @@ -1608,6 +1225,8 @@ if (hwgroup->handler == NULL) { ide_hwif_t *hgif = hwgroup->hwif; ide_hwif_t *hwif = hgif; + + ide_get_lock(&ide_lock, ide_intr, hwgroup); do { disable_irq(hwif->irq); } while ((hwif = hwif->next) != hgif); @@ -1618,33 +1237,33 @@ } } -static void do_ide0_request (void) /* invoked with cli() */ +void do_ide0_request (void) /* invoked with cli() */ { do_hwgroup_request (ide_hwifs[0].hwgroup); } #if MAX_HWIFS > 1 -static void do_ide1_request (void) /* invoked with cli() */ +void do_ide1_request (void) /* invoked with cli() */ { do_hwgroup_request (ide_hwifs[1].hwgroup); } -#endif +#endif /* MAX_HWIFS > 1 */ #if MAX_HWIFS > 2 -static void do_ide2_request (void) /* invoked with cli() */ +void do_ide2_request (void) /* invoked with cli() */ { do_hwgroup_request (ide_hwifs[2].hwgroup); } -#endif +#endif /* MAX_HWIFS > 2 */ #if MAX_HWIFS > 3 -static void do_ide3_request (void) /* invoked with cli() */ +void do_ide3_request (void) /* invoked with cli() */ { do_hwgroup_request (ide_hwifs[3].hwgroup); } -#endif +#endif /* MAX_HWIFS > 3 */ -static void timer_expiry (unsigned long data) +void ide_timer_expiry (unsigned long data) { ide_hwgroup_t *hwgroup = (ide_hwgroup_t *) data; ide_drive_t *drive = hwgroup->drive; @@ -1658,7 +1277,7 @@ hwgroup->handler = NULL; handler(drive); } else if (hwgroup->handler == NULL) { /* not waiting for anything? 
*/ - sti(); /* drive must have responded just as the timer expired */ + ide_sti(); /* drive must have responded just as the timer expired */ printk("%s: marginal timeout\n", drive->name); } else { hwgroup->handler = NULL; /* abort the operation */ @@ -1736,17 +1355,21 @@ ide_hwgroup_t *hwgroup = dev_id; ide_handler_t *handler; + if (!ide_ack_intr (hwgroup->hwif->io_ports[IDE_DATA_OFFSET], + hwgroup->hwif->io_ports[IDE_IRQ_OFFSET])) + return; + if (irq == hwgroup->hwif->irq && (handler = hwgroup->handler) != NULL) { ide_drive_t *drive = hwgroup->drive; hwgroup->handler = NULL; del_timer(&(hwgroup->timer)); if (drive->unmask) - sti(); + ide_sti(); handler(drive); cli(); /* this is necessary, as next rq may be different irq */ if (hwgroup->handler == NULL) { SET_RECOVERY_TIMER(HWIF(drive)); - ide_do_request(hwgroup); + ide_do_request(hwgroup); } } else { unexpected_intr(irq, hwgroup); @@ -1795,12 +1418,6 @@ rq->bh = NULL; rq->bhtail = NULL; rq->next = NULL; - -#if 0 /* these are done each time through ide_do_drive_cmd() */ - rq->errors = 0; - rq->rq_status = RQ_ACTIVE; - rq->rq_dev = ????; -#endif } /* @@ -1876,96 +1493,24 @@ return rq->errors ? -EIO : 0; /* return -EIO if errors */ } -static int ide_open(struct inode * inode, struct file * filp) +/* + * This routine is called to flush all partitions and partition tables + * for a changed disk, and then re-read the new partition table. + * If we are revalidating a disk because of a media change, then we + * enter with usage == 0. If we are using an ioctl, we automatically have + * usage == 1 (we need an open channel to use an ioctl :-), so this + * is our limit. 
+ */ +int ide_revalidate_disk(kdev_t i_rdev) { ide_drive_t *drive; - unsigned long flags; + unsigned int p, major, minor; + long flags; - if ((drive = get_info_ptr(inode->i_rdev)) == NULL) - return -ENXIO; - save_flags(flags); - cli(); - while (drive->busy) - sleep_on(&drive->wqueue); - drive->usage++; - restore_flags(flags); -#ifdef CONFIG_BLK_DEV_IDECD - if (drive->media == ide_cdrom) - return ide_cdrom_open (inode, filp, drive); -#endif /* CONFIG_BLK_DEV_IDECD */ -#ifdef CONFIG_BLK_DEV_IDETAPE - if (drive->media == ide_tape) - return idetape_blkdev_open (inode, filp, drive); -#endif /* CONFIG_BLK_DEV_IDETAPE */ - if (drive->removable && drive->usage == 1) { - byte door_lock[] = {WIN_DOORLOCK,0,0,0}; - struct request rq; - check_disk_change(inode->i_rdev); - ide_init_drive_cmd (&rq); - rq.buffer = door_lock; - /* - * Ignore the return code from door_lock, - * since the open() has already succeeded, - * and the door_lock is irrelevant at this point. - */ - (void) ide_do_drive_cmd(drive, &rq, ide_wait); - } - return 0; -} - -/* - * Releasing a block device means we sync() it, so that it can safely - * be forgotten about... 
- */ -static void ide_release(struct inode * inode, struct file * file) -{ - ide_drive_t *drive; - - if ((drive = get_info_ptr(inode->i_rdev)) != NULL) { - fsync_dev(inode->i_rdev); - drive->usage--; -#ifdef CONFIG_BLK_DEV_IDECD - if (drive->media == ide_cdrom) { - ide_cdrom_release (inode, file, drive); - return; - } -#endif /* CONFIG_BLK_DEV_IDECD */ -#ifdef CONFIG_BLK_DEV_IDETAPE - if (drive->media == ide_tape) { - idetape_blkdev_release (inode, file, drive); - return; - } -#endif /* CONFIG_BLK_DEV_IDETAPE */ - if (drive->removable && !drive->usage) { - byte door_unlock[] = {WIN_DOORUNLOCK,0,0,0}; - struct request rq; - invalidate_buffers(inode->i_rdev); - ide_init_drive_cmd (&rq); - rq.buffer = door_unlock; - (void) ide_do_drive_cmd(drive, &rq, ide_wait); - } - } -} - -/* - * This routine is called to flush all partitions and partition tables - * for a changed disk, and then re-read the new partition table. - * If we are revalidating a disk because of a media change, then we - * enter with usage == 0. If we are using an ioctl, we automatically have - * usage == 1 (we need an open channel to use an ioctl :-), so this - * is our limit. 
- */ -static int revalidate_disk(kdev_t i_rdev) -{ - ide_drive_t *drive; - unsigned int p, major, minor; - long flags; - - if ((drive = get_info_ptr(i_rdev)) == NULL) - return -ENODEV; - - major = MAJOR(i_rdev); - minor = drive->select.b.unit << PARTN_BITS; + if ((drive = get_info_ptr(i_rdev)) == NULL) + return -ENODEV; + major = MAJOR(i_rdev); + minor = drive->select.b.unit << PARTN_BITS; save_flags(flags); cli(); if (drive->busy || (drive->usage > 1)) { @@ -1973,6 +1518,7 @@ return -EBUSY; }; drive->busy = 1; + MOD_INC_USE_COUNT; restore_flags(flags); for (p = 0; p < (1<part[0].nr_sects = current_capacity(drive); - if (drive->media != ide_disk) + if (drive->media != ide_disk || drive->driver == NULL) drive->part[0].start_sect = -1; resetup_one_dev(HWIF(drive)->gd, drive->select.b.unit); drive->busy = 0; wake_up(&drive->wqueue); + MOD_DEC_USE_COUNT; return 0; } -static int write_fs_long (unsigned long useraddr, long value) +static void revalidate_drives (void) { - int err; + ide_hwif_t *hwif; + ide_drive_t *drive; + int index, unit; - if (NULL == (long *)useraddr) - return -EINVAL; - if ((err = verify_area(VERIFY_WRITE, (long *)useraddr, sizeof(long)))) - return err; - put_user((unsigned)value, (long *) useraddr); - return 0; + for (index = 0; index < MAX_HWIFS; ++index) { + hwif = &ide_hwifs[index]; + for (unit = 0; unit < MAX_DRIVES; ++unit) { + drive = &ide_hwifs[index].drives[unit]; + if (drive->revalidate) { + drive->revalidate = 0; + if (!initializing) + (void) ide_revalidate_disk(MKDEV(hwif->major, unit<type == type) + (void) module->init(); + module = module->next; + } + revalidate_drives(); +} + +static int ide_open(struct inode * inode, struct file * filp) +{ + ide_drive_t *drive; + int rc; + + if ((drive = get_info_ptr(inode->i_rdev)) == NULL) + return -ENXIO; + MOD_INC_USE_COUNT; + if (drive->driver == NULL) + ide_init_module(IDE_DRIVER_MODULE); +#ifdef CONFIG_KERNELD + if (drive->driver == NULL) { + if (drive->media == ide_disk) + (void) 
request_module("ide-disk"); + if (drive->media == ide_cdrom) + (void) request_module("ide-cd"); + if (drive->media == ide_tape) + (void) request_module("ide-tape"); + if (drive->media == ide_floppy) + (void) request_module("ide-floppy"); + } +#endif /* CONFIG_KERNELD */ + while (drive->busy) + sleep_on(&drive->wqueue); + drive->usage++; + if (drive->driver != NULL) { + if ((rc = DRIVER(drive)->open(inode, filp, drive))) + MOD_DEC_USE_COUNT; + return rc; + } + printk ("%s: driver not present\n", drive->name); + drive->usage--; + MOD_DEC_USE_COUNT; + return -ENXIO; +} + +/* + * Releasing a block device means we sync() it, so that it can safely + * be forgotten about... + */ +static void ide_release(struct inode * inode, struct file * file) +{ + ide_drive_t *drive; + + if ((drive = get_info_ptr(inode->i_rdev)) != NULL) { + fsync_dev(inode->i_rdev); + drive->usage--; + if (drive->driver != NULL) + DRIVER(drive)->release(inode, file, drive); + MOD_DEC_USE_COUNT; + } +} + +void ide_unregister (unsigned int index) +{ + struct gendisk *gd, **gdp; + ide_drive_t *drive; + ide_hwif_t *hwif, *g; + ide_hwgroup_t *hwgroup; + int irq_count = 0, unit; + unsigned long flags; + + if (index >= MAX_HWIFS) + return; + save_flags(flags); + cli(); + hwif = &ide_hwifs[index]; + if (!hwif->present) + goto abort; + for (unit = 0; unit < MAX_DRIVES; ++unit) { + drive = &hwif->drives[unit]; + if (!drive->present) + continue; + if (drive->busy || drive->usage) + goto abort; + if (drive->driver != NULL && DRIVER(drive)->cleanup(drive)) + goto abort; + if (drive->id != NULL) { + kfree(drive->id); + drive->id = NULL; + } + drive->present = 0; + } + hwif->present = 0; + hwgroup = hwif->hwgroup; + + /* + * free the irq if we were the only hwif using it + */ + g = hwgroup->hwif; + do { + if (g->irq == hwif->irq) + ++irq_count; + g = g->next; + } while (g != hwgroup->hwif); + if (irq_count == 1) + free_irq(hwif->irq, hwgroup); + + /* + * Note that we only release the standard ports, + * and do not 
even try to handle any extra ports + * allocated for weird IDE interface chipsets. + */ + ide_release_region(hwif->io_ports[IDE_DATA_OFFSET], 8); + ide_release_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1); + + /* + * Remove us from the hwgroup, and free + * the hwgroup if we were the only member + */ + while (hwgroup->hwif->next != hwif) + hwgroup->hwif = hwgroup->hwif->next; + hwgroup->hwif->next = hwif->next; + if (hwgroup->hwif == hwif) + hwgroup->hwif = hwif->next; + if (hwgroup->next_hwif == hwif) + hwgroup->next_hwif = hwif->next; + if (hwgroup->hwif == hwif) + kfree(hwgroup); + + /* + * Remove us from the kernel's knowledge + */ + unregister_blkdev(hwif->major, hwif->name); + kfree(blksize_size[hwif->major]); + blk_dev[hwif->major].request_fn = NULL; + blksize_size[hwif->major] = NULL; + for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) + if (*gdp == hwif->gd) + break; + if (*gdp == NULL) + printk("gd not in disk chain!\n"); + else { + gd = *gdp; *gdp = gd->next; + kfree(gd->sizes); + kfree(gd->part); + kfree(gd); + } + init_hwif_data (index); /* restore hwif data to pristine status */ +abort: + restore_flags(flags); +} + +int ide_register (int arg1, int arg2, int irq) +{ + int index, retry = 1; + ide_hwif_t *hwif; + ide_ioreg_t data_port = (ide_ioreg_t) arg1, ctl_port = (ide_ioreg_t) arg2; + + do { + for (index = 0; index < MAX_HWIFS; ++index) { + hwif = &ide_hwifs[index]; + if (hwif->io_ports[IDE_DATA_OFFSET] == data_port) + goto found; + } + for (index = 0; index < MAX_HWIFS; ++index) { + hwif = &ide_hwifs[index]; + if (!hwif->present) { + ide_init_hwif_ports(hwif->io_ports, data_port, &hwif->irq); + if (ctl_port) + hwif->io_ports[IDE_CONTROL_OFFSET] = ctl_port; + hwif->irq = irq; + goto found; + } + } + for (index = 0; index < MAX_HWIFS; index++) + ide_unregister(index); + } while (retry--); + return -1; +found: + if (hwif->present) + ide_unregister(index); + if (hwif->present) + return -1; + hwif->noprobe = 0; + ide_init_module(IDE_PROBE_MODULE); 
+ ide_init_module(IDE_DRIVER_MODULE); + return hwif->present ? index : -1; } static int ide_ioctl (struct inode *inode, struct file *file, @@ -2026,13 +1772,11 @@ { struct hd_geometry *loc = (struct hd_geometry *) arg; if (!loc || drive->media != ide_disk) return -EINVAL; - err = verify_area(VERIFY_WRITE, loc, sizeof(*loc)); - if (err) return err; - put_user(drive->bios_head, (byte *) &loc->heads); - put_user(drive->bios_sect, (byte *) &loc->sectors); - put_user(drive->bios_cyl, (unsigned short *) &loc->cylinders); - put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, - (unsigned long *) &loc->start); + if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; + if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; + if (put_user(drive->bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT; + if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, + (unsigned long *) &loc->start)) return -EFAULT; return 0; } case BLKFLSBUF: @@ -2048,48 +1792,46 @@ return 0; case BLKRAGET: - return write_fs_long(arg, read_ahead[MAJOR(inode->i_rdev)]); + return put_user(read_ahead[MAJOR(inode->i_rdev)], (long *) arg); case BLKGETSIZE: /* Return device size */ - return write_fs_long(arg, drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects); + return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (long *) arg); + case BLKRRPART: /* Re-read partition tables */ if (!suser()) return -EACCES; - return revalidate_disk(inode->i_rdev); + return ide_revalidate_disk(inode->i_rdev); case HDIO_GET_KEEPSETTINGS: - return write_fs_long(arg, drive->keep_settings); + return put_user(drive->keep_settings, (long *) arg); case HDIO_GET_UNMASKINTR: - return write_fs_long(arg, drive->unmask); + return put_user(drive->unmask, (long *) arg); case HDIO_GET_DMA: - return write_fs_long(arg, drive->using_dma); + return put_user(drive->using_dma, (long *) arg); case HDIO_GET_32BIT: - return write_fs_long(arg, 
drive->io_32bit); + return put_user(drive->io_32bit, (long *) arg); case HDIO_GET_MULTCOUNT: - return write_fs_long(arg, drive->mult_count); + return put_user(drive->mult_count, (long *) arg); case HDIO_GET_IDENTITY: - if (!arg || (MINOR(inode->i_rdev) & PARTN_MASK)) + if (MINOR(inode->i_rdev) & PARTN_MASK) return -EINVAL; if (drive->id == NULL) return -ENOMSG; - err = verify_area(VERIFY_WRITE, (char *)arg, sizeof(*drive->id)); - if (!err) - copy_to_user((char *)arg, (char *)drive->id, sizeof(*drive->id)); - return err; + if (copy_to_user((char *)arg, (char *)drive->id, sizeof(*drive->id))) + return -EFAULT; + return 0; - case HDIO_GET_NOWERR: - return write_fs_long(arg, drive->bad_wstat == BAD_R_STAT); + case HDIO_GET_NOWERR: + return put_user(drive->bad_wstat == BAD_R_STAT, (long *) arg); case HDIO_SET_DMA: if (!suser()) return -EACCES; -#ifdef CONFIG_BLK_DEV_IDECD - if (drive->media == ide_cdrom) + if (drive->driver != NULL && !DRIVER(drive)->supports_dma) return -EPERM; -#endif /* CONFIG_BLK_DEV_IDECD */ if (!drive->id || !(drive->id->capability & 1) || !HWIF(drive)->dmaproc) return -EPERM; case HDIO_SET_KEEPSETTINGS: @@ -2166,28 +1908,23 @@ byte args[4], *argbuf = args; int argsize = 4; if (!suser()) return -EACCES; - if (NULL == (void *) arg) { - err = ide_do_drive_cmd(drive, &rq, ide_wait); - } else if (!(err = verify_area(VERIFY_READ,(void *)arg, 4))) { - copy_from_user(args, (void *)arg, 4); - if (args[3]) { - argsize = 4 + (SECTOR_WORDS * 4 * args[3]); - argbuf = kmalloc(argsize, GFP_KERNEL); - if (argbuf == NULL) - return -ENOMEM; - argbuf[0] = args[0]; - argbuf[1] = args[1]; - argbuf[2] = args[2]; - argbuf[3] = args[3]; - } - if (!(err = verify_area(VERIFY_WRITE,(void *)arg, argsize))) { - rq.buffer = argbuf; - err = ide_do_drive_cmd(drive, &rq, ide_wait); - copy_to_user((void *)arg, argbuf, argsize); - } - if (argsize > 4) - kfree(argbuf); - } + if (NULL == (void *) arg) + return ide_do_drive_cmd(drive, &rq, ide_wait); + if (copy_from_user(args, (void 
*)arg, 4)) + return -EFAULT; + if (args[3]) { + argsize = 4 + (SECTOR_WORDS * 4 * args[3]); + argbuf = kmalloc(argsize, GFP_KERNEL); + if (argbuf == NULL) + return -ENOMEM; + memcpy(argbuf, args, 4); + } + rq.buffer = argbuf; + err = ide_do_drive_cmd(drive, &rq, ide_wait); + if (copy_to_user((void *)arg, argbuf, argsize)) + err = -EFAULT; + if (argsize > 4) + kfree(argbuf); return err; } case HDIO_SET_PIO_MODE: @@ -2208,17 +1945,22 @@ (void) ide_do_drive_cmd (drive, &rq, ide_wait); return 0; + case HDIO_SCAN_HWIF: + { + int args[3]; + if (!suser()) return -EACCES; + if (copy_from_user(args, (void *)arg, 3 * sizeof(int))) + return -EFAULT; + if (ide_register(args[0], args[1], args[2]) == -1) + return -EIO; + return 0; + } + RO_IOCTLS(inode->i_rdev, arg); default: -#ifdef CONFIG_BLK_DEV_IDECD - if (drive->media == ide_cdrom) - return ide_cdrom_ioctl(drive, inode, file, cmd, arg); -#endif /* CONFIG_BLK_DEV_IDECD */ -#ifdef CONFIG_BLK_DEV_IDETAPE - if (drive->media == ide_tape) - return idetape_blkdev_ioctl(drive, inode, file, cmd, arg); -#endif /* CONFIG_BLK_DEV_IDETAPE */ + if (drive->driver != NULL) + return DRIVER(drive)->ioctl(drive, inode, file, cmd, arg); return -EPERM; } } @@ -2229,12 +1971,8 @@ if ((drive = get_info_ptr(i_rdev)) == NULL) return -ENODEV; -#ifdef CONFIG_BLK_DEV_IDECD - if (drive->media == ide_cdrom) - return ide_cdrom_check_media_change (drive); -#endif /* CONFIG_BLK_DEV_IDECD */ - if (drive->removable) /* for disks */ - return 1; /* always assume it was changed */ + if (drive->driver != NULL) + return DRIVER(drive)->media_change(drive); return 0; } @@ -2265,506 +2003,18 @@ *p++ = '\0'; } -static inline void do_identify (ide_drive_t *drive, byte cmd) -{ - int bswap; - struct hd_driveid *id; - unsigned long capacity, check; - - id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_KERNEL); - ide_input_data(drive, id, SECTOR_WORDS);/* read 512 bytes of id info */ - sti(); - -#if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) - /* - * 
EATA SCSI controllers do a hardware ATA emulation: - * Ignore them if there is a driver for them available. - */ - if ((id->model[0] == 'P' && id->model[1] == 'M') - || (id->model[0] == 'S' && id->model[1] == 'K')) { - printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model); - drive->present = 0; - return; - } -#endif - - /* - * WIN_IDENTIFY returns little-endian info, - * WIN_PIDENTIFY *usually* returns little-endian info. - */ - bswap = 1; - if (cmd == WIN_PIDENTIFY) { - if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */ - || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */ - || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */ - bswap = 0; /* Vertos drives may still be weird */ - } - ide_fixstring (id->model, sizeof(id->model), bswap); - ide_fixstring (id->fw_rev, sizeof(id->fw_rev), bswap); - ide_fixstring (id->serial_no, sizeof(id->serial_no), bswap); - -#ifdef CONFIG_BLK_DEV_IDEATAPI - /* - * Check for an ATAPI device - */ - if (cmd == WIN_PIDENTIFY) { - byte type = (id->config >> 8) & 0x1f; - printk("%s: %s, ATAPI ", drive->name, id->model); -#ifdef CONFIG_BLK_DEV_PROMISE - if (HWIF(drive)->is_promise2) { - printk(" -- not supported on 2nd Promise port\n"); - drive->present = 0; - return; - } -#endif /* CONFIG_BLK_DEV_PROMISE */ - switch (type) { - case 0: /* Early cdrom models used zero */ - case 5: -#ifdef CONFIG_BLK_DEV_IDECD - printk ("CDROM drive\n"); - drive->media = ide_cdrom; - drive->present = 1; - drive->removable = 1; - return; -#else - printk ("CDROM "); - break; -#endif /* CONFIG_BLK_DEV_IDECD */ - case 1: -#ifdef CONFIG_BLK_DEV_IDETAPE - printk ("TAPE drive"); - if (idetape_identify_device (drive,id)) { - drive->media = ide_tape; - drive->present = 1; - drive->removable = 1; - if (drive->autotune != 2 && HWIF(drive)->dmaproc != NULL) { - if (!HWIF(drive)->dmaproc(ide_dma_check, drive)) - printk(", DMA"); - } - printk("\n"); - } - else { - drive->present = 0; - printk ("\nide-tape: the tape is not supported by this 
version of the driver\n"); - } - return; -#else - printk ("TAPE "); - break; -#endif /* CONFIG_BLK_DEV_IDETAPE */ - default: - drive->present = 0; - printk("Type %d - Unknown device\n", type); - return; - } - drive->present = 0; - printk("- not supported by this kernel\n"); - return; - } -#endif /* CONFIG_BLK_DEV_IDEATAPI */ - - /* check for removable disks (eg. SYQUEST), ignore 'WD' drives */ - if (id->config & (1<<7)) { /* removable disk ? */ - if (id->model[0] != 'W' || id->model[1] != 'D') - drive->removable = 1; - } - - /* SunDisk drives: treat as non-removable, force one unit */ - if (id->model[0] == 'S' && id->model[1] == 'u') { - drive->removable = 0; - if (drive->select.all & (1<<4)) { - drive->present = 0; - return; - } - } - - drive->media = ide_disk; - /* Extract geometry if we did not already have one for the drive */ - if (!drive->present) { - drive->present = 1; - drive->cyl = drive->bios_cyl = id->cyls; - drive->head = drive->bios_head = id->heads; - drive->sect = drive->bios_sect = id->sectors; - } - /* Handle logical geometry translation by the drive */ - if ((id->field_valid & 1) && id->cur_cyls && id->cur_heads - && (id->cur_heads <= 16) && id->cur_sectors) - { - /* - * Extract the physical drive geometry for our use. - * Note that we purposely do *not* update the bios info. - * This way, programs that use it (like fdisk) will - * still have the same logical view as the BIOS does, - * which keeps the partition table from being screwed. - * - * An exception to this is the cylinder count, - * which we reexamine later on to correct for 1024 limitations. - */ - drive->cyl = id->cur_cyls; - drive->head = id->cur_heads; - drive->sect = id->cur_sectors; - - /* check for word-swapped "capacity" field in id information */ - capacity = drive->cyl * drive->head * drive->sect; - check = (id->cur_capacity0 << 16) | id->cur_capacity1; - if (check == capacity) { /* was it swapped? 
*/ - /* yes, bring it into little-endian order: */ - id->cur_capacity0 = (capacity >> 0) & 0xffff; - id->cur_capacity1 = (capacity >> 16) & 0xffff; - } - } - /* Use physical geometry if what we have still makes no sense */ - if ((!drive->head || drive->head > 16) && id->heads && id->heads <= 16) { - drive->cyl = id->cyls; - drive->head = id->heads; - drive->sect = id->sectors; - } - /* Correct the number of cyls if the bios value is too small */ - if (drive->sect == drive->bios_sect && drive->head == drive->bios_head) { - if (drive->cyl > drive->bios_cyl) - drive->bios_cyl = drive->cyl; - } - - (void) current_capacity (drive); /* initialize LBA selection */ - - printk ("%s: %.40s, %ldMB w/%dkB Cache, %sCHS=%d/%d/%d", - drive->name, id->model, current_capacity(drive)/2048L, id->buf_size/2, - drive->select.b.lba ? "LBA, " : "", - drive->bios_cyl, drive->bios_head, drive->bios_sect); - - drive->mult_count = 0; - if (id->max_multsect) { - drive->mult_req = INITIAL_MULT_COUNT; - if (drive->mult_req > id->max_multsect) - drive->mult_req = id->max_multsect; - if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect)) - drive->special.b.set_multmode = 1; - } - if (drive->autotune != 2 && HWIF(drive)->dmaproc != NULL) { - if (!(HWIF(drive)->dmaproc(ide_dma_check, drive))) - printk(", DMA"); - } - printk("\n"); -} - /* - * Delay for *at least* 50ms. As we don't know how much time is left - * until the next tick occurs, we wait an extra tick to be safe. - * This is used only during the probing/polling for drives at boot time. + * stridx() returns the offset of c within s, + * or -1 if c is '\0' or not found within s. */ -static void delay_50ms (void) +static int stridx (const char *s, char c) { - unsigned long timer = jiffies + ((HZ + 19)/20) + 1; - while (timer > jiffies); + char *i = strchr(s, c); + return (i && c) ? i - s : -1; } /* - * try_to_identify() sends an ATA(PI) IDENTIFY request to a drive - * and waits for a response. 
It also monitors irqs while this is - * happening, in hope of automatically determining which one is - * being used by the interface. - * - * Returns: 0 device was identified - * 1 device timed-out (no response to identify request) - * 2 device aborted the command (refused to identify itself) - */ -static int try_to_identify (ide_drive_t *drive, byte cmd) -{ - int hd_status, rc; - unsigned long timeout; - int irqs = 0; - - if (!HWIF(drive)->irq) { /* already got an IRQ? */ - probe_irq_off(probe_irq_on()); /* clear dangling irqs */ - irqs = probe_irq_on(); /* start monitoring irqs */ - OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* enable device irq */ - } - - delay_50ms(); /* take a deep breath */ - if ((IN_BYTE(IDE_ALTSTATUS_REG) ^ IN_BYTE(IDE_STATUS_REG)) & ~INDEX_STAT) { - printk("%s: probing with STATUS instead of ALTSTATUS\n", drive->name); - hd_status = IDE_STATUS_REG; /* ancient Seagate drives */ - } else - hd_status = IDE_ALTSTATUS_REG; /* use non-intrusive polling */ - -#if CONFIG_BLK_DEV_PROMISE - if (IS_PROMISE_DRIVE) { - if (promise_cmd(drive,PROMISE_IDENTIFY)) { - if (irqs) - (void) probe_irq_off(irqs); - return 1; - } - } else -#endif /* CONFIG_BLK_DEV_PROMISE */ - OUT_BYTE(cmd,IDE_COMMAND_REG); /* ask drive for ID */ - timeout = ((cmd == WIN_IDENTIFY) ? 
WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; - timeout += jiffies; - do { - if (jiffies > timeout) { - if (irqs) - (void) probe_irq_off(irqs); - return 1; /* drive timed-out */ - } - delay_50ms(); /* give drive a breather */ - } while (IN_BYTE(hd_status) & BUSY_STAT); - - delay_50ms(); /* wait for IRQ and DRQ_STAT */ - if (OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) { - unsigned long flags; - save_flags(flags); - cli(); /* some systems need this */ - do_identify(drive, cmd); /* drive returned ID */ - rc = 0; /* drive responded with ID */ - (void) GET_STAT(); /* clear drive IRQ */ - restore_flags(flags); - } else - rc = 2; /* drive refused ID */ - if (!HWIF(drive)->irq) { - irqs = probe_irq_off(irqs); /* get our irq number */ - if (irqs > 0) { - HWIF(drive)->irq = irqs; /* save it for later */ - irqs = probe_irq_on(); - OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* mask device irq */ - udelay(5); - (void) probe_irq_off(irqs); - (void) probe_irq_off(probe_irq_on()); /* clear self-inflicted irq */ - (void) GET_STAT(); /* clear drive IRQ */ - - } else { /* Mmmm.. multiple IRQs.. don't know which was ours */ - printk("%s: IRQ probe failed (%d)\n", drive->name, irqs); -#ifdef CONFIG_BLK_DEV_CMD640 -#ifdef CMD640_DUMP_REGS - if (HWIF(drive)->chipset == ide_cmd640) { - printk("%s: Hmmm.. probably a driver problem.\n", drive->name); - CMD640_DUMP_REGS; - } -#endif /* CMD640_DUMP_REGS */ -#endif /* CONFIG_BLK_DEV_CMD640 */ - } - } - return rc; -} - -/* - * do_probe() has the difficult job of finding a drive if it exists, - * without getting hung up if it doesn't exist, without trampling on - * ethernet cards, and without leaving any IRQs dangling to haunt us later. - * - * If a drive is "known" to exist (from CMOS or kernel parameters), - * but does not respond right away, the probe will "hang in there" - * for the maximum wait time (about 30 seconds), otherwise it will - * exit much more quickly. 
- * - * Returns: 0 device was identified - * 1 device timed-out (no response to identify request) - * 2 device aborted the command (refused to identify itself) - * 3 bad status from device (possible for ATAPI drives) - * 4 probe was not attempted because failure was obvious - */ -static int do_probe (ide_drive_t *drive, byte cmd) -{ - int rc; - ide_hwif_t *hwif = HWIF(drive); -#ifdef CONFIG_BLK_DEV_IDEATAPI - if (drive->present) { /* avoid waiting for inappropriate probes */ - if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY)) - return 4; - } -#endif /* CONFIG_BLK_DEV_IDEATAPI */ -#ifdef DEBUG - printk("probing for %s: present=%d, media=%d, probetype=%s\n", - drive->name, drive->present, drive->media, - (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI"); -#endif - SELECT_DRIVE(hwif,drive); - delay_50ms(); - if (IN_BYTE(IDE_SELECT_REG) != drive->select.all && !drive->present) { - OUT_BYTE(0xa0,IDE_SELECT_REG); /* exit with drive0 selected */ - delay_50ms(); /* allow BUSY_STAT to assert & clear */ - return 3; /* no i/f present: avoid killing ethernet cards */ - } - - if (OK_STAT(GET_STAT(),READY_STAT,BUSY_STAT) - || drive->present || cmd == WIN_PIDENTIFY) - { - if ((rc = try_to_identify(drive,cmd))) /* send cmd and wait */ - rc = try_to_identify(drive,cmd); /* failed: try again */ - if (rc == 1) - printk("%s: no response (status = 0x%02x)\n", drive->name, GET_STAT()); - (void) GET_STAT(); /* ensure drive irq is clear */ - } else { - rc = 3; /* not present or maybe ATAPI */ - } - if (drive->select.b.unit != 0) { - OUT_BYTE(0xa0,IDE_SELECT_REG); /* exit with drive0 selected */ - delay_50ms(); - (void) GET_STAT(); /* ensure drive irq is clear */ - } - return rc; -} - -/* - * probe_for_drive() tests for existence of a given drive using do_probe(). - * - * Returns: 0 no device was found - * 1 device was found (note: drive->present might still be 0) - */ -static inline byte probe_for_drive (ide_drive_t *drive) -{ - if (drive->noprobe) /* skip probing? 
*/ - return drive->present; - if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */ -#ifdef CONFIG_BLK_DEV_IDEATAPI - (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */ -#endif /* CONFIG_BLK_DEV_IDEATAPI */ - } - if (!drive->present) - return 0; /* drive not found */ - if (drive->id == NULL) { /* identification failed? */ - if (drive->media == ide_disk) { - printk ("%s: non-IDE drive, CHS=%d/%d/%d\n", - drive->name, drive->cyl, drive->head, drive->sect); - } -#ifdef CONFIG_BLK_DEV_IDECD - else if (drive->media == ide_cdrom) { - printk("%s: ATAPI cdrom (?)\n", drive->name); - } -#endif /* CONFIG_BLK_DEV_IDECD */ - else { - drive->present = 0; /* nuke it */ - } - } - return 1; /* drive was found */ -} - -/* - * We query CMOS about hard disks : it could be that we have a SCSI/ESDI/etc - * controller that is BIOS compatible with ST-506, and thus showing up in our - * BIOS table, but not register compatible, and therefore not present in CMOS. - * - * Furthermore, we will assume that our ST-506 drives are the primary - * drives in the system -- the ones reflected as drive 1 or 2. The first - * drive is stored in the high nibble of CMOS byte 0x12, the second in the low - * nibble. This will be either a 4 bit drive type or 0xf indicating use byte - * 0x19 for an 8 bit type, drive 1, 0x1a for drive 2 in CMOS. A non-zero value - * means we have an AT controller hard disk for that drive. - * - * Of course, there is no guarantee that either drive is actually on the - * "primary" IDE interface, but we don't bother trying to sort that out here. - * If a drive is not actually on the primary interface, then these parameters - * will be ignored. This results in the user having to supply the logical - * drive geometry as a boot parameter for each drive not on the primary i/f. 
- * - * The only "perfect" way to handle this would be to modify the setup.[cS] code - * to do BIOS calls Int13h/Fn08h and Int13h/Fn48h to get all of the drive info - * for us during initialization. I have the necessary docs -- any takers? -ml - */ -static void probe_cmos_for_drives (ide_hwif_t *hwif) -{ -#ifdef __i386__ - extern struct drive_info_struct drive_info; - byte cmos_disks, *BIOS = (byte *) &drive_info; - int unit; - -#ifdef CONFIG_BLK_DEV_PROMISE - if (hwif->is_promise2) - return; -#endif /* CONFIG_BLK_DEV_PROMISE */ - outb_p(0x12,0x70); /* specify CMOS address 0x12 */ - cmos_disks = inb_p(0x71); /* read the data from 0x12 */ - /* Extract drive geometry from CMOS+BIOS if not already setup */ - for (unit = 0; unit < MAX_DRIVES; ++unit) { - ide_drive_t *drive = &hwif->drives[unit]; - if ((cmos_disks & (0xf0 >> (unit*4))) && !drive->present && !drive->nobios) { - drive->cyl = drive->bios_cyl = *(unsigned short *)BIOS; - drive->head = drive->bios_head = *(BIOS+2); - drive->sect = drive->bios_sect = *(BIOS+14); - drive->ctl = *(BIOS+8); - drive->present = 1; - } - BIOS += 16; - } -#endif -} - -/* - * This routine only knows how to look for drive units 0 and 1 - * on an interface, so any setting of MAX_DRIVES > 2 won't work here. 
- */ -static void probe_hwif (ide_hwif_t *hwif) -{ - unsigned int unit; - - if (hwif->noprobe) - return; - if (hwif->io_base == HD_DATA) - probe_cmos_for_drives (hwif); -#if CONFIG_BLK_DEV_PROMISE - if (!hwif->is_promise2 && - (check_region(hwif->io_base,8) || check_region(hwif->ctl_port,1))) { -#else - if (check_region(hwif->io_base,8) || check_region(hwif->ctl_port,1)) { -#endif /* CONFIG_BLK_DEV_PROMISE */ - int msgout = 0; - for (unit = 0; unit < MAX_DRIVES; ++unit) { - ide_drive_t *drive = &hwif->drives[unit]; - if (drive->present) { - drive->present = 0; - printk("%s: ERROR, PORTS ALREADY IN USE\n", drive->name); - msgout = 1; - } - } - if (!msgout) - printk("%s: ports already in use, skipping probe\n", hwif->name); - } else { - unsigned long flags; - save_flags(flags); - - sti(); /* needed for jiffies and irq probing */ - /* - * Second drive should only exist if first drive was found, - * but a lot of cdrom drives are configured as single slaves. - */ - for (unit = 0; unit < MAX_DRIVES; ++unit) { - ide_drive_t *drive = &hwif->drives[unit]; - (void) probe_for_drive (drive); - if (drive->present && drive->media == ide_disk) { - if ((!drive->head || drive->head > 16) && !drive->select.b.lba) { - printk("%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", - drive->name, drive->head); - drive->present = 0; - } - } - if (drive->present && !hwif->present) { - hwif->present = 1; - request_region(hwif->io_base, 8, hwif->name); - request_region(hwif->ctl_port, 1, hwif->name); - } - } - restore_flags(flags); - for (unit = 0; unit < MAX_DRIVES; ++unit) { - ide_drive_t *drive = &hwif->drives[unit]; - if (drive->present && drive->media != ide_tape) { - ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc; - if (tuneproc != NULL && drive->autotune == 1) - tuneproc(drive, 255); /* auto-tune PIO mode */ - } - } - } -} - -/* - * stridx() returns the offset of c within s, - * or -1 if c is '\0' or not found within s. 
- */ -static int stridx (const char *s, char c) -{ - char *i = strchr(s, c); - return (i && c) ? i - s : -1; -} - -/* - * match_parm() does parsing for ide_setup(): + * match_parm() does parsing for ide_setup(): * * 1. the first char of s must be '='. * 2. if the remainder matches one of the supplied keywords, @@ -2837,7 +2087,7 @@ * and quite likely to cause trouble with * older/odd IDE drives. * - * "idebus=xx" : inform IDE driver of VESA/PCI bus speed in Mhz, + * "idebus=xx" : inform IDE driver of VESA/PCI bus speed in MHz, * where "xx" is between 20 and 66 inclusive, * used when tuning chipset PIO modes. * For PCI bus, 25 is correct for a P75 system, @@ -2892,7 +2142,7 @@ if (s[0] == 'h' && s[1] == 'd' && s[2] >= 'a' && s[2] <= max_drive) { const char *hd_words[] = {"none", "noprobe", "nowerr", "cdrom", "serialize", "autotune", "noautotune", - "slow", NULL}; + "slow", "swapdata", NULL}; unit = s[2] - 'a'; hw = unit / MAX_DRIVES; unit = unit % MAX_DRIVES; @@ -2925,6 +2175,9 @@ case -8: /* "slow" */ drive->slow = 1; goto done; + case -9: /* swapdata */ + drive->bswap = 1; + goto done; case 3: /* cyl,head,sect */ drive->media = ide_disk; drive->cyl = drive->bios_cyl = vals[0]; @@ -2961,7 +2214,7 @@ * Be VERY CAREFUL changing this: note hardcoded indexes below */ const char *ide_words[] = {"noprobe", "serialize", "autotune", "noautotune", - "qd6580", "ht6560b", "cmd640_vlb", "dtc2278", "umc8672", "ali14xx", "dc4030", NULL}; + "qd6580", "ht6560b", "cmd640_vlb", "dtc2278", "umc8672", "ali14xx", "dc4030", "reset", NULL}; hw = s[3] - '0'; hwif = &ide_hwifs[hw]; i = match_parm(&s[4], ide_words, vals, 3); @@ -2969,7 +2222,7 @@ /* * Cryptic check to ensure chipset not already set for hwif: */ - if (i > 0 || i <= -5) { + if (i > 0 || (i <= -5 && i != -12)) { if (hwif->chipset != ide_unknown) goto bad_option; if (i <= -5) { @@ -2980,10 +2233,14 @@ */ if (hw != 0) goto bad_hwif; + printk("\n"); } } switch (i) { + case -12: /* "reset" */ + hwif->reset = 1; + goto done; #ifdef 
CONFIG_BLK_DEV_PROMISE case -11: /* "dc4030" */ { @@ -3062,8 +2319,8 @@ case 2: /* base,ctl */ vals[2] = 0; /* default irq = probe for it */ case 3: /* base,ctl,irq */ - hwif->io_base = vals[0]; - hwif->ctl_port = vals[1]; + ide_init_hwif_ports(hwif->io_ports, (ide_ioreg_t) vals[0], &hwif->irq); + hwif->io_ports[IDE_CONTROL_OFFSET] = (ide_ioreg_t) vals[1]; hwif->irq = vals[2]; hwif->noprobe = 0; hwif->chipset = ide_generic; @@ -3155,149 +2412,8 @@ return 1; } -#if MAX_HWIFS > 1 -/* - * save_match() is used to simplify logic in init_irq() below. - * - * A loophole here is that we may not know about a particular - * hwif's irq until after that hwif is actually probed/initialized.. - * This could be a problem for the case where an hwif is on a - * dual interface that requires serialization (eg. cmd640) and another - * hwif using one of the same irqs is initialized beforehand. - * - * This routine detects and reports such situations, but does not fix them. - */ -static void save_match (ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match) -{ - ide_hwif_t *m = *match; - - if (m && m->hwgroup && m->hwgroup != new->hwgroup) { - if (!new->hwgroup) - return; - printk("%s: potential irq problem with %s and %s\n", hwif->name, new->name, m->name); - } - if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */ - *match = new; -} -#endif /* MAX_HWIFS > 1 */ - -/* - * This routine sets up the irq for an ide interface, and creates a new - * hwgroup for the irq/hwif if none was previously assigned. - * - * Much of the code is for correctly detecting/handling irq sharing - * and irq serialization situations. This is somewhat complex because - * it handles static as well as dynamic (PCMCIA) IDE interfaces. - * - * The SA_INTERRUPT in sa_flags means ide_intr() is always entered with - * interrupts completely disabled. This can be bad for interrupt latency, - * but anything else has led to problems on some machines. 
We re-enable - * interrupts as much as we can safely do in most places. - */ -static int init_irq (ide_hwif_t *hwif) -{ - unsigned long flags; -#if MAX_HWIFS > 1 - unsigned int index; -#endif /* MAX_HWIFS > 1 */ - ide_hwgroup_t *hwgroup; - ide_hwif_t *match = NULL; - - save_flags(flags); - cli(); - - hwif->hwgroup = NULL; -#if MAX_HWIFS > 1 - /* - * Group up with any other hwifs that share our irq(s). - */ - for (index = 0; index < MAX_HWIFS; index++) { - ide_hwif_t *h = &ide_hwifs[index]; - if (h->hwgroup) { /* scan only initialized hwif's */ - if (hwif->irq == h->irq) { - hwif->sharing_irq = h->sharing_irq = 1; - save_match(hwif, h, &match); - } - if (hwif->serialized) { - ide_hwif_t *mate = &ide_hwifs[hwif->index^1]; - if (index == mate->index || h->irq == mate->irq) - save_match(hwif, h, &match); - } - if (h->serialized) { - ide_hwif_t *mate = &ide_hwifs[h->index^1]; - if (hwif->irq == mate->irq) - save_match(hwif, h, &match); - } - } - } -#endif /* MAX_HWIFS > 1 */ - /* - * If we are still without a hwgroup, then form a new one - */ - if (match) { - hwgroup = match->hwgroup; - } else { - hwgroup = kmalloc(sizeof(ide_hwgroup_t), GFP_KERNEL); - hwgroup->hwif = hwgroup->next_hwif = hwif->next = hwif; - hwgroup->rq = NULL; - hwgroup->handler = NULL; - if (hwif->drives[0].present) - hwgroup->drive = &hwif->drives[0]; - else - hwgroup->drive = &hwif->drives[1]; - hwgroup->poll_timeout = 0; - init_timer(&hwgroup->timer); - hwgroup->timer.function = &timer_expiry; - hwgroup->timer.data = (unsigned long) hwgroup; - } - - /* - * Allocate the irq, if not already obtained for another hwif - */ - if (!match || match->irq != hwif->irq) { - if (request_irq(hwif->irq, ide_intr, SA_INTERRUPT, hwif->name, hwgroup)) { - if (!match) - kfree(hwgroup); - restore_flags(flags); - return 1; - } - } - - /* - * Everything is okay, so link us into the hwgroup - */ - hwif->hwgroup = hwgroup; - hwif->next = hwgroup->hwif->next; - hwgroup->hwif->next = hwif; - - restore_flags(flags); /* 
safe now that hwif->hwgroup is set up */ - - printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name, - hwif->io_base, hwif->io_base+7, hwif->ctl_port, hwif->irq); - if (match) - printk(" (%sed with %s)", hwif->sharing_irq ? "shar" : "serializ", match->name); - printk("\n"); - return 0; -} - -static struct file_operations ide_fops = { - NULL, /* lseek - default */ - block_read, /* read - general block-dev read */ - block_write, /* write - general block-dev write */ - NULL, /* readdir - bad */ - NULL, /* select */ - ide_ioctl, /* ioctl */ - NULL, /* mmap */ - ide_open, /* open */ - ide_release, /* release */ - block_fsync /* fsync */ - ,NULL, /* fasync */ - ide_check_media_change, /* check_media_change */ - revalidate_disk /* revalidate */ -}; - #ifdef CONFIG_PCI -#if defined(CONFIG_BLK_DEV_RZ1000) || defined(CONFIG_BLK_DEV_TRITON) +#if defined(CONFIG_BLK_DEV_RZ1000) || defined(CONFIG_BLK_DEV_TRITON) || defined(CONFIG_BLK_DEV_OPTI621) typedef void (ide_pci_init_proc_t)(byte, byte); @@ -3319,7 +2435,7 @@ restore_flags(flags); } -#endif /* defined(CONFIG_BLK_DEV_RZ1000) || defined(CONFIG_BLK_DEV_TRITON) */ +#endif /* defined(CONFIG_BLK_DEV_RZ1000) || defined(CONFIG_BLK_DEV_TRITON) || defined(CONFIG_BLK_DEV_OPTI621) */ #endif /* CONFIG_PCI */ /* @@ -3350,6 +2466,9 @@ ide_probe_pci (PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371_0, &ide_init_triton, 1); ide_probe_pci (PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_1, &ide_init_triton, 0); #endif /* CONFIG_BLK_DEV_TRITON */ +#ifdef CONFIG_BLK_DEV_OPTI621 + ide_probe_pci (PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C621, &ide_init_opti621, 0); +#endif /* CONFIG_BLK_DEV_OPTI621 */ } #endif /* CONFIG_PCI */ #ifdef CONFIG_BLK_DEV_CMD640 @@ -3363,196 +2482,287 @@ #endif } -static int hwif_init (int h) +void ide_init_builtin_drivers (void) { - ide_hwif_t *hwif = &ide_hwifs[h]; - void (*rfn)(void); - - if (!hwif->present) - return 0; - if (!hwif->irq) { - if (!(hwif->irq = default_irqs[h])) { - printk("%s: DISABLED, NO IRQ\n", 
hwif->name); - return (hwif->present = 0); - } - } -#ifdef CONFIG_BLK_DEV_HD - if (hwif->irq == HD_IRQ && hwif->io_base != HD_DATA) { - printk("%s: CANNOT SHARE IRQ WITH OLD HARDDISK DRIVER (hd.c)\n", hwif->name); - return (hwif->present = 0); - } -#endif /* CONFIG_BLK_DEV_HD */ - - hwif->present = 0; /* we set it back to 1 if all is ok below */ - switch (hwif->major) { - case IDE0_MAJOR: rfn = &do_ide0_request; break; -#if MAX_HWIFS > 1 - case IDE1_MAJOR: rfn = &do_ide1_request; break; -#endif -#if MAX_HWIFS > 2 - case IDE2_MAJOR: rfn = &do_ide2_request; break; -#endif -#if MAX_HWIFS > 3 - case IDE3_MAJOR: rfn = &do_ide3_request; break; -#endif - default: - printk("%s: request_fn NOT DEFINED\n", hwif->name); - return (hwif->present = 0); - } - if (register_blkdev (hwif->major, hwif->name, &ide_fops)) { - printk("%s: UNABLE TO GET MAJOR NUMBER %d\n", hwif->name, hwif->major); - } else if (init_irq (hwif)) { - printk("%s: UNABLE TO GET IRQ %d\n", hwif->name, hwif->irq); - (void) unregister_blkdev (hwif->major, hwif->name); - } else { - init_gendisk(hwif); - blk_dev[hwif->major].request_fn = rfn; - read_ahead[hwif->major] = 8; /* (4kB) */ - hwif->present = 1; /* success */ - } - return hwif->present; -} - -/* - * This is gets invoked once during initialization, to set *everything* up - */ -int ide_init (void) -{ - int index; - - init_ide_data (); /* * Probe for special "known" interface chipsets */ probe_for_hwifs (); - /* - * Probe for drives in the usual way.. 
CMOS/BIOS, then poke at ports - */ - for (index = 0; index < MAX_HWIFS; ++index) - probe_hwif (&ide_hwifs[index]); - for (index = 0; index < MAX_HWIFS; ++index) - hwif_init (index); - +#ifdef CONFIG_BLK_DEV_IDE +#ifdef __mc68000__ + if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { + ide_get_lock(&ide_lock, ide_intr, NULL); + disable_irq(ide_hwifs[0].irq); + } +#endif /* __mc68000__ */ + + (void) ideprobe_init(); + +#ifdef __mc68000__ + if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { + enable_irq(ide_hwifs[0].irq); + ide_release_lock(&ide_lock); + } +#endif /* __mc68000__ */ +#endif /* CONFIG_BLK_DEV_IDE */ + +#ifdef CONFIG_BLK_DEV_IDEDISK + (void) idedisk_init(); +#endif /* CONFIG_BLK_DEV_IDEDISK */ +#ifdef CONFIG_BLK_DEV_IDECD + (void) ide_cdrom_init(); +#endif /* CONFIG_BLK_DEV_IDECD */ #ifdef CONFIG_BLK_DEV_IDETAPE - idetape_register_chrdev(); /* Register character device interface to the ide tape */ + (void) idetape_init(); #endif /* CONFIG_BLK_DEV_IDETAPE */ - - return 0; +#ifdef CONFIG_BLK_DEV_IDEFLOPPY + (void) idefloppy_init(); +#endif /* CONFIG_BLK_DEV_IDEFLOPPY */ } -#ifdef CONFIG_BLK_DEV_IDE_PCMCIA -int ide_register(int io_base, int ctl_port, int irq) +static int default_cleanup (ide_drive_t *drive) { - int index, i, rc = -1; - ide_hwif_t *hwif; + return ide_unregister_subdriver(drive); +} + +static void default_do_request(ide_drive_t *drive, struct request *rq, unsigned long block) +{ + ide_end_request(0, HWGROUP(drive)); +} + +static void default_end_request (byte uptodate, ide_hwgroup_t *hwgroup) +{ + ide_end_request(uptodate, hwgroup); +} + +static int default_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + return -EIO; +} + +static int default_open (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ + drive->usage--; + return -EIO; +} + +static void default_release (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ +} + +static int default_check_media_change 
(ide_drive_t *drive) +{ + return 1; +} + +static void default_pre_reset (ide_drive_t *drive) +{ +} + +static unsigned long default_capacity (ide_drive_t *drive) +{ + return 0x7fffffff; /* cdrom or tape */ +} + +static void default_special (ide_drive_t *drive) +{ + special_t *s = &drive->special; + + s->all = 0; + drive->mult_req = 0; +} + +static void setup_driver_defaults (ide_drive_t *drive) +{ + ide_driver_t *d = drive->driver; + + if (d->cleanup == NULL) d->cleanup = default_cleanup; + if (d->do_request == NULL) d->do_request = default_do_request; + if (d->end_request == NULL) d->end_request = default_end_request; + if (d->ioctl == NULL) d->ioctl = default_ioctl; + if (d->open == NULL) d->open = default_open; + if (d->release == NULL) d->release = default_release; + if (d->media_change == NULL) d->media_change = default_check_media_change; + if (d->pre_reset == NULL) d->pre_reset = default_pre_reset; + if (d->capacity == NULL) d->capacity = default_capacity; + if (d->special == NULL) d->special = default_special; +} + +ide_drive_t *ide_scan_devices (byte media, ide_driver_t *driver, int n) +{ + unsigned int unit, index, i; ide_drive_t *drive; - unsigned long flags; + for (index = 0, i = 0; index < MAX_HWIFS; ++index) { + for (unit = 0; unit < MAX_DRIVES; ++unit) { + drive = &ide_hwifs[index].drives[unit]; + if (drive->present && drive->media == media && + drive->driver == driver && ++i > n) + return drive; + } + } + return NULL; +} + +int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version) +{ + unsigned long flags; + save_flags(flags); cli(); - for (index = 0; index < MAX_HWIFS; ++index) { - hwif = &ide_hwifs[index]; - if (hwif->present) { - if (hwif->io_base == io_base || hwif->ctl_port == ctl_port) - break; /* this ide port already exists */ - } else { - hwif->io_base = io_base; - hwif->ctl_port = ctl_port; - hwif->irq = irq; - hwif->noprobe = 0; - probe_hwif(hwif); - if (!hwif_init(index)) - break; - for (i = 0; i < 
hwif->gd->nr_real; i++) { - drive = &hwif->drives[i]; - revalidate_disk(MKDEV(hwif->major, i<present && drive->media == ide_cdrom) - ide_cdrom_setup(drive); -#endif /* CONFIG_BLK_DEV_IDECD */ - } - rc = index; - break; - } + if (version != IDE_SUBDRIVER_VERSION || !drive->present || drive->driver != NULL || + drive->busy || drive->usage || drive->media != driver->media) { + restore_flags(flags); + return 1; } + drive->driver = driver; + setup_driver_defaults(drive); restore_flags(flags); - return rc; + if (driver->supports_dma && !drive->using_dma && drive->autotune != 2 && HWIF(drive)->dmaproc != NULL) + (void) (HWIF(drive)->dmaproc(ide_dma_check, drive)); + drive->revalidate = 1; + return 0; } -void ide_unregister (unsigned int index) +int ide_unregister_subdriver (ide_drive_t *drive) { - struct gendisk *gd, **gdp; - ide_hwif_t *hwif, *g; - ide_hwgroup_t *hwgroup; - int irq_count = 0; unsigned long flags; - - if (index >= MAX_HWIFS) - return; + save_flags(flags); cli(); - hwif = &ide_hwifs[index]; - if (!hwif->present || hwif->drives[0].busy || hwif->drives[1].busy) { + if (drive->usage || drive->busy || drive->driver == NULL || DRIVER(drive)->busy) { restore_flags(flags); - return; + return 1; } - hwif->present = 0; - hwgroup = hwif->hwgroup; + drive->driver = NULL; + restore_flags(flags); + return 0; +} - /* - * free the irq if we were the only hwif using it - */ - g = hwgroup->hwif; - do { - if (g->irq == hwif->irq) - ++irq_count; - g = g->next; - } while (g != hwgroup->hwif); - if (irq_count == 1) - free_irq(hwif->irq, hwgroup); +int ide_register_module (ide_module_t *module) +{ + ide_module_t *p = ide_modules; - /* - * Note that we only release the standard ports, - * and do not even try to handle any extra ports - * allocated for weird IDE interface chipsets. 
- */ - release_region(hwif->io_base, 8); - release_region(hwif->ctl_port, 1); + while (p) { + if (p == module) + return 1; + p = p->next; + } + module->next = ide_modules; + ide_modules = module; + revalidate_drives(); + return 0; +} + +void ide_unregister_module (ide_module_t *module) +{ + ide_module_t **p; + + for (p = &ide_modules; (*p) && (*p) != module; p = &((*p)->next)); + if (*p) + *p = (*p)->next; +} + +struct file_operations ide_fops[] = {{ + NULL, /* lseek - default */ + block_read, /* read - general block-dev read */ + block_write, /* write - general block-dev write */ + NULL, /* readdir - bad */ + NULL, /* select */ + ide_ioctl, /* ioctl */ + NULL, /* mmap */ + ide_open, /* open */ + ide_release, /* release */ + block_fsync, /* fsync */ + NULL, /* fasync */ + ide_check_media_change, /* check_media_change */ + ide_revalidate_disk /* revalidate */ +}}; + +static struct symbol_table ide_syms = { +#include + X(ide_hwifs), + X(ide_register_module), X(ide_unregister_module), /* - * Remove us from the hwgroup, and free - * the hwgroup if we were the only member + * Probe module */ - while (hwgroup->hwif->next != hwif) - hwgroup->hwif = hwgroup->hwif->next; - hwgroup->hwif->next = hwif->next; - if (hwgroup->hwif == hwif) - hwgroup->hwif = hwif->next; - if (hwgroup->next_hwif == hwif) - hwgroup->next_hwif = hwif->next; - if (hwgroup->hwif == hwif) - kfree(hwgroup); + X(ide_timer_expiry), X(ide_intr), + X(ide_geninit), X(ide_fops), + X(do_ide0_request), +#if MAX_HWIFS > 1 + X(do_ide1_request), +#endif /* MAX_HWIFS > 1 */ +#if MAX_HWIFS > 2 + X(do_ide2_request), +#endif /* MAX_HWIFS > 2 */ +#if MAX_HWIFS > 3 + X(do_ide3_request), +#endif /* MAX_HWIFS > 3 */ /* - * Remove us from the kernel's knowledge + * Driver module */ - unregister_blkdev(hwif->major, hwif->name); - kfree(blksize_size[hwif->major]); - blk_dev[hwif->major].request_fn = NULL; - blksize_size[hwif->major] = NULL; - for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) - if (*gdp == hwif->gd) - 
break; - if (*gdp == NULL) - printk("gd not in disk chain!\n"); - else { - gd = *gdp; *gdp = gd->next; - kfree(gd->sizes); - kfree(gd->part); - kfree(gd); + X(ide_scan_devices), X(ide_register_subdriver), + X(ide_unregister_subdriver), X(ide_input_data), + X(ide_output_data), X(atapi_input_bytes), + X(atapi_output_bytes), X(ide_set_handler), + X(ide_dump_status), X(ide_error), + X(ide_fixstring), X(ide_wait_stat), + X(ide_do_reset), X(ide_init_drive_cmd), + X(ide_do_drive_cmd), X(ide_end_drive_cmd), + X(ide_end_request), X(ide_revalidate_disk), + X(ide_cmd), + + X(ide_register), X(ide_unregister), +#include +}; + +/* + * This is gets invoked once during initialization, to set *everything* up + */ +int ide_init (void) +{ + init_ide_data (); + + initializing = 1; + ide_init_builtin_drivers(); + initializing = 0; + + (void) register_symtab(&ide_syms); + return 0; +} + +#ifdef MODULE +char *options = NULL; + +static void parse_options (char *line) +{ + char *next = line; + + if (line == NULL || !*line) + return; + while ((line = next) != NULL) { + if ((next = strchr(line,' ')) != NULL) + *next++ = 0; + if (!strncmp(line,"ide",3) || (!strncmp(line,"hd",2) && line[2] != '=')) + ide_setup(line); } - init_hwif_data (index); /* restore hwif data to pristine status */ - restore_flags(flags); } -#endif /* CONFIG_BLK_DEV_IDE_PCMCIA */ + +int init_module (void) +{ + parse_options(options); + return ide_init(); +} + +void cleanup_module (void) +{ + int index; + + for (index = 0; index < MAX_HWIFS; ++index) + ide_unregister(index); +} +#endif /* MODULE */ diff -u --recursive --new-file v2.1.7/linux/drivers/block/ide.h linux/drivers/block/ide.h --- v2.1.7/linux/drivers/block/ide.h Mon Sep 30 11:20:10 1996 +++ linux/drivers/block/ide.h Thu Nov 7 19:52:18 1996 @@ -1,10 +1,11 @@ /* * linux/drivers/block/ide.h * - * Copyright (C) 1994, 1995 Linus Torvalds & authors + * Copyright (C) 1994-1996 Linus Torvalds & authors */ #include +#include /* * This is the multiple IDE interface 
driver, as evolved from hd.c. @@ -51,10 +52,6 @@ #endif #endif /* CONFIG_BLK_DEV_CMD640 */ -#if defined(CONFIG_BLK_DEV_IDECD) || defined(CONFIG_BLK_DEV_IDETAPE) -#define CONFIG_BLK_DEV_IDEATAPI 1 -#endif - /* * IDE_DRIVE_CMD is used to implement many features of the hdparm utility */ @@ -80,12 +77,13 @@ #undef REALLY_FAST_IO #endif +#define HWIF(drive) ((ide_hwif_t *)((drive)->hwif)) +#define HWGROUP(drive) ((ide_hwgroup_t *)(HWIF(drive)->hwgroup)) + /* * Definitions for accessing IDE controller registers */ - -#define HWIF(drive) ((ide_hwif_t *)((drive)->hwif)) -#define HWGROUP(drive) ((ide_hwgroup_t *)(HWIF(drive)->hwgroup)) +#define IDE_NR_PORTS (10) #define IDE_DATA_OFFSET (0) #define IDE_ERROR_OFFSET (1) @@ -95,21 +93,29 @@ #define IDE_HCYL_OFFSET (5) #define IDE_SELECT_OFFSET (6) #define IDE_STATUS_OFFSET (7) +#define IDE_CONTROL_OFFSET (8) +#define IDE_IRQ_OFFSET (9) + #define IDE_FEATURE_OFFSET IDE_ERROR_OFFSET #define IDE_COMMAND_OFFSET IDE_STATUS_OFFSET -#define IDE_DATA_REG (HWIF(drive)->io_base+IDE_DATA_OFFSET) -#define IDE_ERROR_REG (HWIF(drive)->io_base+IDE_ERROR_OFFSET) -#define IDE_NSECTOR_REG (HWIF(drive)->io_base+IDE_NSECTOR_OFFSET) -#define IDE_SECTOR_REG (HWIF(drive)->io_base+IDE_SECTOR_OFFSET) -#define IDE_LCYL_REG (HWIF(drive)->io_base+IDE_LCYL_OFFSET) -#define IDE_HCYL_REG (HWIF(drive)->io_base+IDE_HCYL_OFFSET) -#define IDE_SELECT_REG (HWIF(drive)->io_base+IDE_SELECT_OFFSET) -#define IDE_STATUS_REG (HWIF(drive)->io_base+IDE_STATUS_OFFSET) -#define IDE_CONTROL_REG (HWIF(drive)->ctl_port) +#define IDE_DATA_REG (HWIF(drive)->io_ports[IDE_DATA_OFFSET]) +#define IDE_ERROR_REG (HWIF(drive)->io_ports[IDE_ERROR_OFFSET]) +#define IDE_NSECTOR_REG (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET]) +#define IDE_SECTOR_REG (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET]) +#define IDE_LCYL_REG (HWIF(drive)->io_ports[IDE_LCYL_OFFSET]) +#define IDE_HCYL_REG (HWIF(drive)->io_ports[IDE_HCYL_OFFSET]) +#define IDE_SELECT_REG (HWIF(drive)->io_ports[IDE_SELECT_OFFSET]) 
+#define IDE_STATUS_REG (HWIF(drive)->io_ports[IDE_STATUS_OFFSET]) +#define IDE_CONTROL_REG (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET]) +#define IDE_IRQ_REG (HWIF(drive)->io_ports[IDE_IRQ_OFFSET]) + #define IDE_FEATURE_REG IDE_ERROR_REG #define IDE_COMMAND_REG IDE_STATUS_REG #define IDE_ALTSTATUS_REG IDE_CONTROL_REG +#define IDE_IREASON_REG IDE_NSECTOR_REG +#define IDE_BCOUNTL_REG IDE_LCYL_REG +#define IDE_BCOUNTH_REG IDE_HCYL_REG #ifdef REALLY_FAST_IO #define OUT_BYTE(b,p) outb((b),(p)) @@ -136,9 +142,6 @@ #define PARTN_BITS 6 /* number of minor dev bits for partitions */ #define PARTN_MASK ((1<selectproc) \ hwif->selectproc(drive); \ else \ - OUT_BYTE((drive)->select.all, hwif->io_base+IDE_SELECT_OFFSET); \ + OUT_BYTE((drive)->select.all, hwif->io_ports[IDE_SELECT_OFFSET]); \ } #else -#define SELECT_DRIVE(hwif,drive) OUT_BYTE((drive)->select.all, hwif->io_base+IDE_SELECT_OFFSET); +#define SELECT_DRIVE(hwif,drive) OUT_BYTE((drive)->select.all, hwif->io_ports[IDE_SELECT_OFFSET]); #endif /* CONFIG_BLK_DEV_HT6560B || CONFIG_BLK_DEV_PROMISE */ -#ifdef CONFIG_BLK_DEV_IDETAPE -#include "ide-tape.h" -#endif /* CONFIG_BLK_DEV_IDETAPE */ - -#ifdef CONFIG_BLK_DEV_IDECD - -struct atapi_request_sense { - unsigned char error_code : 7; - unsigned char valid : 1; - byte reserved1; - unsigned char sense_key : 4; - unsigned char reserved2 : 1; - unsigned char ili : 1; - unsigned char reserved3 : 2; - byte info[4]; - byte sense_len; - byte command_info[4]; - byte asc; - byte ascq; - byte fru; - byte sense_key_specific[3]; -}; - -struct packet_command { - char *buffer; - int buflen; - int stat; - struct atapi_request_sense *sense_data; - unsigned char c[12]; -}; - - -/* Structure of a MSF cdrom address. */ -struct atapi_msf { - byte reserved; - byte minute; - byte second; - byte frame; -}; - - -/* Space to hold the disk TOC. 
*/ - -#define MAX_TRACKS 99 -struct atapi_toc_header { - unsigned short toc_length; - byte first_track; - byte last_track; -}; - -struct atapi_toc_entry { - byte reserved1; - unsigned control : 4; - unsigned adr : 4; - byte track; - byte reserved2; - union { - unsigned lba; - struct atapi_msf msf; - } addr; -}; - -struct atapi_toc { - int last_session_lba; - int xa_flag; - unsigned capacity; - struct atapi_toc_header hdr; - struct atapi_toc_entry ent[MAX_TRACKS+1]; - /* One extra for the leadout. */ -}; - - -/* This structure is annoyingly close to, but not identical with, - the cdrom_subchnl structure from cdrom.h. */ -struct atapi_cdrom_subchnl -{ - u_char acdsc_reserved; - u_char acdsc_audiostatus; - u_short acdsc_length; - u_char acdsc_format; - - u_char acdsc_adr: 4; - u_char acdsc_ctrl: 4; - u_char acdsc_trk; - u_char acdsc_ind; - union { - struct atapi_msf msf; - int lba; - } acdsc_absaddr; - union { - struct atapi_msf msf; - int lba; - } acdsc_reladdr; -}; - - -/* Extra per-device info for cdrom drives. */ -struct cdrom_info { - - /* Buffer for table of contents. NULL if we haven't allocated - a TOC buffer for this device yet. */ - - struct atapi_toc *toc; - - /* Sector buffer. If a read request wants only the first part - of a cdrom block, we cache the rest of the block here, - in the expectation that that data is going to be wanted soon. - SECTOR_BUFFERED is the number of the first buffered sector, - and NSECTORS_BUFFERED is the number of sectors in the buffer. - Before the buffer is allocated, we should have - SECTOR_BUFFER == NULL and NSECTORS_BUFFERED == 0. */ - - unsigned long sector_buffered; - unsigned long nsectors_buffered; - char *sector_buffer; - - /* The result of the last successful request sense command - on this device. 
*/ - struct atapi_request_sense sense_data; -}; - -#endif /* CONFIG_BLK_DEV_IDECD */ - /* * Now for the data we need to maintain per-drive: ide_drive_t */ -typedef enum {ide_disk, ide_cdrom, ide_tape} ide_media_t; +#define ide_disk 0x20 +#define ide_cdrom 0x5 +#define ide_tape 0x1 +#define ide_floppy 0x0 typedef union { unsigned all : 8; /* all of the bits together */ @@ -306,17 +189,6 @@ } b; } special_t; -typedef union { - unsigned all : 8; /* all of the bits together */ - struct { - unsigned head : 4; /* always zeros here */ - unsigned unit : 1; /* drive select number, 0 or 1 */ - unsigned bit5 : 1; /* always 1 */ - unsigned lba : 1; /* using LBA instead of CHS */ - unsigned bit7 : 1; /* always 1 */ - } b; - } select_t; - typedef struct ide_drive_s { special_t special; /* special action flags */ unsigned present : 1; /* drive is physically present */ @@ -332,10 +204,12 @@ unsigned nobios : 1; /* flag: do not probe bios for drive */ unsigned slow : 1; /* flag: slow data port */ unsigned autotune : 2; /* 1=autotune, 2=noautotune, 0=default */ + unsigned revalidate : 1; /* request revalidation */ + unsigned bswap : 1; /* flag: byte swap data */ #if FAKE_FDISK_FOR_EZDRIVE unsigned remap_0_to_1 : 1; /* flag: partitioned with ezdrive */ #endif /* FAKE_FDISK_FOR_EZDRIVE */ - ide_media_t media; /* disk, cdrom, tape */ + byte media; /* disk, cdrom, tape, floppy, ... 
*/ select_t select; /* basic drive/head select reg value */ byte ctl; /* "normal" value for IDE_CONTROL_REG */ byte ready_stat; /* min status value for drive ready */ @@ -357,12 +231,8 @@ struct hd_driveid *id; /* drive model identification info */ struct hd_struct *part; /* drive partition table */ char name[4]; /* drive name, such as "hda" */ -#ifdef CONFIG_BLK_DEV_IDECD - struct cdrom_info cdrom_info; /* for ide-cd.c */ -#endif /* CONFIG_BLK_DEV_IDECD */ -#ifdef CONFIG_BLK_DEV_IDETAPE - idetape_tape_t tape; /* for ide-tape.c */ -#endif /* CONFIG_BLK_DEV_IDETAPE */ + void *driver; /* (ide_driver_t *) */ + void *driver_data; /* extra driver data */ } ide_drive_t; /* @@ -416,8 +286,7 @@ typedef struct hwif_s { struct hwif_s *next; /* for linked-list in ide_hwgroup_t */ void *hwgroup; /* actually (ide_hwgroup_t *) */ - unsigned short io_base; /* base io port addr */ - unsigned short ctl_port; /* usually io_base+0x206 */ + ide_ioreg_t io_ports[IDE_NR_PORTS]; /* task file registers */ ide_drive_t drives[MAX_DRIVES]; /* drive info */ struct gendisk *gd; /* gendisk structure */ ide_tuneproc_t *tuneproc; /* routine to tune PIO mode for drives */ @@ -427,9 +296,9 @@ ide_dmaproc_t *dmaproc; /* dma read/write/abort routine */ unsigned long *dmatable; /* dma physical region descriptor table */ unsigned short dma_base; /* base addr for dma ports (triton) */ - byte irq; /* our irq number */ + int irq; /* our irq number */ byte major; /* our major number */ - char name[5]; /* name of interface, eg. "ide0" */ + char name[6]; /* name of interface, eg. "ide0" */ byte index; /* 0 for ide0; 1 for ide1; ... */ hwif_chipset_t chipset; /* sub-module for tuning.. 
*/ unsigned noprobe : 1; /* don't probe for this interface */ @@ -439,16 +308,10 @@ #ifdef CONFIG_BLK_DEV_PROMISE unsigned is_promise2: 1; /* 2nd i/f on promise DC4030 */ #endif /* CONFIG_BLK_DEV_PROMISE */ + unsigned reset : 1; /* reset after probe */ #if (DISK_RECOVERY_TIME > 0) unsigned long last_time; /* time when previous rq was done */ #endif -#ifdef CONFIG_BLK_DEV_IDECD - struct request request_sense_request; /* from ide-cd.c */ - struct packet_command request_sense_pc; /* from ide-cd.c */ -#endif /* CONFIG_BLK_DEV_IDECD */ -#ifdef CONFIG_BLK_DEV_IDETAPE - ide_drive_t *tape_drive; /* Pointer to the tape on this interface */ -#endif /* CONFIG_BLK_DEV_IDETAPE */ } ide_hwif_t; /* @@ -468,6 +331,55 @@ } ide_hwgroup_t; /* + * Subdrivers support. + */ +#define IDE_SUBDRIVER_VERSION 0 + +typedef int (ide_cleanup_proc)(ide_drive_t *); +typedef void (ide_do_request_proc)(ide_drive_t *, struct request *, unsigned long); +typedef void (ide_end_request_proc)(byte, ide_hwgroup_t *); +typedef int (ide_ioctl_proc)(ide_drive_t *, struct inode *, struct file *, unsigned int, unsigned long); +typedef int (ide_open_proc)(struct inode *, struct file *, ide_drive_t *); +typedef void (ide_release_proc)(struct inode *, struct file *, ide_drive_t *); +typedef int (ide_check_media_change_proc)(ide_drive_t *); +typedef void (ide_pre_reset_proc)(ide_drive_t *); +typedef unsigned long (ide_capacity_proc)(ide_drive_t *); +typedef void (ide_special_proc)(ide_drive_t *); + +typedef struct ide_driver_s { + byte media; + unsigned busy : 1; + unsigned supports_dma : 1; + ide_cleanup_proc *cleanup; + ide_do_request_proc *do_request; + ide_end_request_proc *end_request; + ide_ioctl_proc *ioctl; + ide_open_proc *open; + ide_release_proc *release; + ide_check_media_change_proc *media_change; + ide_pre_reset_proc *pre_reset; + ide_capacity_proc *capacity; + ide_special_proc *special; + } ide_driver_t; + +#define DRIVER(drive) ((ide_driver_t *)((drive)->driver)) + +/* + * IDE modules. 
+ */ +#define IDE_CHIPSET_MODULE 0 /* not supported yet */ +#define IDE_PROBE_MODULE 1 +#define IDE_DRIVER_MODULE 2 + +typedef int (ide_module_init_proc)(void); + +typedef struct ide_module_s { + int type; + ide_module_init_proc *init; + struct ide_module_s *next; +} ide_module_t; + +/* * ide_hwifs[] is the master data structure used to keep track * of just about everything in ide.c. Whenever possible, routines * should be using pointers to a drive (ide_drive_t *) or @@ -485,22 +397,17 @@ #define IDE_DRIVER /* "parameter" for blk.h */ #include -#if (DISK_RECOVERY_TIME > 0) -void ide_set_recovery_timer (ide_hwif_t *); -#define SET_RECOVERY_TIMER(drive) ide_set_recovery_timer (drive) -#else -#define SET_RECOVERY_TIMER(drive) -#endif - /* - * This is used for (nearly) all data transfers from the IDE interface + * This is used for (nearly) all data transfers from/to the IDE interface */ void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount); +void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount); /* - * This is used for (nearly) all data transfers to the IDE interface + * This is used for (nearly) all ATAPI data transfers from/to the IDE interface */ -void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount); +void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount); +void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount); /* * This is used on exit from the driver, to designate the next irq handler @@ -520,6 +427,12 @@ void ide_error (ide_drive_t *drive, const char *msg, byte stat); /* + * Issue a simple drive command + * The drive must be selected beforehand. + */ +void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler); + +/* * ide_fixstring() cleans up and (optionally) byte-swaps a text string, * removing leading/trailing blanks and compressing internal blanks. 
* It is primarily used to tidy up the model name/number fields as @@ -611,7 +524,7 @@ /* * ide_system_bus_speed() returns what we think is the system VESA/PCI - * bus speed (in Mhz). This is used for calculating interface PIO timings. + * bus speed (in MHz). This is used for calculating interface PIO timings. * The default is 40 for known PCI systems, 50 otherwise. * The "idebus=xx" parameter can be used to override this value. */ @@ -623,76 +536,61 @@ */ void ide_multwrite (ide_drive_t *drive, unsigned int mcount); -#ifdef CONFIG_BLK_DEV_IDECD -/* - * These are routines in ide-cd.c invoked from ide.c - */ -void ide_do_rw_cdrom (ide_drive_t *, unsigned long); -int ide_cdrom_ioctl (ide_drive_t *, struct inode *, struct file *, unsigned int, unsigned long); -int ide_cdrom_check_media_change (ide_drive_t *); -int ide_cdrom_open (struct inode *, struct file *, ide_drive_t *); -void ide_cdrom_release (struct inode *, struct file *, ide_drive_t *); -void ide_cdrom_setup (ide_drive_t *); -#endif /* CONFIG_BLK_DEV_IDECD */ - -#ifdef CONFIG_BLK_DEV_IDETAPE - -/* - * Functions in ide-tape.c which are invoked from ide.c: - */ - -/* - * idetape_identify_device is called during device probing stage to - * probe for an ide atapi tape drive and to initialize global variables - * in ide-tape.c which provide the link between the character device - * and the corresponding block device. - * - * Returns 1 if an ide tape was detected and is supported. - * Returns 0 otherwise. - */ - -int idetape_identify_device (ide_drive_t *drive,struct hd_driveid *id); - -/* - * idetape_setup is called a bit later than idetape_identify_device, - * during the search for disk partitions, to initialize various tape - * state variables in ide_drive_t *drive. - */ - -void idetape_setup (ide_drive_t *drive); - -/* - * idetape_do_request is our request function. It is called by ide.c - * to process a new request. 
- */ - -void idetape_do_request (ide_drive_t *drive, struct request *rq, unsigned long block); +void ide_revalidate_drives (void); -/* - * idetape_end_request is used to finish servicing a request, and to - * insert a pending pipeline request into the main device queue. - */ - -void idetape_end_request (byte uptodate, ide_hwgroup_t *hwgroup); - -/* - * Block device interface functions. - */ - -int idetape_blkdev_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg); -int idetape_blkdev_open (struct inode *inode, struct file *filp, ide_drive_t *drive); -void idetape_blkdev_release (struct inode *inode, struct file *filp, ide_drive_t *drive); +void ide_timer_expiry (unsigned long data); +void ide_intr (int irq, void *dev_id, struct pt_regs *regs); +void ide_geninit (struct gendisk *gd); +void do_ide0_request (void); +#if MAX_HWIFS > 1 +void do_ide1_request (void); +#endif +#if MAX_HWIFS > 2 +void do_ide2_request (void); +#endif +#if MAX_HWIFS > 3 +void do_ide3_request (void); +#endif +void ide_init_subdrivers (void); -/* - * idetape_register_chrdev initializes the character device interface to - * the ide tape drive. 
- */ - -void idetape_register_chrdev (void); +#ifndef _IDE_C +extern struct file_operations ide_fops[]; +#endif +#ifdef CONFIG_BLK_DEV_IDECD +int ide_cdrom_init (void); +#endif /* CONFIG_BLK_DEV_IDECD */ +#ifdef CONFIG_BLK_DEV_IDETAPE +int idetape_init (void); #endif /* CONFIG_BLK_DEV_IDETAPE */ +#ifdef CONFIG_BLK_DEV_IDEFLOPPY +int idefloppy_init (void); +#endif /* CONFIG_BLK_DEV_IDEFLOPPY */ +#ifdef CONFIG_BLK_DEV_IDEDISK +int idedisk_init (void); +#endif /* CONFIG_BLK_DEV_IDEDISK */ + +int ide_register_module (ide_module_t *module); +void ide_unregister_module (ide_module_t *module); +ide_drive_t *ide_scan_devices (byte media, ide_driver_t *driver, int n); +int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version); +int ide_unregister_subdriver (ide_drive_t *drive); #ifdef CONFIG_BLK_DEV_TRITON void ide_init_triton (byte, byte); #endif /* CONFIG_BLK_DEV_TRITON */ + +#ifdef CONFIG_BLK_DEV_OPTI621 +void ide_init_opti621 (byte, byte); +#endif /* CONFIG_BLK_DEV_OPTI621 */ + +#ifdef CONFIG_BLK_DEV_IDE +int ideprobe_init (void); +#endif /* CONFIG_BLK_DEV_IDE */ + +#ifdef CONFIG_BLK_DEV_PROMISE +#include "promise.h" +#define IS_PROMISE_DRIVE (HWIF(drive)->chipset == ide_promise) +#else +#define IS_PROMISE_DRIVE (0) /* auto-NULLs out Promise code */ +#endif /* CONFIG_BLK_DEV_PROMISE */ diff -u --recursive --new-file v2.1.7/linux/drivers/block/opti621.c linux/drivers/block/opti621.c --- v2.1.7/linux/drivers/block/opti621.c Thu Jan 1 02:00:00 1970 +++ linux/drivers/block/opti621.c Wed Nov 6 14:49:33 1996 @@ -0,0 +1,337 @@ +/* + * linux/drivers/block/opti621.c Version 0.1 Oct 26, 1996 + * + * Copyright (C) 1996 Linus Torvalds & author (see below) + */ + +/* + * OPTi 82C621 chipset EIDE controller driver + * Author: Jaromir Koutek (E-mail: Jaromir.Koutek@st.mff.cuni.cz) + * + * Some parts of code are from ali14xx.c and from rz1000.c. 
+ * I used docs from OPTi databook, from ftp.opti.com, file 9123-0002.ps + * and disassembled/traced setupvic.exe (DOS program). + * It increases kernel code about 2 kB. + * My card is Octek PIDE 1.01 (on card) or OPTiViC (program). + * It has a place for a secondary connector in circuit, but nothing + * is there. It cost about $25. Also BIOS says no address for + * secondary controller (see bellow in ide_init_opti621). + * I've only tested this on my system, which only has one disk. + * It's Western Digital WDAC2850, with PIO mode 3. The PCI bus + * is at 20 MHz (I have DX2/80, I tried PCI at 40, but I got random + * lockups). I tried the OCTEK double speed CD-ROM and + * it does not work! But I can't boot DOS also, so it's probably + * hardware fault. I have connected Conner 80MB, the Seagate 850MB (no + * problems) and Seagate 1GB (as slave, WD as master). My experiences + * with the third, 1GB drive: I got 3MB/s (hdparm), but sometimes + * it slows to about 100kB/s! I don't know why and I have + * not this drive now, so I can't try it again. + * If you have two disk, please boot in single mode and carefully + * (you can boot on read-only fs) try to set PIO mode 0 etc. + * The main problem with OPTi is that some timings for master + * and slave must be the same. For example, if you have master + * PIO 3 and slave PIO 0, driver have to set some timings of + * master for PIO 0. Second problem is that opti621_tune_drive + * got only one drive to set, but have to set both drives. + * This is solved in opti621_compute_pios. If you don't set + * the second drive, opti621_compute_pios use ide_get_best_pio_mode + * for autoselect mode (you can change it to PIO 0, if you want). + * If you then set the second drive to another PIO, the old value + * (automatically selected) will be overrided by yours. 
+ * I don't know what there is a 25/33MHz switch in configuration + * register, driver is written for use at any frequency which get + * (use idebus=xx to select PCI bus speed). + * Use ide0=autotune for automatical tune of the PIO modes. + * If you get strange results, do not use this and set PIO manually + * by hdparm. + * I write this driver because I lost the paper ("manual") with + * settings of jumpers on the card and I have to boot Linux with + * Loadlin except LILO, cause I have to run the setupvic.exe program + * already or I get disk errors (my test: rpm -Vf + * /usr/X11R6/bin/XF86_SVGA - or any big file). + * Some numbers from hdparm -t /dev/hda: + * Timing buffer-cache reads: 32 MB in 3.02 seconds =10.60 MB/sec + * Timing buffered disk reads: 16 MB in 5.52 seconds = 2.90 MB/sec + * I have 4 Megs/s before, but I don't know why (maybe bad hdparm). + * If you tried this driver, please send me a E-mail of your experiences. + * My E-mail address is Jaromir.Koutek@st.mff.cuni.cz (I hope + * till 30. 6. 2000), otherwise you can try miri@atrey.karlin.mff.cuni.cz. + * I think OPTi is trademark of OPTi, Octek is trademark of Octek and so on. + */ + +#undef REALLY_SLOW_IO /* most systems can safely undef this */ +#define OPTI621_DEBUG /* define for debug messages */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ide.h" +#include "ide_modes.h" +#include +#include + +#define OPTI621_MAX_PIO 3 +/* In fact, I do not have any PIO 4 drive + * (address: 25 ns, data: 70 ns, recovery: 35 ns), + * but OPTi 82C621 is programmable and it can do (minimal values): + * on 40MHz PCI bus (pulse 25 ns): + * address: 25 ns, data: 25 ns, recovery: 50 ns; + * on 20MHz PCI bus (pulse 50 ns): + * address: 50 ns, data: 50 ns, recovery: 100 ns. + */ + +/* #define READ_PREFETCH 0 */ +/* Uncommnent for disable read prefetch. 
+ * There is some readprefetch capatibility in hdparm, + * but when I type hdparm -P 1 /dev/hda, I got errors + * and till reset drive is inacessible. + * This (hw) read prefetch is safe on my drive. + */ + +#ifndef READ_PREFETCH +#define READ_PREFETCH 0x40 /* read prefetch is enabled */ +#endif /* else read prefetch is disabled */ + +#define READ_REG 0 /* index of Read cycle timing register */ +#define WRITE_REG 1 /* index of Write cycle timing register */ +#define MISC_REG 6 /* index of Miscellaneous register */ +#define CNTRL_REG 3 /* index of Control register */ +int reg_base; +int opti621_primary_base, opti621_secondary_base; + +#define PIO_NOT_EXIST 254 +#define PIO_DONT_KNOW 255 +int opti621_drive_pio_modes[4]; +/* there are stored pio numbers from other calls of opti621_tune_drive */ + +void opti621_compute_pios(ide_hwif_t *drv, int second_contr, int slave_drive, byte pio) +/* Store values into opti621_drive_pio_modes: + * second_contr - 0 for primary controller, 1 for secondary + * slave_drive - 0 -> pio is for master, 1 -> pio is for slave + * pio - PIO mode for selected drive (for other we don't know) + */ +{ + ide_drive_t *p1, *p2, *drive; + int i; + + i = 2*second_contr; + p1 = &drv->drives[0]; + p2 = &drv->drives[1]; + drive = &drv->drives[slave_drive]; + pio = ide_get_best_pio_mode(drive, pio, OPTI621_MAX_PIO, NULL); + opti621_drive_pio_modes[i+slave_drive]=pio; + + if (p1->present) { + if (opti621_drive_pio_modes[i]==PIO_DONT_KNOW) + opti621_drive_pio_modes[i]=ide_get_best_pio_mode(p1, + 255, OPTI621_MAX_PIO, NULL); + /* we don't know the selected PIO mode, so we have to autoselect */ + } else + opti621_drive_pio_modes[i]=PIO_NOT_EXIST; + if (p2->present) { + if (opti621_drive_pio_modes[i+1]==PIO_DONT_KNOW) + opti621_drive_pio_modes[i+1]=ide_get_best_pio_mode(p2, + 255, OPTI621_MAX_PIO, NULL); + /* we don't know the selected PIO mode, so we have to autoselect */ + } else + opti621_drive_pio_modes[i+1]=PIO_NOT_EXIST; + /* in 
opti621_drive_pio_modes[i] and [i+1] are valid PIO modes (or PIO_NOT_EXIST, + if drive is not connected), we can continue */ +#ifdef OPTI621_DEBUG + printk("%s: (master): ", p1->name); + if (p1->present) + printk("PIO mode %d\n", opti621_drive_pio_modes[i]); + else + printk("not present\n"); + printk("%s: (slave): ", p2->name); + if (p2->present) + printk("PIO mode %d\n", opti621_drive_pio_modes[i+1]); + else + printk("not present\n"); +#endif +} + +int cmpt_clk(int time, int bus_speed) +/* Returns (rounded up) time in clocks for time in ns, + * with bus_speed in MHz. + * Example: bus_speed = 40 MHz, time = 80 ns + * 1000/40 = 25 ns (clk value), + * 80/25 = 3.2, rounded up to 4 (I hope ;-)). + * Use idebus=xx to select right frequency. + */ +{ + return ((time*bus_speed+999)/1000); +} + +void write_reg(byte value, int reg) +/* Write value to register reg, base of register + * is at reg_base (0x1f0 primary, 0x170 secondary, + * if not changed by PCI configuration). + * This is from setupvic.exe program. + */ +{ + inw(reg_base+1); + inw(reg_base+1); + outb(3, reg_base+2); + outb(value, reg_base+reg); + outb(0x83, reg_base+2); +} + +byte read_reg(int reg) +/* Read value from register reg, base of register + * is at reg_base (0x1f0 primary, 0x170 secondary, + * if not changed by PCI configuration). + * This is from setupvic.exe program. 
+ */ +{ + byte ret; + inw(reg_base+1); + inw(reg_base+1); + outb(3, reg_base+2); + ret=inb(reg_base+reg); + outb(0x83, reg_base+2); + return ret; +} + +typedef struct pio_clocks_s { + int address_time; /* Address setup (clocks) */ + int data_time; /* Active/data pulse (clocks) */ + int recovery_time; /* Recovery time (clocks) */ +} pio_clocks_t; + +void compute_clocks(int pio, pio_clocks_t *clks) +{ + if (pio!=PIO_NOT_EXIST) { + int adr_setup, data_pls, bus_speed; + bus_speed = ide_system_bus_speed(); + adr_setup = ide_pio_timings[pio].setup_time; + data_pls = ide_pio_timings[pio].active_time; + clks->address_time = cmpt_clk(adr_setup, bus_speed); + clks->data_time = cmpt_clk(data_pls, bus_speed); + clks->recovery_time = cmpt_clk(ide_pio_timings[pio].cycle_time + -adr_setup-data_pls, bus_speed); + if (clks->address_time<1) clks->address_time = 1; + if (clks->address_time>4) clks->address_time = 4; + if (clks->data_time<1) clks->data_time = 1; + if (clks->data_time>16) clks->data_time = 16; + if (clks->recovery_time<2) clks->recovery_time = 2; + if (clks->recovery_time>17) clks->recovery_time = 17; + } else { + clks->address_time = 1; + clks->data_time = 1; + clks->recovery_time = 2; + /* minimal values */ + } +} + +static void opti621_tune_drive (ide_drive_t *drive, byte pio) +/* Main tune procedure, hooked by tuneproc. 
*/ +{ + /* primary and secondary drives share some (but not same) registers, + so we have to program both drives */ + unsigned long flags; + byte pio1, pio2; + int second_contr, slave_drive; + pio_clocks_t first, second; + int ax, drdy; + byte cycle1, cycle2, misc; + + second_contr=HWIF(drive)->index; + if ((second_contr!=0) && (second_contr!=1)) + return; /* invalid controller number */ + if (((second_contr==0) && (opti621_primary_base==0)) || + ((second_contr==1) && (opti621_secondary_base==0))) + return; /* controller is unaccessible/not exist */ + slave_drive = drive->select.b.unit; + /* set opti621_drive_pio_modes[] */ + opti621_compute_pios(HWIF(drive), second_contr, slave_drive, pio); + + reg_base = second_contr ? opti621_primary_base : opti621_secondary_base; + + pio1 = opti621_drive_pio_modes[second_contr*2]; + pio2 = opti621_drive_pio_modes[second_contr*2+1]; + + compute_clocks(pio1, &first); + compute_clocks(pio2, &second); + + ax = (first.address_timename, ax, first.data_time, first.recovery_time, drdy); + printk("%s: slave: address: %d, data: %d, recovery: %d, drdy: %d [clk]\n", + HWIF(drive)->name, ax, second.data_time, second.recovery_time, drdy); +#endif + + save_flags(flags); + cli(); + + outb(0xc0, reg_base+CNTRL_REG); /* allow Register-B */ + outb(0xff, reg_base+5); /* hmm, setupvic.exe does this ;-) */ + inb(reg_base+CNTRL_REG); /* if reads 0xff, adapter not exist? */ + read_reg(CNTRL_REG); /* if reads 0xc0, no interface exist? 
*/ + read_reg(5); /* read version, probably 0 */ + + /* programming primary drive - 0 or 2 */ + write_reg(0, MISC_REG); /* select Index-0 for Register-A */ + write_reg(cycle1, READ_REG); /* set read cycle timings */ + write_reg(cycle1, WRITE_REG); /* set write cycle timings */ + + /* programming secondary drive - 1 or 3 */ + write_reg(1, MISC_REG); /* select Index-1 for Register-B */ + write_reg(cycle2, READ_REG); /* set read cycle timings */ + write_reg(cycle2, WRITE_REG); /* set write cycle timings */ + + write_reg(0x85, CNTRL_REG); /* use Register-A for drive 0 (or 2) and + Register-B for drive 1 (or 3) */ + + write_reg(misc, MISC_REG); /* set address setup, DRDY timings + and read prefetch for both drives */ + + restore_flags(flags); +} + +void ide_init_opti621 (byte bus, byte fn) +/* Init controller. Called on kernel boot. */ +{ + int rc, i; + unsigned char sreg; + unsigned short reg; + unsigned int dreg; + unsigned char revision; + for (i=0; i<4; i++) + opti621_drive_pio_modes[i] = PIO_DONT_KNOW; + printk("ide: OPTi 82C621 on PCI bus %d function %d\n", bus, fn); + if ((rc = pcibios_read_config_byte (bus, fn, 0x08, &sreg))) + goto quit; + revision = sreg; + if ((rc = pcibios_read_config_dword (bus, fn, 0x10, &dreg))) + goto quit; + opti621_primary_base = ((dreg==0) || (dreg>0xffff)) ? 0 : dreg-1; + if ((rc = pcibios_read_config_dword (bus, fn, 0x18, &dreg))) + goto quit; + opti621_secondary_base = ((dreg==0) || (dreg>0xffff)) ? 
0 : dreg-1; + printk("ide: revision %d, primary: 0x%04x, secondary: 0x%04x\n", + revision, opti621_primary_base, opti621_secondary_base); + if ((rc = pcibios_read_config_word (bus, fn, PCI_COMMAND, ®))) + goto quit; + if (!(reg & 1)) { + printk("ide: ports are not enabled (BIOS)\n"); + } else { + ide_hwifs[0].tuneproc = &opti621_tune_drive; + ide_hwifs[1].tuneproc = &opti621_tune_drive; + } + quit: if (rc) printk("ide: pcibios access failed - %s\n", pcibios_strerror(rc)); +} diff -u --recursive --new-file v2.1.7/linux/drivers/block/promise.c linux/drivers/block/promise.c --- v2.1.7/linux/drivers/block/promise.c Sat Apr 20 11:28:29 1996 +++ linux/drivers/block/promise.c Wed Nov 6 14:49:33 1996 @@ -172,14 +172,16 @@ hwif->selectproc = second_hwif->selectproc = &promise_selectproc; /* Shift the remaining interfaces down by one */ for (i=MAX_HWIFS-1 ; i > hwif->index+1 ; i--) { + ide_hwif_t *h = &ide_hwifs[i]; + printk("Shifting i/f %d values to i/f %d\n",i-1,i); - ide_hwifs[i].io_base = ide_hwifs[i-1].io_base; - ide_hwifs[i].ctl_port = ide_hwifs[i-1].ctl_port; - ide_hwifs[i].noprobe = ide_hwifs[i-1].noprobe; + ide_init_hwif_ports(h->io_ports, (h-1)->io_ports[IDE_DATA_OFFSET], NULL); + h->io_ports[IDE_CONTROL_OFFSET] = (h-1)->io_ports[IDE_CONTROL_OFFSET]; + h->noprobe = (h-1)->noprobe; } second_hwif->is_promise2 = 1; - second_hwif->io_base = hwif->io_base; - second_hwif->ctl_port = hwif->ctl_port; + ide_init_hwif_ports(second_hwif->io_ports, hwif->io_ports[IDE_DATA_OFFSET], NULL); + second_hwif->io_ports[IDE_CONTROL_OFFSET] = hwif->io_ports[IDE_CONTROL_OFFSET]; second_hwif->irq = hwif->irq; for (i=0; i<2 ; i++) { hwif->drives[i].io_32bit = 3; @@ -309,12 +311,11 @@ void do_promise_io (ide_drive_t *drive, struct request *rq) { unsigned long timeout; - unsigned short io_base = HWIF(drive)->io_base; byte stat; if (rq->cmd == READ) { ide_set_handler(drive, &promise_read_intr, WAIT_CMD); - OUT_BYTE(PROMISE_READ, io_base+IDE_COMMAND_OFFSET); + OUT_BYTE(PROMISE_READ, 
IDE_COMMAND_REG); /* The card's behaviour is odd at this point. If the data is available, DRQ will be true, and no interrupt will be generated by the card. If this is the case, we need to simulate @@ -336,7 +337,7 @@ */ return; } - if(IN_BYTE(io_base+IDE_SELECT_OFFSET) & 0x01) + if(IN_BYTE(IDE_SELECT_REG) & 0x01) return; udelay(1); } while (jiffies < timeout); @@ -345,7 +346,7 @@ return; } if (rq->cmd == WRITE) { - OUT_BYTE(PROMISE_WRITE, io_base+IDE_COMMAND_OFFSET); + OUT_BYTE(PROMISE_WRITE, IDE_COMMAND_REG); if (ide_wait_stat(drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { printk("%s: no DRQ after issuing PROMISE_WRITE\n", drive->name); return; diff -u --recursive --new-file v2.1.7/linux/drivers/block/triton.c linux/drivers/block/triton.c --- v2.1.7/linux/drivers/block/triton.c Sun Sep 22 09:53:44 1996 +++ linux/drivers/block/triton.c Wed Nov 6 14:49:33 1996 @@ -12,11 +12,6 @@ * Pretty much the same code will work for the OPTi "Viper" chipset. * Look for DMA support for this in linux kernel 2.1.xx, when it appears. * - * DMA is currently supported only for hard disk drives (not cdroms). - * - * Support for cdroms will likely be added at a later date, - * after broader experience has been obtained with hard disks. - * * Up to four drives may be enabled for DMA, and the Triton chipset will * (hopefully) arbitrate the PCI bus among them. Note that the i82371 chip * provides a single "line buffer" for the BM IDE function, so performance of @@ -102,7 +97,6 @@ * * And, yes, Intel Zappa boards really *do* use the Triton IDE ports. */ -#include #include #include #include @@ -198,13 +192,8 @@ * is always composed of two adjacent physical 4kB pages rather * than two possibly non-adjacent physical 4kB pages. 
*/ - if (bh == NULL) { /* paging and tape requests have (rq->bh == NULL) */ + if (bh == NULL) { /* paging requests have (rq->bh == NULL) */ addr = virt_to_bus (rq->buffer); -#ifdef CONFIG_BLK_DEV_IDETAPE - if (drive->media == ide_tape) - size = drive->tape.pc->request_transfer; - else -#endif /* CONFIG_BLK_DEV_IDETAPE */ size = rq->nr_sectors << 9; } else { /* group sequential buffers into one large buffer */ @@ -317,10 +306,8 @@ outl(virt_to_bus (HWIF(drive)->dmatable), dma_base + 4); /* PRD table */ outb(reading, dma_base); /* specify r/w */ outb(inb(dma_base+2)|0x06, dma_base+2); /* clear status bits */ -#ifdef CONFIG_BLK_DEV_IDEATAPI if (drive->media != ide_disk) return 0; -#endif /* CONFIG_BLK_DEV_IDEATAPI */ ide_set_handler(drive, &dma_intr, WAIT_CMD); /* issue cmd to drive */ OUT_BYTE(reading ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG); outb(inb(dma_base)|1, dma_base); /* begin DMA */ @@ -449,14 +436,14 @@ #endif /* DISPLAY_TRITON_TIMINGS */ ide_hwif_t *hwif = &ide_hwifs[h]; unsigned short time; - if (hwif->io_base == 0x1f0) { + if (hwif->io_ports[IDE_DATA_OFFSET] == 0x1f0) { time = timings & 0xffff; if ((time & 0x8000) == 0) /* interface enabled? */ continue; hwif->chipset = ide_triton; if (dma_enabled) init_triton_dma(hwif, bmiba); - } else if (hwif->io_base == 0x170) { + } else if (hwif->io_ports[IDE_DATA_OFFSET] == 0x170) { time = timings >> 16; if ((time & 0x8000) == 0) /* interface enabled? 
*/ continue; @@ -475,7 +462,7 @@ { byte stime; if (pcibios_read_config_byte(bus, fn, 0x44, &stime)) { - if (hwif->io_base == 0x1f0) { + if (hwif->io_ports[IDE_DATA_OFFSET] == 0x1f0) { s_clks = ~stime >> 6; r_clks = ~stime >> 4; } else { diff -u --recursive --new-file v2.1.7/linux/drivers/cdrom/mcdx.c linux/drivers/cdrom/mcdx.c --- v2.1.7/linux/drivers/cdrom/mcdx.c Wed Oct 16 10:48:10 1996 +++ linux/drivers/cdrom/mcdx.c Sat Nov 2 13:45:19 1996 @@ -64,6 +64,7 @@ #include #include #include +#include #include #define MAJOR_NR MITSUMI_X_CDROM_MAJOR diff -u --recursive --new-file v2.1.7/linux/drivers/char/ChangeLog linux/drivers/char/ChangeLog --- v2.1.7/linux/drivers/char/ChangeLog Thu Jun 6 22:23:08 1996 +++ linux/drivers/char/ChangeLog Sat Nov 9 11:17:31 1996 @@ -1,6 +1,41 @@ +Fri Nov 8 20:19:50 1996 Theodore Ts'o + + * n_tty.c (n_tty_flush_buffer): Only call driver->unthrottle() if + the tty was previous throttled. + (n_tty_set_termios, write_chan): Add changes suggested by + Simon P. Allen to allow hardware cooking. + + * tty_ioctl.c (set_termios): If we get a signal while waiting for + the tty to drain, return -EINTR. + + * serial.c (change_speed): Add support for CREAD, as required by + POSIX. + +Sat Nov 2 20:43:10 1996 Theodore Ts'o + + * serial.c: Wholesale changes. Added support for the Startech + 16650 and 16650V2 chips. (WARNING: the new startech + 16650A may or may not work!) Added support for the + TI16750 (not yet tested). Split async_struct into a + transient part (async_struct) and a permanent part + (serial_state) which contains the configuration + information for the ports. Added new driver routines + wait_until_sent() and send_xchar() to help with POSIX + compliance. Added support for radio clocks which waggle + the carrier detect line (CONFIG_HARD_PPS). + + * tty_ioctl.c (tty_wait_until_sent): Added call to new driver + function tty->driver.wait_until_sent(), which returns when + the tty's device xmit buffers are drained. 
Needed for + full POSIX compliance. + + (send_prio_char): New function, called by the ioctl's + TCIOFF and TCION; uses the new driver call send_xchar(), + which will send the XON or XOFF character at high priority + (and even if tty output is stopped). + Wed Jun 5 18:52:04 1996 Theodore Ts'o - * tty_io.c (do_tty_hangup): * pty.c (pty_close): When closing a pty, make sure packet mode is cleared. diff -u --recursive --new-file v2.1.7/linux/drivers/char/console.c linux/drivers/char/console.c --- v2.1.7/linux/drivers/char/console.c Tue Oct 29 19:58:05 1996 +++ linux/drivers/char/console.c Thu Nov 7 11:25:56 1996 @@ -1459,7 +1459,8 @@ ok = tc && (c >= 32 || (!utf && !(((disp_ctrl ? CTRL_ALWAYS : CTRL_ACTION) >> c) & 1))) - && (c != 127 || disp_ctrl); + && (c != 127 || disp_ctrl) + && (c != 128+27); if (vc_state == ESnormal && ok) { /* Now try to find out how to display it */ @@ -1499,6 +1500,8 @@ * of an escape sequence. */ switch (c) { + case 0: + continue; case 7: if (bell_duration) kd_mksound(bell_pitch, bell_duration); diff -u --recursive --new-file v2.1.7/linux/drivers/char/keyb_m68k.c linux/drivers/char/keyb_m68k.c --- v2.1.7/linux/drivers/char/keyb_m68k.c Fri Apr 5 17:16:48 1996 +++ linux/drivers/char/keyb_m68k.c Thu Nov 7 11:25:56 1996 @@ -508,8 +508,9 @@ #define A_CFLEX '^' #define A_TILDE '~' #define A_DIAER '"' -static unsigned char ret_diacr[] = - {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER }; +#define A_CEDIL ',' +static unsigned char ret_diacr[NR_DEAD] = + {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER, A_CEDIL }; /* If a dead key pressed twice, output a character corresponding to it, */ /* otherwise just remember the dead key. 
*/ diff -u --recursive --new-file v2.1.7/linux/drivers/char/keyboard.c linux/drivers/char/keyboard.c --- v2.1.7/linux/drivers/char/keyboard.c Wed Oct 9 08:55:19 1996 +++ linux/drivers/char/keyboard.c Thu Nov 7 11:25:56 1996 @@ -822,8 +822,9 @@ #define A_CFLEX '^' #define A_TILDE '~' #define A_DIAER '"' -static unsigned char ret_diacr[] = - {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER }; +#define A_CEDIL ',' +static unsigned char ret_diacr[NR_DEAD] = + {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER, A_CEDIL }; /* If a dead key pressed twice, output a character corresponding to it, */ /* otherwise just remember the dead key. */ diff -u --recursive --new-file v2.1.7/linux/drivers/char/misc.c linux/drivers/char/misc.c --- v2.1.7/linux/drivers/char/misc.c Fri Nov 1 17:13:16 1996 +++ linux/drivers/char/misc.c Wed Nov 6 14:41:17 1996 @@ -67,6 +67,7 @@ extern int atixl_busmouse_init(void); extern int sun_mouse_init(void); extern void watchdog_init(void); +extern void wdt_init(void); extern void pcwatchdog_init(void); extern int rtc_init(void); @@ -222,7 +223,10 @@ #endif #ifdef CONFIG_SOFT_WATCHDOG watchdog_init(); -#endif +#endif +#ifdef CONFIG_WDT + wdt_init(); +#endif #ifdef CONFIG_PCWATCHDOG pcwatchdog_init(); #endif diff -u --recursive --new-file v2.1.7/linux/drivers/char/n_tty.c linux/drivers/char/n_tty.c --- v2.1.7/linux/drivers/char/n_tty.c Tue Oct 29 19:58:06 1996 +++ linux/drivers/char/n_tty.c Sat Nov 9 11:17:31 1996 @@ -76,8 +76,9 @@ if (!tty->link) return; - if (tty->driver.unthrottle) - (tty->driver.unthrottle)(tty); + if (tty->driver.unthrottle && + clear_bit(TTY_THROTTLED, &tty->flags)) + tty->driver.unthrottle(tty); if (tty->link->packet) { tty->ctrl_status |= TIOCPKT_FLUSHREAD; wake_up_interruptible(&tty->link->read_wait); @@ -629,6 +630,11 @@ return; tty->icanon = (L_ICANON(tty) != 0); + if (tty->flags & (1<<TTY_HW_COOK_IN)) { + tty->raw = 1; + tty->real_raw = 1; + return; + } if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) || I_ICRNL(tty) || I_INLCR(tty) || L_ICANON(tty) ||
I_IXON(tty) || L_ISIG(tty) || L_ECHO(tty) || @@ -948,7 +954,7 @@ retval = -EIO; break; } - if (O_OPOST(tty)) { + if (O_OPOST(tty) && !(tty->flags & (1<<TTY_HW_COOK_OUT))) { while (nr > 0) { get_user(c, b); if (opost(c, tty) < 0) diff -u --recursive --new-file v2.1.7/linux/drivers/char/serial.c linux/drivers/char/serial.c --- v2.1.7/linux/drivers/char/serial.c Tue Oct 29 19:58:07 1996 +++ linux/drivers/char/serial.c Sat Nov 9 11:17:31 1996 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ #include static char *serial_name = "Serial driver"; -static char *serial_version = "4.13"; +static char *serial_version = "4.20"; DECLARE_TASK_QUEUE(tq_serial); @@ -109,8 +110,27 @@ static volatile int rs_triggered; static int rs_wild_int_mask; -static void autoconfig(struct async_struct * info); +static void autoconfig(struct serial_state * info); static void change_speed(struct async_struct *info); +static void rs_wait_until_sent(struct tty_struct *tty, int timeout); + +/* + * Here we define the default xmit fifo size used for each type of + * UART + */ +static struct serial_uart_config uart_config[] = { + { "unknown", 1, 0 }, + { "8250", 1, 0 }, + { "16450", 1, 0 }, + { "16550", 1, 0 }, + { "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO }, + { "cirrus", 1, 0 }, + { "ST16650", 1, UART_CLEAR_FIFO |UART_STARTECH }, + { "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO | + UART_STARTECH }, + { "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO}, + { 0, 0} +}; /* * This assumes you have a 1.8432 MHz clock for your UART.
@@ -146,7 +166,7 @@ #define C_P(card,port) (((card)<<6|(port)<<3) + 1) -struct async_struct rs_table[] = { +struct serial_state rs_table[] = { /* UART CLK PORT IRQ FLAGS */ { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS }, /* ttyS0 */ { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS }, /* ttyS1 */ @@ -204,7 +224,7 @@ #endif }; -#define NR_PORTS (sizeof(rs_table)/sizeof(struct async_struct)) +#define NR_PORTS (sizeof(rs_table)/sizeof(struct serial_state)) static struct tty_struct *serial_table[NR_PORTS]; static struct termios *serial_termios[NR_PORTS]; @@ -252,7 +272,7 @@ */ static int baud_table[] = { 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800, - 9600, 19200, 38400, 57600, 115200, 0 }; + 9600, 19200, 38400, 57600, 115200, 230400, 460800, 0 }; static inline unsigned int serial_in(struct async_struct *info, int offset) { @@ -400,11 +420,14 @@ do { ch = serial_inp(info, UART_RX); + if (*status & UART_LSR_BI) + *status &= ~(UART_LSR_FE | UART_LSR_PE); if (*status & info->ignore_status_mask) { if (++ignored > 100) break; goto ignore_char; } + *status &= info->read_status_mask; if (tty->flip.count >= TTY_FLIPBUF_SIZE) break; tty->flip.count++; @@ -425,7 +448,7 @@ *tty->flip.flag_buf_ptr++ = 0; *tty->flip.char_buf_ptr++ = ch; ignore_char: - *status = serial_inp(info, UART_LSR) & info->read_status_mask; + *status = serial_inp(info, UART_LSR); } while (*status & UART_LSR_DR); queue_task_irq_off(&tty->flip.tqueue, &tq_timer); #ifdef SERIAL_DEBUG_INTR @@ -477,19 +500,27 @@ static _INLINE_ void check_modem_status(struct async_struct *info) { int status; + struct async_icount *icount; status = serial_in(info, UART_MSR); if (status & UART_MSR_ANY_DELTA) { + icount = &info->state->icount; /* update input line counters */ if (status & UART_MSR_TERI) - info->icount.rng++; + icount->rng++; if (status & UART_MSR_DDSR) - info->icount.dsr++; - if (status & UART_MSR_DDCD) - info->icount.dcd++; + icount->dsr++; + if (status & UART_MSR_DDCD) { + icount->dcd++; +#ifdef CONFIG_HARD_PPS 
+ if ((info->flags & ASYNC_HARDPPS_CD) && + (status & UART_MSR_DCD)) + hardpps(); +#endif + } if (status & UART_MSR_DCTS) - info->icount.cts++; + icount->cts++; wake_up_interruptible(&info->delta_msr_wait); } @@ -569,7 +600,7 @@ info->last_active = jiffies; - status = serial_inp(info, UART_LSR) & info->read_status_mask; + status = serial_inp(info, UART_LSR); #ifdef SERIAL_DEBUG_INTR printk("status = %x...", status); #endif @@ -594,7 +625,8 @@ } while (end_mark != info); if (multi->port_monitor) printk("rs port monitor (normal) irq %d: 0x%x, 0x%x\n", - info->irq, first_multi, inb(multi->port_monitor)); + info->state->irq, first_multi, + inb(multi->port_monitor)); #ifdef SERIAL_DEBUG_INTR printk("end.\n"); #endif @@ -624,7 +656,7 @@ first_multi = inb(multi->port_monitor); do { - status = serial_inp(info, UART_LSR) & info->read_status_mask; + status = serial_inp(info, UART_LSR); #ifdef SERIAL_DEBUG_INTR printk("status = %x...", status); #endif @@ -643,7 +675,8 @@ info->last_active = jiffies; if (multi->port_monitor) printk("rs port monitor (single) irq %d: 0x%x, 0x%x\n", - info->irq, first_multi, inb(multi->port_monitor)); + info->state->irq, first_multi, + inb(multi->port_monitor)); #ifdef SERIAL_DEBUG_INTR printk("end.\n"); #endif @@ -683,7 +716,7 @@ info->last_active = jiffies; - status = serial_inp(info, UART_LSR) & info->read_status_mask; + status = serial_inp(info, UART_LSR); #ifdef SERIAL_DEBUG_INTR printk("status = %x...", status); #endif @@ -707,7 +740,7 @@ } if (multi->port_monitor) printk("rs port monitor irq %d: 0x%x, 0x%x\n", - info->irq, first_multi, + info->state->irq, first_multi, inb(multi->port_monitor)); if ((inb(multi->port1) & multi->mask1) != multi->match1) continue; @@ -910,13 +943,13 @@ unsigned long flags; int retval; void (*handler)(int, void *, struct pt_regs *); + struct serial_state *state= info->state; unsigned long page; page = get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; - save_flags(flags); cli(); if (info->flags & 
ASYNC_INITIALIZED) { @@ -925,7 +958,7 @@ return 0; } - if (!info->port || !info->type) { + if (!state->port || !state->type) { if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags); free_page(page); @@ -938,23 +971,30 @@ info->xmit_buf = (unsigned char *) page; #ifdef SERIAL_DEBUG_OPEN - printk("starting up ttys%d (irq %d)...", info->line, info->irq); + printk("starting up ttys%d (irq %d)...", info->line, state->irq); #endif + if (uart_config[info->state->type].flags & UART_STARTECH) { + /* Wake up UART */ + serial_outp(info, UART_LCR, 0xBF); + serial_outp(info, UART_EFR, UART_EFR_ECB); + serial_outp(info, UART_IER, 0); + serial_outp(info, UART_EFR, 0); + serial_outp(info, UART_LCR, 0); + } + + if (info->state->type == PORT_16750) { + /* Wake up UART */ + serial_outp(info, UART_IER, 0); + } + /* * Clear the FIFO buffers and disable them * (they will be reenabled in change_speed()) */ - if (info->type == PORT_16650) { - serial_outp(info, UART_FCR, (UART_FCR_CLEAR_RCVR | - UART_FCR_CLEAR_XMIT)); - info->xmit_fifo_size = 1; /* disabled for now */ - } else if (info->type == PORT_16550A) { + if (uart_config[state->type].flags & UART_CLEAR_FIFO) serial_outp(info, UART_FCR, (UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT)); - info->xmit_fifo_size = 16; - } else - info->xmit_fifo_size = 1; /* * At this point there's no way the LSR could still be 0xFF; @@ -974,18 +1014,18 @@ /* * Allocate the IRQ if necessary */ - if (info->irq && (!IRQ_ports[info->irq] || - !IRQ_ports[info->irq]->next_port)) { - if (IRQ_ports[info->irq]) { - free_irq(info->irq, NULL); - if (rs_multiport[info->irq].port1) + if (state->irq && (!IRQ_ports[state->irq] || + !IRQ_ports[state->irq]->next_port)) { + if (IRQ_ports[state->irq]) { + free_irq(state->irq, NULL); + if (rs_multiport[state->irq].port1) handler = rs_interrupt_multi; else handler = rs_interrupt; } else handler = rs_interrupt_single; - retval = request_irq(info->irq, handler, IRQ_T(info), + retval = request_irq(state->irq, handler, IRQ_T(info), 
"serial", NULL); if (retval) { restore_flags(flags); @@ -1022,7 +1062,7 @@ info->MCR |= UART_MCR_OUT1 | UART_MCR_OUT2; info->MCR_noint |= UART_MCR_OUT1 | UART_MCR_OUT2; #endif - if (info->irq == 0) + if (state->irq == 0) info->MCR = info->MCR_noint; serial_outp(info, UART_MCR, info->MCR); @@ -1055,11 +1095,11 @@ * Insert serial port into IRQ chain. */ info->prev_port = 0; - info->next_port = IRQ_ports[info->irq]; + info->next_port = IRQ_ports[state->irq]; if (info->next_port) info->next_port->prev_port = info; - IRQ_ports[info->irq] = info; - figure_IRQ_timeout(info->irq); + IRQ_ports[state->irq] = info; + figure_IRQ_timeout(state->irq); /* * Set up serial timers... @@ -1084,14 +1124,17 @@ static void shutdown(struct async_struct * info) { unsigned long flags; + struct serial_state *state; int retval; if (!(info->flags & ASYNC_INITIALIZED)) return; + state = info->state; + #ifdef SERIAL_DEBUG_OPEN printk("Shutting down serial port %d (irq %d)....", info->line, - info->irq); + state->irq); #endif save_flags(flags); cli(); /* Disable interrupts */ @@ -1110,24 +1153,24 @@ if (info->prev_port) info->prev_port->next_port = info->next_port; else - IRQ_ports[info->irq] = info->next_port; - figure_IRQ_timeout(info->irq); + IRQ_ports[state->irq] = info->next_port; + figure_IRQ_timeout(state->irq); /* * Free the IRQ, if necessary */ - if (info->irq && (!IRQ_ports[info->irq] || - !IRQ_ports[info->irq]->next_port)) { - if (IRQ_ports[info->irq]) { - free_irq(info->irq, NULL); - retval = request_irq(info->irq, rs_interrupt_single, + if (state->irq && (!IRQ_ports[state->irq] || + !IRQ_ports[state->irq]->next_port)) { + if (IRQ_ports[state->irq]) { + free_irq(state->irq, NULL); + retval = request_irq(state->irq, rs_interrupt_single, IRQ_T(info), "serial", NULL); if (retval) printk("serial shutdown: request_irq: error %d" " Couldn't reacquire IRQ.\n", retval); } else - free_irq(info->irq, NULL); + free_irq(state->irq, NULL); } if (info->xmit_buf) { @@ -1155,7 +1198,18 @@ if 
(info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags); - + + if (uart_config[info->state->type].flags & UART_STARTECH) { + /* Arrange to enter sleep mode */ + serial_outp(info, UART_LCR, 0xBF); + serial_outp(info, UART_EFR, UART_EFR_ECB); + serial_outp(info, UART_IER, UART_IERX_SLEEP); + serial_outp(info, UART_LCR, 0); + } + if (info->state->type == PORT_16750) { + /* Arrange to enter sleep mode */ + serial_outp(info, UART_IER, UART_IERX_SLEEP); + } info->flags &= ~ASYNC_INITIALIZED; restore_flags(flags); } @@ -1167,8 +1221,8 @@ static void change_speed(struct async_struct *info) { unsigned short port; - int quot = 0; - unsigned cflag,cval,fcr; + int quot = 0, baud_base; + unsigned cflag, cval, fcr = 0; int i; if (!info->tty || !info->tty->termios) @@ -1179,7 +1233,7 @@ i = cflag & CBAUD; if (i & CBAUDEX) { i &= ~CBAUDEX; - if (i < 1 || i > 2) + if (i < 1 || i > 4) info->tty->termios->c_cflag &= ~CBAUDEX; else i += 15; @@ -1189,29 +1243,28 @@ i += 1; if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI) i += 2; + if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI) + i += 3; + if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP) + i += 4; if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST) - quot = info->custom_divisor; + quot = info->state->custom_divisor; } + baud_base = info->state->baud_base; if (quot) { info->timeout = ((info->xmit_fifo_size*HZ*15*quot) / - info->baud_base) + 2; + baud_base) + 2; } else if (baud_table[i] == 134) { - quot = (2*info->baud_base / 269); + quot = (2*baud_base / 269); info->timeout = (info->xmit_fifo_size*HZ*30/269) + 2; } else if (baud_table[i]) { - quot = info->baud_base / baud_table[i]; + quot = baud_base / baud_table[i]; info->timeout = (info->xmit_fifo_size*HZ*15/baud_table[i]) + 2; } else { quot = 0; info->timeout = 0; } - if (quot) { - info->MCR |= UART_MCR_DTR; - info->MCR_noint |= UART_MCR_DTR; - cli(); - serial_out(info, UART_MCR, info->MCR); - sti(); - } else { + if (!quot) { info->MCR &= ~UART_MCR_DTR; info->MCR_noint 
&= ~UART_MCR_DTR; cli(); @@ -1234,28 +1287,19 @@ cval |= UART_LCR_PARITY; if (!(cflag & PARODD)) cval |= UART_LCR_EPAR; - if (info->type == PORT_16550A) { - if ((info->baud_base / quot) < 2400) + if (uart_config[info->state->type].flags & UART_USE_FIFO) { + if ((info->state->baud_base / quot) < 2400) fcr = UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1; else fcr = UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_8; - } else if (info->type == PORT_16650) { - /* - * On the 16650, we disable the FIFOs altogether - * because of a design bug in how the implement - * things. We could support it by completely changing - * how we handle the interrupt driver, but not today.... - * - * N.B. Because there's no way to set a FIFO trigger - * at 1 char, we'd probably disable at speed below - * 2400 baud anyway... - */ - fcr = 0; - } else - fcr = 0; + } + if (info->state->type == PORT_16750) + fcr |= UART_FCR7_64BYTE; /* CTS flow control flag and modem status interrupts */ info->IER &= ~UART_IER_MSI; + if (info->flags & ASYNC_HARDPPS_CD) + info->IER |= UART_IER_MSI; if (cflag & CRTSCTS) { info->flags |= ASYNC_CTS_FLOW; info->IER |= UART_IER_MSI; @@ -1280,32 +1324,37 @@ if (I_BRKINT(info->tty) || I_PARMRK(info->tty)) info->read_status_mask |= UART_LSR_BI; + /* + * Characters to ignore + */ info->ignore_status_mask = 0; -#if 0 - /* This should be safe, but for some broken bits of hardware... */ - if (I_IGNPAR(info->tty)) { + if (I_IGNPAR(info->tty)) info->ignore_status_mask |= UART_LSR_PE | UART_LSR_FE; - info->read_status_mask |= UART_LSR_PE | UART_LSR_FE; - } -#endif if (I_IGNBRK(info->tty)) { info->ignore_status_mask |= UART_LSR_BI; - info->read_status_mask |= UART_LSR_BI; /* * If we're ignore parity and break indicators, ignore * overruns too. (For real raw support). 
*/ - if (I_IGNPAR(info->tty)) { - info->ignore_status_mask |= UART_LSR_OE | \ - UART_LSR_PE | UART_LSR_FE; - info->read_status_mask |= UART_LSR_OE | \ - UART_LSR_PE | UART_LSR_FE; - } + if (I_IGNPAR(info->tty)) + info->ignore_status_mask |= UART_LSR_OE; } + /* + * !!! ignore all characters if CREAD is not set + */ + if ((cflag & CREAD) == 0) + info->ignore_status_mask |= UART_LSR_DR; cli(); + if (uart_config[info->state->type].flags & UART_STARTECH) { + serial_outp(info, UART_LCR, 0xBF); + serial_outp(info, UART_EFR, + (cflag & CRTSCTS) ? UART_EFR_CTS : 0); + } serial_outp(info, UART_LCR, cval | UART_LCR_DLAB); /* set DLAB */ serial_outp(info, UART_DLL, quot & 0xff); /* LS of divisor */ serial_outp(info, UART_DLM, quot >> 8); /* MS of divisor */ + if (info->state->type == PORT_16750) + serial_outp(info, UART_FCR, fcr); /* set fcr */ serial_outp(info, UART_LCR, cval); /* reset DLAB */ serial_outp(info, UART_FCR, fcr); /* set fcr */ sti(); @@ -1438,6 +1487,25 @@ } /* + * This function is used to send a high-priority XON/XOFF character to + * the device + */ +void rs_send_xchar(struct tty_struct *tty, char ch) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + if (serial_paranoia_check(info, tty->device, "rs_send_char")) + return; + + info->x_char = ch; + if (ch) { + /* Make sure transmit interrupts are on */ + info->IER |= UART_IER_THRI; + serial_out(info, UART_IER, info->IER); + } +} + +/* * ------------------------------------------------------------ * rs_throttle() * @@ -1459,10 +1527,12 @@ return; if (I_IXOFF(tty)) - info->x_char = STOP_CHAR(tty); + rs_send_xchar(tty, STOP_CHAR(tty)); - info->MCR &= ~UART_MCR_RTS; - info->MCR_noint &= ~UART_MCR_RTS; + if (tty->termios->c_cflag & CRTSCTS) { + info->MCR &= ~UART_MCR_RTS; + info->MCR_noint &= ~UART_MCR_RTS; + } cli(); serial_out(info, UART_MCR, info->MCR); sti(); @@ -1485,10 +1555,12 @@ if (info->x_char) info->x_char = 0; else - info->x_char = START_CHAR(tty); + rs_send_xchar(tty, 
START_CHAR(tty)); + } + if (tty->termios->c_cflag & CRTSCTS) { + info->MCR |= UART_MCR_RTS; + info->MCR_noint |= UART_MCR_RTS; } - info->MCR |= UART_MCR_RTS; - info->MCR_noint |= UART_MCR_RTS; cli(); serial_out(info, UART_MCR, info->MCR); sti(); @@ -1504,20 +1576,22 @@ struct serial_struct * retinfo) { struct serial_struct tmp; - + struct serial_state *state = info->state; + if (!retinfo) return -EFAULT; memset(&tmp, 0, sizeof(tmp)); - tmp.type = info->type; - tmp.line = info->line; - tmp.port = info->port; - tmp.irq = info->irq; - tmp.flags = info->flags; - tmp.baud_base = info->baud_base; - tmp.close_delay = info->close_delay; - tmp.closing_wait = info->closing_wait; - tmp.custom_divisor = info->custom_divisor; - tmp.hub6 = info->hub6; + tmp.type = state->type; + tmp.line = state->line; + tmp.port = state->port; + tmp.irq = state->irq; + tmp.flags = state->flags; + tmp.xmit_fifo_size = state->xmit_fifo_size; + tmp.baud_base = state->baud_base; + tmp.close_delay = state->close_delay; + tmp.closing_wait = state->closing_wait; + tmp.custom_divisor = state->custom_divisor; + tmp.hub6 = state->hub6; copy_to_user(retinfo,&tmp,sizeof(*retinfo)); return 0; } @@ -1526,29 +1600,32 @@ struct serial_struct * new_info) { struct serial_struct new_serial; - struct async_struct old_info; + struct serial_state old_state, *state; unsigned int i,change_irq,change_port; int retval = 0; if (!new_info) return -EFAULT; copy_from_user(&new_serial,new_info,sizeof(new_serial)); - old_info = *info; - - change_irq = new_serial.irq != info->irq; - change_port = (new_serial.port != info->port) || (new_serial.hub6 != info->hub6); - + state = info->state; + old_state = *state; + + change_irq = new_serial.irq != state->irq; + change_port = (new_serial.port != state->port) || + (new_serial.hub6 != state->hub6); + if (!suser()) { if (change_irq || change_port || - (new_serial.baud_base != info->baud_base) || - (new_serial.type != info->type) || - (new_serial.close_delay != info->close_delay) || + 
(new_serial.baud_base != state->baud_base) || + (new_serial.type != state->type) || + (new_serial.close_delay != state->close_delay) || + (new_serial.xmit_fifo_size != state->xmit_fifo_size) || ((new_serial.flags & ~ASYNC_USR_MASK) != - (info->flags & ~ASYNC_USR_MASK))) + (state->flags & ~ASYNC_USR_MASK))) return -EPERM; - info->flags = ((info->flags & ~ASYNC_USR_MASK) | + state->flags = ((state->flags & ~ASYNC_USR_MASK) | (new_serial.flags & ASYNC_USR_MASK)); - info->custom_divisor = new_serial.custom_divisor; + state->custom_divisor = new_serial.custom_divisor; goto check_and_exit; } @@ -1563,13 +1640,13 @@ /* Make sure address is not already in use */ if (new_serial.type) { for (i = 0 ; i < NR_PORTS; i++) - if ((info != &rs_table[i]) && + if ((state != &rs_table[i]) && (rs_table[i].port == new_serial.port) && rs_table[i].type) return -EADDRINUSE; } - if ((change_port || change_irq) && (info->count > 1)) + if ((change_port || change_irq) && (state->count > 1)) return -EBUSY; /* @@ -1577,36 +1654,43 @@ * At this point, we start making changes..... 
*/ - info->baud_base = new_serial.baud_base; - info->flags = ((info->flags & ~ASYNC_FLAGS) | + state->baud_base = new_serial.baud_base; + state->flags = ((state->flags & ~ASYNC_FLAGS) | (new_serial.flags & ASYNC_FLAGS)); - info->custom_divisor = new_serial.custom_divisor; - info->type = new_serial.type; - info->close_delay = new_serial.close_delay * HZ/100; - info->closing_wait = new_serial.closing_wait * HZ/100; + info->flags = ((state->flags & ~ASYNC_INTERNAL_FLAGS) | + (info->flags & ASYNC_INTERNAL_FLAGS)); + state->custom_divisor = new_serial.custom_divisor; + state->type = new_serial.type; + state->close_delay = new_serial.close_delay * HZ/100; + state->closing_wait = new_serial.closing_wait * HZ/100; + info->xmit_fifo_size = state->xmit_fifo_size = + new_serial.xmit_fifo_size; - release_region(info->port,8); + release_region(state->port,8); if (change_port || change_irq) { /* * We need to shutdown the serial port at the old * port/irq combination. */ shutdown(info); - info->irq = new_serial.irq; - info->port = new_serial.port; - info->hub6 = new_serial.hub6; + state->irq = new_serial.irq; + info->port = state->port = new_serial.port; + info->hub6 = state->hub6 = new_serial.hub6; } - if(info->type != PORT_UNKNOWN) - request_region(info->port,8,"serial(set)"); + if (state->type != PORT_UNKNOWN) + request_region(state->port,8,"serial(set)"); check_and_exit: - if (!info->port || !info->type) + if (!state->port || !state->type) return 0; - if (info->flags & ASYNC_INITIALIZED) { - if (((old_info.flags & ASYNC_SPD_MASK) != - (info->flags & ASYNC_SPD_MASK)) || - (old_info.custom_divisor != info->custom_divisor)) + if (state->type != old_state.type) + state->xmit_fifo_size = + uart_config[state->type].dfl_xmit_fifo_size; + if (state->flags & ASYNC_INITIALIZED) { + if (((old_state.flags & ASYNC_SPD_MASK) != + (state->flags & ASYNC_SPD_MASK)) || + (old_state.custom_divisor != state->custom_divisor)) change_speed(info); } else retval = startup(info); @@ -1713,13 +1797,13 
@@ if (!suser()) return -EPERM; - if (info->count > 1) + if (info->state->count > 1) return -EBUSY; shutdown(info); cli(); - autoconfig(info); + autoconfig(info->state); sti(); retval = startup(info); @@ -1795,7 +1879,7 @@ struct serial_multiport_struct ret; struct rs_multiport_struct *multi; - multi = &rs_multiport[info->irq]; + multi = &rs_multiport[info->state->irq]; ret.port_monitor = multi->port_monitor; @@ -1815,7 +1899,7 @@ ret.mask4 = multi->mask4; ret.match4 = multi->match4; - ret.irq = info->irq; + ret.irq = info->state->irq; copy_to_user(retinfo,&ret,sizeof(*retinfo)); return 0; @@ -1827,6 +1911,7 @@ { struct serial_multiport_struct new_multi; struct rs_multiport_struct *multi; + struct serial_state *state; int was_multi, now_multi; int retval; void (*handler)(int, void *, struct pt_regs *); @@ -1835,14 +1920,16 @@ return -EPERM; if (!in_multi) return -EFAULT; + state = info->state; + copy_from_user(&new_multi, in_multi, sizeof(struct serial_multiport_struct)); - if (new_multi.irq != info->irq || info->irq == 0 || - !IRQ_ports[info->irq]) + if (new_multi.irq != state->irq || state->irq == 0 || + !IRQ_ports[state->irq]) return -EINVAL; - multi = &rs_multiport[info->irq]; + multi = &rs_multiport[state->irq]; was_multi = (multi->port1 != 0); multi->port_monitor = new_multi.port_monitor; @@ -1881,15 +1968,15 @@ now_multi = (multi->port1 != 0); - if (IRQ_ports[info->irq]->next_port && + if (IRQ_ports[state->irq]->next_port && (was_multi != now_multi)) { - free_irq(info->irq, NULL); + free_irq(state->irq, NULL); if (now_multi) handler = rs_interrupt_multi; else handler = rs_interrupt; - retval = request_irq(info->irq, handler, IRQ_T(info), + retval = request_irq(state->irq, handler, IRQ_T(info), "serial", NULL); if (retval) { printk("Couldn't reallocate serial interrupt " @@ -2036,7 +2123,8 @@ */ case TIOCMIWAIT: cli(); - cprev = info->icount; /* note the counters on entry */ + /* note the counters on entry */ + cprev = info->state->icount; sti(); while (1) { 
interruptible_sleep_on(&info->delta_msr_wait); @@ -2044,7 +2132,7 @@ if (current->signal & ~current->blocked) return -ERESTARTSYS; cli(); - cnow = info->icount; /* atomic copy */ + cnow = info->state->icount; /* atomic copy */ sti(); if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) @@ -2071,7 +2159,7 @@ if (error) return error; cli(); - cnow = info->icount; + cnow = info->state->icount; sti(); p_cuser = (struct serial_icounter_struct *) arg; put_user(cnow.cts, &p_cuser->cts); @@ -2097,6 +2185,14 @@ change_speed(info); + if (!(old_termios->c_cflag & CBAUD) && + (tty->termios->c_cflag & CBAUD)) { + info->MCR |= UART_MCR_DTR; + info->MCR_noint |= UART_MCR_DTR; + cli(); + serial_out(info, UART_MCR, info->MCR); + sti(); + } if ((old_termios->c_cflag & CRTSCTS) && !(tty->termios->c_cflag & CRTSCTS)) { tty->hw_stopped = 0; @@ -2129,11 +2225,13 @@ static void rs_close(struct tty_struct *tty, struct file * filp) { struct async_struct * info = (struct async_struct *)tty->driver_data; + struct serial_state *state; unsigned long flags; - unsigned long timeout; if (!info || serial_paranoia_check(info, tty->device, "rs_close")) return; + + state = info->state; save_flags(flags); cli(); @@ -2145,26 +2243,26 @@ } #ifdef SERIAL_DEBUG_OPEN - printk("rs_close ttys%d, count = %d\n", info->line, info->count); + printk("rs_close ttys%d, count = %d\n", info->line, state->count); #endif - if ((tty->count == 1) && (info->count != 1)) { + if ((tty->count == 1) && (state->count != 1)) { /* * Uh, oh. tty->count is 1, which means that the tty - * structure will be freed. Info->count should always + * structure will be freed. state->count should always * be one in these conditions. If it's greater than * one, we've got real problems, since it means the * serial port won't be shutdown. 
*/ printk("rs_close: bad serial port count; tty->count is 1, " - "info->count is %d\n", info->count); - info->count = 1; + "state->count is %d\n", state->count); + state->count = 1; } - if (--info->count < 0) { + if (--state->count < 0) { printk("rs_close: bad serial port count for ttys%d: %d\n", - info->line, info->count); - info->count = 0; + info->line, state->count); + state->count = 0; } - if (info->count) { + if (state->count) { DBG_CNT("before DEC-2"); MOD_DEC_USE_COUNT; restore_flags(flags); @@ -2176,9 +2274,9 @@ * separate termios for callout and dialin. */ if (info->flags & ASYNC_NORMAL_ACTIVE) - info->normal_termios = *tty->termios; + info->state->normal_termios = *tty->termios; if (info->flags & ASYNC_CALLOUT_ACTIVE) - info->callout_termios = *tty->termios; + info->state->callout_termios = *tty->termios; /* * Now we wait for the transmit buffer to clear; and we notify * the line discipline to only process XON/XOFF characters. @@ -2201,14 +2299,7 @@ * has completely drained; this is especially * important if there is a transmit FIFO! 
*/ - timeout = jiffies+HZ; - while (!(serial_inp(info, UART_LSR) & UART_LSR_TEMT)) { - current->state = TASK_INTERRUPTIBLE; - current->timeout = jiffies + info->timeout; - schedule(); - if (jiffies > timeout) - break; - } + rs_wait_until_sent(tty, HZ); } shutdown(info); if (tty->driver.flush_buffer) @@ -2234,19 +2325,49 @@ } /* + * rs_wait_until_sent() --- wait until the transmitter is empty + */ +static void rs_wait_until_sent(struct tty_struct *tty, int timeout) +{ + struct async_struct * info = (struct async_struct *)tty->driver_data; + unsigned long orig_jiffies; + + if (serial_paranoia_check(info, tty->device, "rs_wait_until_sent")) + return; + + orig_jiffies = jiffies; + current->state = TASK_INTERRUPTIBLE; + current->counter = 0; /* make us low-priority */ + while (!(serial_inp(info, UART_LSR) & UART_LSR_TEMT)) { + current->timeout = jiffies + info->timeout; + schedule(); + if (current->signal & ~current->blocked) + break; + if (timeout && ((orig_jiffies + timeout) > jiffies)) + break; + if (jiffies > timeout) + break; + } + current->state = TASK_RUNNING; +} + +/* * rs_hangup() --- called by tty_hangup() when a hangup is signaled. 
*/ void rs_hangup(struct tty_struct *tty) { struct async_struct * info = (struct async_struct *)tty->driver_data; + struct serial_state *state = info->state; if (serial_paranoia_check(info, tty->device, "rs_hangup")) return; + + state = info->state; rs_flush_buffer(tty); shutdown(info); info->event = 0; - info->count = 0; + state->count = 0; info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CALLOUT_ACTIVE); info->tty = 0; wake_up_interruptible(&info->open_wait); @@ -2261,6 +2382,7 @@ struct async_struct *info) { struct wait_queue wait = { current, NULL }; + struct serial_state *state = info->state; int retval; int do_clocal = 0; @@ -2314,7 +2436,7 @@ } if (info->flags & ASYNC_CALLOUT_ACTIVE) { - if (info->normal_termios.c_cflag & CLOCAL) + if (state->normal_termios.c_cflag & CLOCAL) do_clocal = 1; } else { if (tty->termios->c_cflag & CLOCAL) @@ -2324,7 +2446,7 @@ /* * Block waiting for the carrier detect and the line to become * free (i.e., not in use by the callout). While we are in - * this loop, info->count is dropped by one, so that + * this loop, state->count is dropped by one, so that * rs_close() knows when to free things. We restore it upon * exit, either normal or abnormal. 
*/ @@ -2332,11 +2454,11 @@ add_wait_queue(&info->open_wait, &wait); #ifdef SERIAL_DEBUG_OPEN printk("block_til_ready before block: ttys%d, count = %d\n", - info->line, info->count); + state->line, state->count); #endif cli(); if (!tty_hung_up_p(filp)) - info->count--; + state->count--; sti(); info->blocked_open++; while (1) { @@ -2370,24 +2492,60 @@ } #ifdef SERIAL_DEBUG_OPEN printk("block_til_ready blocking: ttys%d, count = %d\n", - info->line, info->count); + info->line, state->count); #endif schedule(); } current->state = TASK_RUNNING; remove_wait_queue(&info->open_wait, &wait); if (!tty_hung_up_p(filp)) - info->count++; + state->count++; info->blocked_open--; #ifdef SERIAL_DEBUG_OPEN printk("block_til_ready after blocking: ttys%d, count = %d\n", - info->line, info->count); + info->line, state->count); #endif if (retval) return retval; info->flags |= ASYNC_NORMAL_ACTIVE; return 0; -} +} + +int get_async_struct(int line, struct async_struct **ret_info) +{ + struct async_struct *info; + struct serial_state *sstate; + + sstate = rs_table + line; + sstate->count++; + if (sstate->info) { + *ret_info = sstate->info; + return 0; + } + info = kmalloc(sizeof(struct async_struct), GFP_KERNEL); + if (!info) { + sstate->count--; + return -ENOMEM; + } + memset(info, 0, sizeof(struct async_struct)); + info->magic = SERIAL_MAGIC; + info->port = sstate->port; + info->flags = sstate->flags; + info->xmit_fifo_size = sstate->xmit_fifo_size; + info->line = line; + info->tqueue.routine = do_softint; + info->tqueue.data = info; + info->tqueue_hangup.routine = do_serial_hangup; + info->tqueue_hangup.data = info; + info->state = sstate; + if (sstate->info) { + kfree_s(info, sizeof(struct async_struct)); + *ret_info = sstate->info; + return 0; + } + *ret_info = sstate->info = info; + return 0; +} /* * This routine is called whenever a serial port is opened. 
It @@ -2404,15 +2562,16 @@ line = MINOR(tty->device) - tty->driver.minor_start; if ((line < 0) || (line >= NR_PORTS)) return -ENODEV; - info = rs_table + line; + retval = get_async_struct(line, &info); + if (retval) + return retval; if (serial_paranoia_check(info, tty->device, "rs_open")) return -ENODEV; #ifdef SERIAL_DEBUG_OPEN printk("rs_open %s%d, count = %d\n", tty->driver.name, info->line, - info->count); + info->state->count); #endif - info->count++; tty->driver_data = info; info->tty = tty; @@ -2443,11 +2602,12 @@ return retval; } - if ((info->count == 1) && (info->flags & ASYNC_SPLIT_TERMIOS)) { + if ((info->state->count == 1) && + (info->flags & ASYNC_SPLIT_TERMIOS)) { if (tty->driver.subtype == SERIAL_TYPE_NORMAL) - *tty->termios = info->normal_termios; + *tty->termios = info->state->normal_termios; else - *tty->termios = info->callout_termios; + *tty->termios = info->state->callout_termios; change_speed(info); } @@ -2587,16 +2747,18 @@ * whether or not this UART is a 16550A or not, since this will * determine whether or not we can use its FIFO features or not. */ -static void autoconfig(struct async_struct * info) +static void autoconfig(struct serial_state * state) { unsigned char status1, status2, scratch, scratch2; - unsigned port = info->port; + struct async_struct *info, scr_info; unsigned long flags; - info->type = PORT_UNKNOWN; + state->type = PORT_UNKNOWN; - if (!port) + if (!state->port) return; + info = &scr_info; /* This is just for serial_{in,out} */ + info->port = state->port; save_flags(flags); cli(); @@ -2628,7 +2790,7 @@ * manufacturer would be stupid enough to design a board * that conflicts with COM 1-4 --- we hope! */ - if (!(info->flags & ASYNC_SKIP_TEST)) { + if (!(state->flags & ASYNC_SKIP_TEST)) { scratch = serial_inp(info, UART_MCR); serial_outp(info, UART_MCR, UART_MCR_LOOP | scratch); scratch2 = serial_inp(info, UART_MSR); @@ -2646,39 +2808,52 @@ * If the AUTO_IRQ flag is set, try to do the automatic IRQ * detection. 
*/ - if (info->flags & ASYNC_AUTO_IRQ) - info->irq = do_auto_irq(info); + if (state->flags & ASYNC_AUTO_IRQ) + state->irq = do_auto_irq(info); scratch2 = serial_in(info, UART_LCR); - serial_outp(info, UART_LCR, scratch2 | UART_LCR_DLAB); + serial_outp(info, UART_LCR, 0xBF); /* set up for StarTech test */ serial_outp(info, UART_EFR, 0); /* EFR is the same as FCR */ - serial_outp(info, UART_LCR, scratch2); + serial_outp(info, UART_LCR, 0); serial_outp(info, UART_FCR, UART_FCR_ENABLE_FIFO); scratch = serial_in(info, UART_IIR) >> 6; - info->xmit_fifo_size = 1; switch (scratch) { case 0: - info->type = PORT_16450; + state->type = PORT_16450; break; case 1: - info->type = PORT_UNKNOWN; + state->type = PORT_UNKNOWN; break; case 2: - info->type = PORT_16550; + state->type = PORT_16550; break; case 3: - serial_outp(info, UART_LCR, scratch2 | UART_LCR_DLAB); - if (serial_in(info, UART_EFR) == 0) { - info->type = PORT_16650; - info->xmit_fifo_size = 32; - } else { - info->type = PORT_16550A; - info->xmit_fifo_size = 16; - } - serial_outp(info, UART_LCR, scratch2); + state->type = PORT_16550A; break; } - if (info->type == PORT_16450) { + if (state->type == PORT_16550A) { + /* Check for Startech UART's */ + serial_outp(info, UART_LCR, scratch2 | UART_LCR_DLAB); + if (serial_in(info, UART_EFR) == 0) { + state->type = PORT_16650; + } else { + serial_outp(info, UART_LCR, 0xBF); + if (serial_in(info, UART_EFR) == 0) + state->type = PORT_16650V2; + } + } + if (state->type == PORT_16550A) { + /* Check for TI 16750 */ + serial_outp(info, UART_LCR, scratch2 | UART_LCR_DLAB); + serial_outp(info, UART_FCR, + UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); + scratch = serial_in(info, UART_IIR) >> 5; + if (scratch == 7) + state->type = PORT_16750; + serial_outp(info, UART_FCR, UART_FCR_ENABLE_FIFO); + } + serial_outp(info, UART_LCR, scratch2); + if (state->type == PORT_16450) { scratch = serial_in(info, UART_SCR); serial_outp(info, UART_SCR, 0xa5); status1 = serial_in(info, UART_SCR); @@ -2687,8 
+2862,10 @@ serial_outp(info, UART_SCR, scratch); if ((status1 != 0xa5) || (status2 != 0x5a)) - info->type = PORT_8250; + state->type = PORT_8250; } + state->xmit_fifo_size = uart_config[state->type].dfl_xmit_fifo_size; + request_region(info->port,8,"serial(auto)"); /* @@ -2726,7 +2903,7 @@ int rs_init(void) { int i; - struct async_struct * info; + struct serial_state * state; init_bh(SERIAL_BH, do_serial_bh); timer_table[RS_TIMER].fn = rs_timer; @@ -2773,10 +2950,12 @@ serial_driver.ioctl = rs_ioctl; serial_driver.throttle = rs_throttle; serial_driver.unthrottle = rs_unthrottle; + serial_driver.send_xchar = rs_send_xchar; serial_driver.set_termios = rs_set_termios; serial_driver.stop = rs_stop; serial_driver.start = rs_start; serial_driver.hangup = rs_hangup; + serial_driver.wait_until_sent = rs_wait_until_sent; /* * The callout device is just like normal device except for @@ -2792,63 +2971,31 @@ if (tty_register_driver(&callout_driver)) panic("Couldn't register callout driver\n"); - for (i = 0, info = rs_table; i < NR_PORTS; i++,info++) { - info->magic = SERIAL_MAGIC; - info->line = i; - info->tty = 0; - info->type = PORT_UNKNOWN; - info->custom_divisor = 0; - info->close_delay = 5*HZ/10; - info->closing_wait = 30*HZ; - info->x_char = 0; - info->event = 0; - info->count = 0; - info->blocked_open = 0; - info->tqueue.routine = do_softint; - info->tqueue.data = info; - info->tqueue_hangup.routine = do_serial_hangup; - info->tqueue_hangup.data = info; - info->callout_termios =callout_driver.init_termios; - info->normal_termios = serial_driver.init_termios; - info->open_wait = 0; - info->close_wait = 0; - info->delta_msr_wait = 0; - info->icount.cts = info->icount.dsr = - info->icount.rng = info->icount.dcd = 0; - info->next_port = 0; - info->prev_port = 0; - if (info->irq == 2) - info->irq = 9; - if (info->type == PORT_UNKNOWN) { - if (!(info->flags & ASYNC_BOOT_AUTOCONF)) + for (i = 0, state = rs_table; i < NR_PORTS; i++,state++) { + state->magic = SSTATE_MAGIC; + 
state->line = i; + state->type = PORT_UNKNOWN; + state->custom_divisor = 0; + state->close_delay = 5*HZ/10; + state->closing_wait = 30*HZ; + state->callout_termios = callout_driver.init_termios; + state->normal_termios = serial_driver.init_termios; + state->icount.cts = state->icount.dsr = + state->icount.rng = state->icount.dcd = 0; + if (state->irq == 2) + state->irq = 9; + if (state->type == PORT_UNKNOWN) { + if (!(state->flags & ASYNC_BOOT_AUTOCONF)) continue; - autoconfig(info); - if (info->type == PORT_UNKNOWN) + autoconfig(state); + if (state->type == PORT_UNKNOWN) continue; } - printk(KERN_INFO "tty%02d%s at 0x%04x (irq = %d)", info->line, - (info->flags & ASYNC_FOURPORT) ? " FourPort" : "", - info->port, info->irq); - switch (info->type) { - case PORT_8250: - printk(" is a 8250\n"); - break; - case PORT_16450: - printk(" is a 16450\n"); - break; - case PORT_16550: - printk(" is a 16550\n"); - break; - case PORT_16550A: - printk(" is a 16550A\n"); - break; - case PORT_16650: - printk(" is a 16650\n"); - break; - default: - printk("\n"); - break; - } + printk(KERN_INFO "tty%02d%s at 0x%04x (irq = %d) is a %s\n", + state->line, + (state->flags & ASYNC_FOURPORT) ? 
" FourPort" : "", + state->port, state->irq, + uart_config[state->type].name); } register_symtab(&serial_syms); return 0; @@ -2862,7 +3009,7 @@ { int i; unsigned long flags; - struct async_struct *info; + struct serial_state *state; save_flags(flags); cli(); @@ -2880,51 +3027,40 @@ restore_flags(flags); return -1; } - info = &rs_table[i]; + state = &rs_table[i]; if (rs_table[i].count) { restore_flags(flags); printk("Couldn't configure serial #%d (port=%d,irq=%d): " "device already open\n", i, req->port, req->irq); return -1; } - info->irq = req->irq; - info->port = req->port; - info->flags = req->flags; - autoconfig(info); - if (info->type == PORT_UNKNOWN) { + state->irq = req->irq; + state->port = req->port; + state->flags = req->flags; + autoconfig(state); + if (state->type == PORT_UNKNOWN) { restore_flags(flags); printk("register_serial(): autoconfig failed\n"); return -1; } - printk(KERN_INFO "tty%02d at 0x%04x (irq = %d)", info->line, - info->port, info->irq); - switch (info->type) { - case PORT_8250: - printk(" is a 8250\n"); break; - case PORT_16450: - printk(" is a 16450\n"); break; - case PORT_16550: - printk(" is a 16550\n"); break; - case PORT_16550A: - printk(" is a 16550A\n"); break; - default: - printk("\n"); break; - } + printk(KERN_INFO "tty%02d at 0x%04x (irq = %d) is a %s\n", + state->line, state->port, state->irq, + uart_config[state->type].name); restore_flags(flags); - return info->line; + return state->line; } void unregister_serial(int line) { unsigned long flags; - struct async_struct *info = &rs_table[line]; + struct serial_state *state = &rs_table[line]; save_flags(flags); cli(); - if (info->tty) - tty_hangup(info->tty); - info->type = PORT_UNKNOWN; - printk(KERN_INFO "tty%02d unloaded\n", info->line); + if (state->info && state->info->tty) + tty_hangup(state->info->tty); + state->type = PORT_UNKNOWN; + printk(KERN_INFO "tty%02d unloaded\n", state->line); restore_flags(flags); } diff -u --recursive --new-file 
v2.1.7/linux/drivers/char/tty_ioctl.c linux/drivers/char/tty_ioctl.c --- v2.1.7/linux/drivers/char/tty_ioctl.c Tue Oct 29 19:58:10 1996 +++ linux/drivers/char/tty_ioctl.c Sat Nov 9 11:17:31 1996 @@ -62,11 +62,19 @@ #endif current->state = TASK_INTERRUPTIBLE; if (current->signal & ~current->blocked) - break; + goto stop_waiting; if (!tty->driver.chars_in_buffer(tty)) break; schedule(); } while (current->timeout); + if (tty->driver.wait_until_sent) { + if (current->timeout == -1) + timeout = 0; + else + timeout = current->timeout - jiffies; + tty->driver.wait_until_sent(tty, timeout); + } +stop_waiting: current->state = TASK_RUNNING; remove_wait_queue(&tty->write_wait, &wait); } @@ -169,8 +177,11 @@ if ((opt & TERMIOS_FLUSH) && tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty); - if (opt & TERMIOS_WAIT) + if (opt & TERMIOS_WAIT) { tty_wait_until_sent(tty, 0); + if (current->signal & ~current->blocked) + return -EINTR; + } change_termios(tty, &tmp_termios); return 0; @@ -371,6 +382,24 @@ } #endif +/* + * Send a high priority character to the tty. 
+ */ +void send_prio_char(struct tty_struct *tty, char ch) +{ + int was_stopped = tty->stopped; + + if (tty->driver.send_xchar) { + tty->driver.send_xchar(tty, ch); + return; + } + if (was_stopped) + start_tty(tty); + tty->driver.write(tty, 0, &ch, 1); + if (was_stopped) + stop_tty(tty); +} + int n_tty_ioctl(struct tty_struct * tty, struct file * file, unsigned int cmd, unsigned long arg) { @@ -440,13 +469,11 @@ break; case TCIOFF: if (STOP_CHAR(tty) != __DISABLED_CHAR) - tty->driver.write(tty, 0, - &STOP_CHAR(tty), 1); + send_prio_char(tty, STOP_CHAR(tty)); break; case TCION: if (START_CHAR(tty) != __DISABLED_CHAR) - tty->driver.write(tty, 0, - &START_CHAR(tty), 1); + send_prio_char(tty, START_CHAR(tty)); break; default: return -EINVAL; @@ -538,6 +565,8 @@ if (retval) return retval; tty_wait_until_sent(tty, 0); + if (current->signal & ~current->blocked) + return -EINTR; if (!tty->driver.ioctl) return 0; tty->driver.ioctl(tty, file, cmd, arg); diff -u --recursive --new-file v2.1.7/linux/drivers/net/3c501.c linux/drivers/net/3c501.c --- v2.1.7/linux/drivers/net/3c501.c Mon May 6 12:26:07 1996 +++ linux/drivers/net/3c501.c Wed Nov 6 14:41:17 1996 @@ -237,8 +237,6 @@ static int el1_probe1(struct device *dev, int ioaddr) { -#ifndef MODULE - const char *mname; /* Vendor name */ unsigned char station_addr[6]; int autoirq = 0; @@ -343,8 +341,6 @@ */ ether_setup(dev); - -#endif /* !MODULE */ return 0; } diff -u --recursive --new-file v2.1.7/linux/drivers/net/3c59x.c linux/drivers/net/3c59x.c --- v2.1.7/linux/drivers/net/3c59x.c Tue Oct 29 19:58:11 1996 +++ linux/drivers/net/3c59x.c Thu Nov 7 11:25:56 1996 @@ -634,7 +634,7 @@ /* Switch to register set 7 for normal use. */ EL3WINDOW(7); - /* Set reciever mode: presumably accept b-case and phys addr only. */ + /* Set receiver mode: presumably accept b-case and phys addr only. */ set_rx_mode(dev); outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. 
*/ diff -u --recursive --new-file v2.1.7/linux/drivers/net/ppp.c linux/drivers/net/ppp.c --- v2.1.7/linux/drivers/net/ppp.c Tue Oct 29 19:58:12 1996 +++ linux/drivers/net/ppp.c Sun Nov 3 11:04:41 1996 @@ -136,6 +136,7 @@ static int rcv_proto_ip (struct ppp *, __u16, __u8 *, int); static int rcv_proto_ipx (struct ppp *, __u16, __u8 *, int); +static int rcv_proto_ipv6 (struct ppp *, __u16, __u8 *, int); static int rcv_proto_vjc_comp (struct ppp *, __u16, __u8 *, int); static int rcv_proto_vjc_uncomp (struct ppp *, __u16, __u8 *, int); static int rcv_proto_unknown (struct ppp *, __u16, __u8 *, int); @@ -264,6 +265,7 @@ ppp_proto_type proto_list[] = { { PPP_IP, rcv_proto_ip }, { PPP_IPX, rcv_proto_ipx }, + { PPP_IPV6, rcv_proto_ipv6 }, { PPP_VJC_COMP, rcv_proto_vjc_comp }, { PPP_VJC_UNCOMP, rcv_proto_vjc_uncomp }, { PPP_LQR, rcv_proto_lqr }, @@ -1239,6 +1241,18 @@ } /* + * Process the receipt of an IPV6 frame + */ + +static int +rcv_proto_ipv6 (struct ppp *ppp, __u16 proto, __u8 * data, int count) +{ + if (((ppp2dev (ppp)->flags & IFF_UP) != 0) && (count > 0)) + return ppp_rcv_rx (ppp, htons (ETH_P_IPV6), data, count); + return 0; +} + +/* * Process the receipt of an VJ Compressed frame */ @@ -3102,6 +3116,10 @@ case ETH_P_IP: answer = ppp_dev_xmit_ip (dev, ppp, data); + break; + + case ETH_P_IPV6: + answer = ppp_dev_xmit_ipx (dev, ppp, data, len, PPP_IPV6); break; default: /* All others have no support at this time. 
*/ diff -u --recursive --new-file v2.1.7/linux/drivers/pci/pci.c linux/drivers/pci/pci.c --- v2.1.7/linux/drivers/pci/pci.c Wed Oct 16 10:48:19 1996 +++ linux/drivers/pci/pci.c Wed Nov 6 12:12:53 1996 @@ -70,6 +70,7 @@ DEVICE( DEC, DEC_FDDI, "DEFPA"), DEVICE( DEC, DEC_TULIP_PLUS, "DC21041"), DEVICE( DEC, DEC_21052_AB, "DC21052-AB"), + DEVICE( DEC, DEC_21152_AA, "DC21152-AA"), DEVICE( CIRRUS, CIRRUS_5430, "GD 5430"), DEVICE( CIRRUS, CIRRUS_5434_4, "GD 5434"), DEVICE( CIRRUS, CIRRUS_5434_8, "GD 5434"), diff -u --recursive --new-file v2.1.7/linux/drivers/sbus/char/sunkbd.c linux/drivers/sbus/char/sunkbd.c --- v2.1.7/linux/drivers/sbus/char/sunkbd.c Sat May 4 19:39:23 1996 +++ linux/drivers/sbus/char/sunkbd.c Thu Nov 7 11:25:56 1996 @@ -735,8 +735,9 @@ #define A_CFLEX '^' #define A_TILDE '~' #define A_DIAER '"' -static unsigned char ret_diacr[] = - {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER }; +#define A_CEDIL ',' +static unsigned char ret_diacr[NR_DEAD] = + {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER, A_CEDIL }; /* If a dead key pressed twice, output a character corresponding to it, */ /* otherwise just remember the dead key. */ diff -u --recursive --new-file v2.1.7/linux/drivers/scsi/ChangeLog.ncr53c8xx linux/drivers/scsi/ChangeLog.ncr53c8xx --- v2.1.7/linux/drivers/scsi/ChangeLog.ncr53c8xx Wed Oct 16 10:48:19 1996 +++ linux/drivers/scsi/ChangeLog.ncr53c8xx Sat Nov 2 13:57:10 1996 @@ -1,3 +1,21 @@ +Sun Oct 27 22:00 1996 Gerard Roudier (groudier@club-internet.fr) + * ncr53c8xx.c ncr53c8xx.h - revision 1.14b + Add the following config parameters: + + - CONFIG_SCSI_NCR53C8XX_MAX_TAGS + Max number of queued tagged commands. + Allow from 2 to 12, default 4. + + - CONFIG_SCSI_NCR53C8XX_SYNC + Synchronous transfers frequency in MHz. + Allow from 5 to 10, default 5, 0 means asynchronous. + (And so remove CONFIG_SCSI_NCR53C8XX_FORCE_ASYNCHRONOUS) + +Sun Oct 20 16:00 1996 Gerard Roudier (groudier@club-internet.fr) + * ncr53c8xx.c + ncr_scatter() rewritten. 
+ remove "ncr dead" detection. + Sun Oct 13 19:00 1996 Gerard Roudier (groudier@club-internet.fr) * ncr53c8xx.c ncr53c8xx.h - revision 1.14a Enabling some special features makes problems with some hardware. diff -u --recursive --new-file v2.1.7/linux/drivers/scsi/Config.in linux/drivers/scsi/Config.in --- v2.1.7/linux/drivers/scsi/Config.in Tue Oct 29 19:58:12 1996 +++ linux/drivers/scsi/Config.in Sat Nov 2 13:57:09 1996 @@ -48,14 +48,17 @@ dep_tristate 'NCR53C8XX SCSI support' CONFIG_SCSI_NCR53C8XX $CONFIG_SCSI if [ "$CONFIG_SCSI_NCR53C8XX" != "n" ]; then bool ' enable tagged command queueing' CONFIG_SCSI_NCR53C8XX_TAGGED_QUEUE - bool ' force normal IO' CONFIG_SCSI_NCR53C8XX_IOMAPPED - bool ' not allow targets to disconnect' CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT - bool ' force asynchronous transfer mode' CONFIG_SCSI_NCR53C8XX_FORCE_ASYNCHRONOUS - bool ' force synchronous negotiation' CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO + bool ' use normal IO' CONFIG_SCSI_NCR53C8XX_IOMAPPED + int ' maximum number of queued commands' CONFIG_SCSI_NCR53C8XX_MAX_TAGS 4 + int ' synchronous transfers frequency in MHz' CONFIG_SCSI_NCR53C8XX_SYNC 5 + if [ "$CONFIG_SCSI_NCR53C8XX_TAGGED_QUEUE" != "y" ]; then + bool ' not allow targets to disconnect' CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT + fi fi if [ "$CONFIG_SCSI_NCR53C8XX" != "n" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then bool ' disable master parity checking' CONFIG_SCSI_NCR53C8XX_DISABLE_MPARITY_CHECK bool ' disable scsi parity checking' CONFIG_SCSI_NCR53C8XX_DISABLE_PARITY_CHECK + bool ' force synchronous negotiation' CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO fi fi dep_tristate 'IOMEGA Parallel Port ZIP drive SCSI support' CONFIG_SCSI_PPA $CONFIG_SCSI diff -u --recursive --new-file v2.1.7/linux/drivers/scsi/README.ncr53c8xx linux/drivers/scsi/README.ncr53c8xx --- v2.1.7/linux/drivers/scsi/README.ncr53c8xx Wed Oct 16 10:48:19 1996 +++ linux/drivers/scsi/README.ncr53c8xx Sat Nov 2 13:57:10 1996 @@ -4,7 +4,7 @@ 21 Rue Carnot 95170 DEUIL LA BARRE - 
FRANCE -13 October 1996 +28 October 1996 =============================================================================== 1. Introduction @@ -52,9 +52,7 @@ configuration parameters and control commands available through the proc SCSI file system read / write operations. -This driver has been tested OK with linux/i386 and is currently untested -under linux/Alpha. If you intend to use this driver under linux/Alpha, just -try it first with read-only or mounted read-only devices. +This driver has been tested OK with linux/i386 and Linux/Alpha. I am not a native speaker of English and there are probably lots of mistakes in this README file. Any help will be welcome. @@ -81,7 +79,7 @@ 815 Y N N Y Y 825 Y Y N Y Y 825A Y Y N Y Not yet -860 N Y N Y Y +860 N N Y(1) Y Y 875 Y Y Y(1) Y Y (1) Ultra SCSI extensions will be supported in a future release of the @@ -356,9 +354,16 @@ Answer "y" if you are sure that all your SCSI devices that are able to accept tagged commands will proceed safely. -CONFIG_SCSI_NCR53C8XX_FORCE_ASYNCHRONOUS (default answer: n) - This option forces asynchronous transfer mode for all SCSI devices. - +CONFIG_SCSI_NCR53C8XX_MAX_TAGS (default answer: 4) + This option allows you to specify the maximum number of tagged commands + that can be queued to a device. + +CONFIG_SCSI_NCR53C8XX_SYNC (default answer: 5) + This option allows you to specify the frequency in MHz the driver + will use at boot time for synchronous data transfer negotiations. + This frequency can be changed later with the "setsync" control command. + 0 means "asynchronous data transfers". + CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO (default answer: n) Force synchronous negotiation for all SCSI-2 devices. Some SCSI-2 devices do not report this feature in byte 7 of inquiry @@ -377,9 +382,12 @@ Do that only if you know what you are doing. SCSI_NCR_TRUST_BIOS_SETTING (default: not defined) - If defined, the driver will preserve features bits in - dmode/dcntl/ctest3/ctest4 io register. 
- Else, it will enable features according to chip and revision id. + If defined, the driver will preserve features bits from + dmode/dcntl/ctest3/ctest4 io registers. + +SCSI_NCR_SPECIAL_FEATURES (default: not defined) + If defined, the driver will enable some special features according + to chip and revision id. SCSI_NCR_IOMAPPED (default: not defined) If defined, normal I/O is forced. @@ -391,6 +399,11 @@ Maximum number of simultaneous tagged commands to a device. Can be changed by "settags " +SCSI_NCR_DEFAULT_SYNC (default: 5) + Frequency in KHz the driver will use at boot time for synchronous + negotiation. 0 means asynchronous. + Can be changed by "setsync " + SCSI_NCR_DEFAULT_TAGS (default: 4) Default number of simultaneous tagged commands to a device. < 1 means tagged command queuing disabled at start-up. @@ -399,10 +412,6 @@ Use SIMPLE TAG for read and write commands. Can be changed by "setorder " -SCSI_NCR_TAGGED_QUEUE_DISABLED (default: defined) - If defined, tagged command queuing is disable at start-up. - Can be changed by "settags " - SCSI_NCR_NO_DISCONNECT (default: not defined) If defined, targets are not allowed to disconnect. @@ -483,7 +492,7 @@ mkdir ncrB2L cd ncrB2L - tar zxvf ncrBsd2Linux-1.14a-src.tar.gz + tar zxvf ncrBsd2Linux-1.14b-src.tar.gz 12. 
Installation procedure for Linux version 1 diff -u --recursive --new-file v2.1.7/linux/drivers/scsi/aha1542.c linux/drivers/scsi/aha1542.c --- v2.1.7/linux/drivers/scsi/aha1542.c Sat Oct 5 16:58:34 1996 +++ linux/drivers/scsi/aha1542.c Thu Nov 7 11:25:56 1996 @@ -1035,7 +1035,7 @@ shpnt->dma_channel = dma_chan; shpnt->irq = irq_level; HOSTDATA(shpnt)->bios_translation = trans; - if(trans == 2) + if(trans == BIOS_TRANSLATION_25563) printk("aha1542.c: Using extended bios translation\n"); HOSTDATA(shpnt)->aha1542_last_mbi_used = (2*AHA1542_MAILBOXES - 1); HOSTDATA(shpnt)->aha1542_last_mbo_used = (AHA1542_MAILBOXES - 1); @@ -1308,8 +1308,8 @@ int size = disk->capacity; translation_algorithm = HOSTDATA(disk->device->host)->bios_translation; - /* Should this be > 1024, or >= 1024? Enquiring minds want to know. */ - if((size>>11) > 1024 && translation_algorithm == 2) { + + if((size>>11) > 1024 && translation_algorithm == BIOS_TRANSLATION_25563) { /* Please verify that this is the same as what DOS returns */ ip[0] = 255; ip[1] = 63; @@ -1318,8 +1318,8 @@ ip[0] = 64; ip[1] = 32; ip[2] = size >> 11; - }; -/* if (ip[2] >= 1024) ip[2] = 1024; */ + } + return 0; } diff -u --recursive --new-file v2.1.7/linux/drivers/scsi/dtc.c linux/drivers/scsi/dtc.c --- v2.1.7/linux/drivers/scsi/dtc.c Tue Apr 9 14:25:37 1996 +++ linux/drivers/scsi/dtc.c Wed Nov 6 12:21:09 1996 @@ -94,23 +94,21 @@ static struct override { - unsigned char *address; + unsigned int address; int irq; } overrides #ifdef OVERRIDE [] = OVERRIDE; #else -[4] = {{NULL, IRQ_AUTO}, {NULL, IRQ_AUTO}, {NULL, IRQ_AUTO}, - {NULL, IRQ_AUTO}}; +[4] = {{0, IRQ_AUTO}, {0, IRQ_AUTO}, {0, IRQ_AUTO}, {0, IRQ_AUTO}}; #endif #define NO_OVERRIDES (sizeof(overrides) / sizeof(struct override)) static struct base { - unsigned char *address; + unsigned int address; int noauto; -} bases[] = {{(unsigned char *) 0xcc000, 0}, {(unsigned char *) 0xc8000, 0}, -{(unsigned char *) 0xdc000, 0}, {(unsigned char *) 0xd8000, 0}}; +} bases[] = 
{{0xcc000, 0}, {0xc8000, 0}, {0xdc000, 0}, {0xd8000, 0}}; #define NO_BASES (sizeof (bases) / sizeof (struct base)) @@ -138,10 +136,10 @@ printk("dtc_setup: usage dtc=address,irq\n"); else if (commandline_current < NO_OVERRIDES) { - overrides[commandline_current].address = (unsigned char *) ints[1]; + overrides[commandline_current].address = ints[1]; overrides[commandline_current].irq = ints[2]; for (i = 0; i < NO_BASES; ++i) - if (bases[i].address == (unsigned char *) ints[1]) { + if (bases[i].address == ints[1]) { bases[i].noauto = 1; break; } @@ -166,25 +164,26 @@ int dtc_detect(Scsi_Host_Template * tpnt) { static int current_override = 0, current_base = 0; struct Scsi_Host *instance; - unsigned char *base; + unsigned int base; int sig, count; tpnt->proc_dir = &proc_scsi_dtc; tpnt->proc_info = &dtc_proc_info; for (count = 0; current_override < NO_OVERRIDES; ++current_override) { - base = NULL; + base = 0; if (overrides[current_override].address) base = overrides[current_override].address; else for (; !base && (current_base < NO_BASES); ++current_base) { #if (DTCDEBUG & DTCDEBUG_INIT) - printk("scsi : probing address %08x\n", (unsigned int) bases[current_base].address); + printk("scsi : probing address %08x\n", bases[current_base].address); #endif for (sig = 0; sig < NO_SIGNATURES; ++sig) - if (!bases[current_base].noauto && !memcmp - (bases[current_base].address + signatures[sig].offset, + if (!bases[current_base].noauto && + check_signature(bases[current_base].address + + signatures[sig].offset, signatures[sig].string, strlen(signatures[sig].string))) { base = bases[current_base].address; #if (DTCDEBUG & DTCDEBUG_INIT) @@ -195,14 +194,14 @@ } #if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT) - printk("scsi-dtc : base = %08x\n", (unsigned int) base); + printk("scsi-dtc : base = %08x\n", base); #endif if (!base) break; instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata)); - instance->base = base; + instance->base = (void *)base; 
NCR5380_init(instance, 0); @@ -282,6 +281,7 @@ return 0; } + /**************************************************************** * Function : int NCR5380_pread (struct Scsi_Host *instance, * unsigned char *dst, int len) @@ -320,7 +320,7 @@ while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY) ++i; rtrc(3); - memcpy(d, (char *)(base + DTC_DATA_BUF), 128); + memcpy_fromio(d, base + DTC_DATA_BUF, 128); d += 128; len -= 128; rtrc(7); /*** with int's on, it sometimes hangs after here. @@ -370,7 +370,7 @@ while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY) ++i; rtrc(3); - memcpy((char *)(base + DTC_DATA_BUF), src, 128); + memcpy_toio(base + DTC_DATA_BUF, src, 128); src += 128; len -= 128; } diff -u --recursive --new-file v2.1.7/linux/drivers/scsi/dtc.h linux/drivers/scsi/dtc.h --- v2.1.7/linux/drivers/scsi/dtc.h Mon Apr 29 17:14:19 1996 +++ linux/drivers/scsi/dtc.h Wed Nov 6 12:21:09 1996 @@ -6,7 +6,7 @@ * drew@colorado.edu * +1 (303) 440-4894 * - * DISTRIBUTION RELEASE 1. + * DISTRIBUTION RELEASE 2. 
* * For more information, please consult * @@ -28,7 +28,7 @@ #ifndef DTC3280_H #define DTC3280_H -#define DTC_PUBLIC_RELEASE 1 +#define DTC_PUBLIC_RELEASE 2 /*#define DTCDEBUG 0x1*/ #define DTCDEBUG_INIT 0x1 @@ -116,40 +116,40 @@ #ifndef HOSTS_C #define NCR5380_implementation_fields \ - volatile unsigned char *base + volatile unsigned int base #define NCR5380_local_declare() \ - volatile unsigned char *base + volatile unsigned int base #define NCR5380_setup(instance) \ - base = (volatile unsigned char *) (instance)->base + base = (unsigned int)(instance)->base #define DTC_address(reg) (base + DTC_5380_OFFSET + reg) #define dbNCR5380_read(reg) \ - (rval=*(DTC_address(reg)), \ + (rval=readb(DTC_address(reg)), \ (((unsigned char) printk("DTC : read register %d at addr %08x is: %02x\n"\ , (reg), (int)DTC_address(reg), rval)), rval ) ) #define dbNCR5380_write(reg, value) do { \ printk("DTC : write %02x to register %d at address %08x\n", \ (value), (reg), (int)DTC_address(reg)); \ - *(DTC_address(reg)) = (value);} while(0) + writeb(value, DTC_address(reg));} while(0) #if !(DTCDEBUG & DTCDEBUG_TRANSFER) -#define NCR5380_read(reg) (*(DTC_address(reg))) -#define NCR5380_write(reg, value) (*(DTC_address(reg)) = (value)) +#define NCR5380_read(reg) (readb(DTC_address(reg))) +#define NCR5380_write(reg, value) (writeb(value, DTC_address(reg))) #else -#define NCR5380_read(reg) (*(DTC_address(reg))) +#define NCR5380_read(reg) (readb(DTC_address(reg))) #define xNCR5380_read(reg) \ (((unsigned char) printk("DTC : read register %d at address %08x\n"\ - , (reg), DTC_address(reg))), *(DTC_address(reg))) + , (reg), DTC_address(reg))), readb(DTC_address(reg))) #define NCR5380_write(reg, value) do { \ printk("DTC : write %02x to register %d at address %08x\n", \ (value), (reg), (int)DTC_address(reg)); \ - *(DTC_address(reg)) = (value); } while(0) + writeb(value, DTC_address(reg));} while(0) #endif #define NCR5380_intr dtc_intr diff -u --recursive --new-file 
v2.1.7/linux/drivers/scsi/fdomain.c linux/drivers/scsi/fdomain.c --- v2.1.7/linux/drivers/scsi/fdomain.c Wed Oct 16 10:48:20 1996 +++ linux/drivers/scsi/fdomain.c Sat Nov 2 16:40:27 1996 @@ -469,7 +469,7 @@ { "IBM F1 P2 BIOS v1.0104/29/93", 5, 28, 3, -1, 0 }, { "Future Domain Corp. V1.0008/18/93", 5, 33, 3, 4, 0 }, { "Future Domain Corp. V1.0008/18/93", 26, 33, 3, 4, 1 }, - { "Adaptec AHA-2920 PCI-SCSI Card", 42, 31, 3, 0, 1 }, + { "Adaptec AHA-2920 PCI-SCSI Card", 42, 31, 3, -1, 1 }, /* This next signature may not be a 3.5 bios */ { "Future Domain Corp. V2.0108/18/93", 5, 33, 3, 5, 0 }, { "FUTURE DOMAIN CORP. V3.5008/18/93", 5, 34, 3, 5, 0 }, diff -u --recursive --new-file v2.1.7/linux/drivers/scsi/ncr53c8xx.c linux/drivers/scsi/ncr53c8xx.c --- v2.1.7/linux/drivers/scsi/ncr53c8xx.c Wed Oct 16 10:48:21 1996 +++ linux/drivers/scsi/ncr53c8xx.c Sat Nov 2 13:57:10 1996 @@ -159,6 +159,10 @@ #define SCSI_NCR_MAX_SYNC (10000) #endif +#ifndef SCSI_NCR_DEFAULT_SYNC +#define SCSI_NCR_DEFAULT_SYNC SCSI_NCR_MAX_SYNC +#endif + /* ** The maximal bus with (in log2 byte) ** (0=8 bit, 1=16 bit) @@ -1255,9 +1259,9 @@ int chip; /* Chip number */ struct timer_list timer; /* Timer link header */ int ncr_cache; /* Cache test variable */ - int release_stage; /* Synchronisation stage on release */ Scsi_Cmnd *waiting_list; /* Waiting list header for commands */ /* that we can't put into the squeue */ + u_char release_stage; /* Synchronisation stage on release */ /*----------------------------------------------- ** Added field to support differences @@ -3883,9 +3887,10 @@ ** **---------------------------------------------------- */ - +#ifdef SCSI_NCR_PROFILE bzero (&cp->phys.header.stamp, sizeof (struct tstamp)); cp->phys.header.stamp.start = jiffies; +#endif /*---------------------------------------------------- ** @@ -5007,10 +5012,10 @@ usrsync = 255; -#ifndef SCSI_NCR_FORCE_ASYNCHRONOUS +#if defined(SCSI_NCR_DEFAULT_SYNC) && SCSI_NCR_DEFAULT_SYNC != 0 if (SCSI_NCR_MAX_SYNC) { u_long 
period; - period =1000000/SCSI_NCR_MAX_SYNC; /* ns = 10e6 / kHz */ + period =1000000/SCSI_NCR_DEFAULT_SYNC; /* ns = 10e6 / kHz */ if (period <= 11 * np->ns_sync) { if (period < 4 * np->ns_sync) usrsync = np->ns_sync; @@ -5461,6 +5466,7 @@ */ OUTB (nc_istat, SIGP); } +#ifdef undef if (np->latetime>10) { /* ** Although we tried to wake it up, @@ -5481,6 +5487,7 @@ #endif np->heartbeat = thistime; } +#endif /* undef */ /*---------------------------------------------------- ** @@ -7238,31 +7245,39 @@ static int ncr_scatter(ccb_p cp, Scsi_Cmnd *cmd) { - struct dsb *phys = &cp->phys; - u_short segment = 0; - - cp->data_len = 0; - bzero (&phys->data, sizeof (phys->data)); - - if (!cmd->use_sg) { - phys->data[segment].addr = vtophys(cmd->request_buffer); - phys->data[segment].size = cmd->request_bufflen; - cp->data_len += phys->data[segment].size; - segment++; - return segment; + struct scr_tblmove *data; + int segment = 0; + int use_sg = (int) cmd->use_sg; + + bzero (cp->phys.data, sizeof (cp->phys.data)); + data = cp->phys.data; + cp->data_len = 0; + + if (!use_sg) { + if (cmd->request_bufflen) { + data[0].addr = vtophys(cmd->request_buffer); + data[0].size = cmd->request_bufflen; + cp->data_len = data[0].size; + segment = 1; + } } + else if (use_sg < MAX_SCATTER) { + struct scatterlist *scatter = (struct scatterlist *)cmd->buffer; - while (segment < cmd->use_sg && segment < MAX_SCATTER) { - struct scatterlist *scatter = (struct scatterlist *)cmd->buffer; - - phys->data[segment].addr = vtophys(scatter[segment].address); - phys->data[segment].size = scatter[segment].length; - cp->data_len += phys->data[segment].size; - ++segment; + while (segment < use_sg) { + data[segment].addr = vtophys(scatter[segment].address); + data[segment].size = scatter[segment].length; + cp->data_len += data[segment].size; + ++segment; + } + } + else { + return -1; } - return segment < cmd->use_sg ? 
-1 : segment; + return segment; } + #endif /* SCSI_NCR_SEGMENT_SIZE */ /*========================================================== diff -u --recursive --new-file v2.1.7/linux/drivers/scsi/ncr53c8xx.h linux/drivers/scsi/ncr53c8xx.h --- v2.1.7/linux/drivers/scsi/ncr53c8xx.h Wed Oct 16 10:48:22 1996 +++ linux/drivers/scsi/ncr53c8xx.h Sat Nov 2 13:57:09 1996 @@ -43,6 +43,11 @@ #define NCR53C8XX_H /* +** Name and revision of the driver +*/ +#define SCSI_NCR_DRIVER_NAME "ncr53c8xx - revision 1.14b" + +/* ** If SCSI_NCR_SPECIAL_FEATURES is defined, ** the driver enables or not the following features according to chip id ** revision id: @@ -143,25 +148,51 @@ ** Avoid to change these constants, unless you know what you are doing. */ +#ifdef CONFIG_SCSI_NCR53C8XX_MAX_TAGS +#if CONFIG_SCSI_NCR53C8XX_MAX_TAGS < 2 +#define SCSI_NCR_MAX_TAGS (2) +#elif CONFIG_SCSI_NCR53C8XX_MAX_TAGS > 12 +#define SCSI_NCR_MAX_TAGS (12) +#else +#define SCSI_NCR_MAX_TAGS CONFIG_SCSI_NCR53C8XX_MAX_TAGS +#endif +#else #define SCSI_NCR_MAX_TAGS (4) +#endif + #define SCSI_NCR_ALWAYS_SIMPLE_TAG +#ifdef CONFIG_SCSI_NCR53C8XX_TAGGED_QUEUE +#define SCSI_NCR_DEFAULT_TAGS SCSI_NCR_MAX_TAGS +#else +#define SCSI_NCR_DEFAULT_TAGS (0) +#endif + #ifdef CONFIG_SCSI_NCR53C8XX_IOMAPPED #define SCSI_NCR_IOMAPPED #endif -#ifdef CONFIG_SCSI_NCR53C8XX_TAGGED_QUEUE -#define SCSI_NCR_DEFAULT_TAGS SCSI_NCR_MAX_TAGS +#ifdef CONFIG_SCSI_NCR53C8XX_SYNC +#if CONFIG_SCSI_NCR53C8XX_SYNC == 0 +#define SCSI_NCR_DEFAULT_SYNC (0) +#elif CONFIG_SCSI_NCR53C8XX_SYNC < 5 +#define SCSI_NCR_DEFAULT_SYNC (5000) +#elif CONFIG_SCSI_NCR53C8XX_SYNC > 10 +#define SCSI_NCR_DEFAULT_SYNC (10000) #else -#define SCSI_NCR_DEFAULT_TAGS (0) +#define SCSI_NCR_DEFAULT_SYNC (CONFIG_SCSI_NCR53C8XX_SYNC * 1000) +#endif +#else +#define SCSI_NCR_DEFAULT_SYNC (10000) #endif -#ifdef CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT -#define SCSI_NCR_NO_DISCONNECT +#ifdef CONFIG_SCSI_FORCE_ASYNCHRONOUS +#undef SCSI_NCR_DEFAULT_SYNC +#define SCSI_NCR_DEFAULT_SYNC (0) #endif 
-#ifdef CONFIG_SCSI_NCR53C8XX_FORCE_ASYNCHRONOUS -#define SCSI_NCR_FORCE_ASYNCHRONOUS +#ifdef CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT +#define SCSI_NCR_NO_DISCONNECT #endif #ifdef CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO @@ -230,7 +261,7 @@ #if LINUX_VERSION_CODE >= LinuxVersionCode(1,3,0) -#define NCR53C8XX {NULL,NULL,NULL,NULL,"ncr53c8xx (rel 1.14a)", ncr53c8xx_detect,\ +#define NCR53C8XX {NULL,NULL,NULL,NULL,SCSI_NCR_DRIVER_NAME, ncr53c8xx_detect,\ ncr53c8xx_release, /* info */ NULL, /* command, deprecated */ NULL, \ ncr53c8xx_queue_command, ncr53c8xx_abort, ncr53c8xx_reset, \ NULL /* slave attach */, scsicam_bios_param, /* can queue */ SCSI_NCR_CAN_QUEUE,\ @@ -241,7 +272,7 @@ #else -#define NCR53C8XX {NULL, NULL, "ncr53c8xx (rel 1.14a)", ncr53c8xx_detect,\ +#define NCR53C8XX {NULL, NULL, SCSI_NCR_DRIVER_NAME, ncr53c8xx_detect,\ ncr53c8xx_release, /* info */ NULL, /* command, deprecated */ NULL, \ ncr53c8xx_queue_command, ncr53c8xx_abort, ncr53c8xx_reset, \ NULL /* slave attach */, scsicam_bios_param, /* can queue */ SCSI_NCR_CAN_QUEUE,\ diff -u --recursive --new-file v2.1.7/linux/drivers/scsi/scsicam.c linux/drivers/scsi/scsicam.c --- v2.1.7/linux/drivers/scsi/scsicam.c Sun Aug 4 13:39:07 1996 +++ linux/drivers/scsi/scsicam.c Thu Nov 7 11:25:56 1996 @@ -54,22 +54,28 @@ if (!(bh = bread(MKDEV(MAJOR(dev), MINOR(dev)&~0xf), 0, 1024))) return -1; -#ifdef DEBUG - printk ("scsicam_bios_param : trying existing mapping\n"); -#endif + /* try to infer mapping from partition table */ ret_code = partsize (bh, (unsigned long) size, (unsigned int *) ip + 2, (unsigned int *) ip + 0, (unsigned int *) ip + 1); brelse (bh); if (ret_code == -1) { -#ifdef DEBUG - printk ("scsicam_bios_param : trying optimal mapping\n"); -#endif + /* pick some standard mapping with at most 1024 cylinders, + and at most 62 sectors per track - this works up to + 7905 MB */ ret_code = setsize ((unsigned long) size, (unsigned int *) ip + 2, (unsigned int *) ip + 0, (unsigned int *) ip + 1); } - return 
ret_code; + /* if something went wrong, then apparently we have to return + a geometry with more than 1024 cylinders */ + if (ret_code || ip[0] > 255 || ip[1] > 63) { + ip[0] = 64; + ip[1] = 32; + ip[2] = size / (ip[0] * ip[1]); + } + + return 0; } /* diff -u --recursive --new-file v2.1.7/linux/fs/smbfs/inode.c linux/fs/smbfs/inode.c --- v2.1.7/linux/fs/smbfs/inode.c Tue Oct 29 19:58:44 1996 +++ linux/fs/smbfs/inode.c Thu Nov 7 14:40:58 1996 @@ -126,7 +126,7 @@ struct smb_inode_info *info = SMB_INOP(inode); int opened = finfo->opened; - int mtime = finfo->mtime; + int mtime = inode->i_mtime; int file_id = finfo->fileid; int isdir = S_ISDIR(inode->i_mode); unsigned long ino = inode->i_ino; diff -u --recursive --new-file v2.1.7/linux/include/asm-alpha/ide.h linux/include/asm-alpha/ide.h --- v2.1.7/linux/include/asm-alpha/ide.h Thu Jan 1 02:00:00 1970 +++ linux/include/asm-alpha/ide.h Wed Nov 6 14:49:33 1996 @@ -0,0 +1,119 @@ +/* + * linux/include/asm-alpha/ide.h + * + * Copyright (C) 1994-1996 Linus Torvalds & authors + */ + +/* + * This file contains the alpha architecture specific IDE code. 
+ */ + +#ifndef __ASMalpha_IDE_H +#define __ASMalpha_IDE_H + +#ifdef __KERNEL__ + +typedef unsigned short ide_ioreg_t; + +#ifndef MAX_HWIFS +#define MAX_HWIFS 4 +#endif + +#define ide_sti() sti() + +static __inline__ int ide_default_irq(ide_ioreg_t base) +{ + switch (base) { + case 0x1f0: return 14; + case 0x170: return 15; + case 0x1e8: return 11; + case 0x168: return 10; + default: + return 0; + } +} + +static __inline__ ide_ioreg_t ide_default_io_base(int index) +{ + switch (index) { + case 0: return 0x1f0; + case 1: return 0x170; + case 2: return 0x1e8; + case 3: return 0x168; + default: + return 0; + } +} + +static __inline__ void ide_init_hwif_ports (ide_ioreg_t *p, ide_ioreg_t base, int *irq) +{ + ide_ioreg_t port = base; + int i = 8; + + while (i--) + *p++ = port++; + *p++ = base + 0x206; + if (irq != NULL) + *irq = 0; +} + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned head : 4; /* always zeros here */ + unsigned unit : 1; /* drive select number, 0 or 1 */ + unsigned bit5 : 1; /* always 1 */ + unsigned lba : 1; /* using LBA instead of CHS */ + unsigned bit7 : 1; /* always 1 */ + } b; + } select_t; + +static __inline__ int ide_request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), + unsigned long flags, const char *device, void *dev_id) +{ + return request_irq(irq, handler, flags, device, dev_id); +} + +static __inline__ void ide_free_irq(unsigned int irq, void *dev_id) +{ + free_irq(irq, dev_id); +} + +static __inline__ int ide_check_region (ide_ioreg_t from, unsigned int extent) +{ + return check_region(from, extent); +} + +static __inline__ void ide_request_region (ide_ioreg_t from, unsigned int extent, const char *name) +{ + request_region(from, extent, name); +} + +static __inline__ void ide_release_region (ide_ioreg_t from, unsigned int extent) +{ + release_region(from, extent); +} + +/* + * The following are not needed for the non-m68k ports + */ +static __inline__ int ide_ack_intr 
(ide_ioreg_t base_port, ide_ioreg_t irq_port) +{ + return(1); +} + +static __inline__ void ide_fix_driveid(struct hd_driveid *id) +{ +} + +static __inline__ void ide_release_lock (int *ide_lock) +{ +} + +static __inline__ void ide_get_lock (int *ide_lock, void (*handler)(int, void *, struct pt_regs *), void *data) +{ +} + +#endif /* __KERNEL__ */ + +#endif /* __ASMalpha_IDE_H */ diff -u --recursive --new-file v2.1.7/linux/include/asm-i386/checksum.h linux/include/asm-i386/checksum.h --- v2.1.7/linux/include/asm-i386/checksum.h Mon Sep 23 15:32:35 1996 +++ linux/include/asm-i386/checksum.h Sun Nov 3 11:04:41 1996 @@ -115,4 +115,31 @@ return csum_fold (csum_partial(buff, len, 0)); } +#define _HAVE_ARCH_IPV6_CSUM +static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr, + struct in6_addr *daddr, + __u16 len, + unsigned short proto, + unsigned int sum) +{ + __asm__(" + addl 0(%1), %0 + adcl 4(%1), %0 + adcl 8(%1), %0 + adcl 12(%1), %0 + adcl 0(%2), %0 + adcl 4(%2), %0 + adcl 8(%2), %0 + adcl 12(%2), %0 + adcl %3, %0 + adcl %4, %0 + adcl $0, %0 + " + : "=&r" (sum) + : "r" (saddr), "r" (daddr), + "r"(htonl((__u32) (len))), "r"(htonl(proto)), "0"(sum)); + + return csum_fold(sum); +} + #endif diff -u --recursive --new-file v2.1.7/linux/include/asm-i386/ide.h linux/include/asm-i386/ide.h --- v2.1.7/linux/include/asm-i386/ide.h Thu Jan 1 02:00:00 1970 +++ linux/include/asm-i386/ide.h Wed Nov 6 14:49:33 1996 @@ -0,0 +1,119 @@ +/* + * linux/include/asm-i386/ide.h + * + * Copyright (C) 1994-1996 Linus Torvalds & authors + */ + +/* + * This file contains the i386 architecture specific IDE code. 
+ */ + +#ifndef __ASMi386_IDE_H +#define __ASMi386_IDE_H + +#ifdef __KERNEL__ + +typedef unsigned short ide_ioreg_t; + +#ifndef MAX_HWIFS +#define MAX_HWIFS 4 +#endif + +#define ide_sti() sti() + +static __inline__ int ide_default_irq(ide_ioreg_t base) +{ + switch (base) { + case 0x1f0: return 14; + case 0x170: return 15; + case 0x1e8: return 11; + case 0x168: return 10; + default: + return 0; + } +} + +static __inline__ ide_ioreg_t ide_default_io_base(int index) +{ + switch (index) { + case 0: return 0x1f0; + case 1: return 0x170; + case 2: return 0x1e8; + case 3: return 0x168; + default: + return 0; + } +} + +static __inline__ void ide_init_hwif_ports (ide_ioreg_t *p, ide_ioreg_t base, int *irq) +{ + ide_ioreg_t port = base; + int i = 8; + + while (i--) + *p++ = port++; + *p++ = base + 0x206; + if (irq != NULL) + *irq = 0; +} + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned head : 4; /* always zeros here */ + unsigned unit : 1; /* drive select number, 0 or 1 */ + unsigned bit5 : 1; /* always 1 */ + unsigned lba : 1; /* using LBA instead of CHS */ + unsigned bit7 : 1; /* always 1 */ + } b; + } select_t; + +static __inline__ int ide_request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), + unsigned long flags, const char *device, void *dev_id) +{ + return request_irq(irq, handler, flags, device, dev_id); +} + +static __inline__ void ide_free_irq(unsigned int irq, void *dev_id) +{ + free_irq(irq, dev_id); +} + +static __inline__ int ide_check_region (ide_ioreg_t from, unsigned int extent) +{ + return check_region(from, extent); +} + +static __inline__ void ide_request_region (ide_ioreg_t from, unsigned int extent, const char *name) +{ + request_region(from, extent, name); +} + +static __inline__ void ide_release_region (ide_ioreg_t from, unsigned int extent) +{ + release_region(from, extent); +} + +/* + * The following are not needed for the non-m68k ports + */ +static __inline__ int ide_ack_intr 
(ide_ioreg_t base_port, ide_ioreg_t irq_port) +{ + return(1); +} + +static __inline__ void ide_fix_driveid(struct hd_driveid *id) +{ +} + +static __inline__ void ide_release_lock (int *ide_lock) +{ +} + +static __inline__ void ide_get_lock (int *ide_lock, void (*handler)(int, void *, struct pt_regs *), void *data) +{ +} + +#endif /* __KERNEL__ */ + +#endif /* __ASMi386_IDE_H */ diff -u --recursive --new-file v2.1.7/linux/include/asm-m68k/ide.h linux/include/asm-m68k/ide.h --- v2.1.7/linux/include/asm-m68k/ide.h Tue Jul 23 11:33:32 1996 +++ linux/include/asm-m68k/ide.h Wed Nov 6 14:49:33 1996 @@ -1,8 +1,35 @@ +/* + * linux/include/asm-m68k/ide.h + * + * Copyright (C) 1994-1996 Linus Torvalds & authors + */ + +/* Copyright(c) 1996 Kars de Jong */ +/* Based on the ide driver from 1.2.13pl8 */ + +/* + * Credits (alphabetical): + * + * - Bjoern Brauel + * - Kars de Jong + * - Torsten Ebeling + * - Dwight Engen + * - Thorsten Floeck + * - Roman Hodek + * - Guenther Kelleter + * - Chris Lawrence + * - Michael Rausch + * - Christian Sauer + * - Michael Schmitz + * - Jes Soerensen + * - Michael Thurm + * - Geert Uytterhoeven + */ + #ifndef _M68K_IDE_H #define _M68K_IDE_H -/* Copyright(c) 1996 Kars de Jong */ -/* Based on the ide driver from 1.2.13pl8 */ +#ifdef __KERNEL__ #include @@ -19,42 +46,132 @@ #include #endif /* CONFIG_ATARI */ -#include +#include +#include +#include -struct hd_regs_struct { - unsigned int hd_error, - hd_nsector, - hd_sector, - hd_lcyl, - hd_hcyl, - hd_select, - hd_status; -}; +typedef unsigned char * ide_ioreg_t; + +#ifndef MAX_HWIFS +#define MAX_HWIFS 1 +#endif + +static __inline int ide_default_irq (ide_ioreg_t base) +{ + return 0; +} + +static __inline__ ide_ioreg_t ide_default_io_base (int index) +{ + if (index) + return NULL; +#ifdef CONFIG_AMIGA + if (MACH_IS_AMIGA) { + if (AMIGAHW_PRESENT(A4000_IDE)) { + printk("Gayle IDE interface (A%d style)\n", 4000); + return ((ide_ioreg_t)ZTWO_VADDR(HD_BASE_A4000)); + } + if (AMIGAHW_PRESENT(A1200_IDE)) 
{ + printk("Gayle IDE interface (A%d style)\n", 1200); + return ((ide_ioreg_t)ZTWO_VADDR(HD_BASE_A1200)); + } + } +#endif /* CONFIG_AMIGA */ +#ifdef CONFIG_ATARI + if (MACH_IS_ATARI) { + if (ATARIHW_PRESENT(IDE)) { + printk("Falcon IDE interface\n"); + return ((ide_ioreg_t) ATA_HD_BASE); + } + } +#endif /* CONFIG_ATARI */ + return NULL; +} + +static __inline__ void ide_init_hwif_ports (ide_ioreg_t *p, ide_ioreg_t base, int *irq) +{ + *p++ = base; +#ifdef CONFIG_AMIGA + if (MACH_IS_AMIGA) { + *p++ = base + AMI_HD_ERROR; + *p++ = base + AMI_HD_NSECTOR; + *p++ = base + AMI_HD_SECTOR; + *p++ = base + AMI_HD_LCYL; + *p++ = base + AMI_HD_HCYL; + *p++ = base + AMI_HD_SELECT; + *p++ = base + AMI_HD_STATUS; + *p++ = base + AMI_HD_CMD; + if (AMIGAHW_PRESENT(A4000_IDE)) + *p++ = (ide_ioreg_t) ZTWO_VADDR(HD_A4000_IRQ); + else if (AMIGAHW_PRESENT(A1200_IDE)) + *p++ = (ide_ioreg_t) ZTWO_VADDR(HD_A1200_IRQ); + if (irq != NULL) + *irq = IRQ_AMIGA_PORTS; + } +#endif /* CONFIG_AMIGA */ +#ifdef CONFIG_ATARI + if (MACH_IS_ATARI) { + *p++ = base + ATA_HD_ERROR; + *p++ = base + ATA_HD_NSECTOR; + *p++ = base + ATA_HD_SECTOR; + *p++ = base + ATA_HD_LCYL; + *p++ = base + ATA_HD_HCYL; + *p++ = base + ATA_HD_CURRENT; + *p++ = base + ATA_HD_STATUS; + *p++ = base + ATA_HD_CMD; + if (irq != NULL) + *irq = IRQ_MFP_IDE; + } +#endif /* CONFIG_ATARI */ +} + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit7 : 1; /* always 1 */ + unsigned lba : 1; /* using LBA instead of CHS */ + unsigned bit5 : 1; /* always 1 */ + unsigned unit : 1; /* drive select number, 0 or 1 */ + unsigned head : 4; /* always zeros here */ + } b; + } select_t; + +static __inline__ int ide_request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), + unsigned long flags, const char *device, void *dev_id) +{ +#ifdef CONFIG_AMIGA + if (MACH_IS_AMIGA) + return request_irq(irq, handler, 0, device, dev_id); +#endif /* CONFIG_AMIGA */ + return 0; +} + +static __inline__ 
void ide_free_irq(unsigned int irq, void *dev_id) +{ +#ifdef CONFIG_AMIGA + if (MACH_IS_AMIGA) + free_irq(irq, dev_id); +#endif /* CONFIG_AMIGA */ +} + +/* + * We should really implement those some day. + */ +static __inline__ int ide_check_region (ide_ioreg_t from, unsigned int extent) +{ + return 0; +} -static struct hd_regs_struct hd_regs; -static void probe_m68k_ide (void); +static __inline__ void ide_request_region (ide_ioreg_t from, unsigned int extent, const char *name) +{ +} -/* Undefine these again, they were defined for the PC. */ -#undef IDE_ERROR_OFFSET -#undef IDE_NSECTOR_OFFSET -#undef IDE_SECTOR_OFFSET -#undef IDE_LCYL_OFFSET -#undef IDE_HCYL_OFFSET -#undef IDE_SELECT_OFFSET -#undef IDE_STATUS_OFFSET -#undef IDE_FEATURE_OFFSET -#undef IDE_COMMAND_OFFSET -#undef SELECT_DRIVE - -#define IDE_ERROR_OFFSET hd_regs.hd_error -#define IDE_NSECTOR_OFFSET hd_regs.hd_nsector -#define IDE_SECTOR_OFFSET hd_regs.hd_sector -#define IDE_LCYL_OFFSET hd_regs.hd_lcyl -#define IDE_HCYL_OFFSET hd_regs.hd_hcyl -#define IDE_SELECT_OFFSET hd_regs.hd_select -#define IDE_STATUS_OFFSET hd_regs.hd_status -#define IDE_FEATURE_OFFSET IDE_ERROR_OFFSET -#define IDE_COMMAND_OFFSET IDE_STATUS_OFFSET +static __inline__ void ide_release_region (ide_ioreg_t from, unsigned int extent) +{ +} + +#undef SUPPORT_SLOW_DATA_PORTS +#define SUPPORT_SLOW_DATA_PORTS 0 #undef SUPPORT_VLB_SYNC #define SUPPORT_VLB_SYNC 0 @@ -62,38 +179,11 @@ #undef HD_DATA #define HD_DATA NULL -/* MSch: changed sti() to STI() wherever possible in ide.c; moved STI() def. - * to asm/ide.h - */ -/* The Atari interrupt structure strictly requires that the IPL isn't lowered - * uncontrolled in an interrupt handler. In the concrete case, the IDE - * interrupt is already a slow int, so the irq is already disabled at the time - * the handler is called, and the IPL has been lowered to the minimum value - * possible. 
To avoid going below that, STI() checks for being called inside - * an interrupt, and in that case it does nothing. Hope that is reasonable and - * works. (Roman) - */ -#if defined(CONFIG_ATARI) && !defined(CONFIG_AMIGA) -#define STI() \ - do { \ - if (!intr_count) sti(); \ - } while(0) -#elif defined(CONFIG_ATARI) -#define STI() \ - do { \ - if (!MACH_IS_ATARI || !intr_count) sti(); \ - } while(0) -#else /* !defined(CONFIG_ATARI) */ -#define STI() sti() -#endif - -#define SELECT_DRIVE(hwif,drive) OUT_BYTE((drive)->select.all, hwif->io_base+IDE_SELECT_OFFSET); - -#define insl(data_reg, buffer, wcount) insw(data_reg, buffer, wcount<<1) -#define outsl(data_reg, buffer, wcount) outsw(data_reg, buffer, wcount<<1) +#define insl(data_reg, buffer, wcount) insw(data_reg, buffer, (wcount)<<1) +#define outsl(data_reg, buffer, wcount) outsw(data_reg, buffer, (wcount)<<1) #define insw(port, buf, nr) \ - if (nr % 16) \ + if ((nr) % 16) \ __asm__ __volatile__ \ ("movel %0,%/a0; \ movel %1,%/a1; \ @@ -128,10 +218,10 @@ movew %/a0@,%/a1@+; \ dbra %/d6,1b" : \ : "g" (port), "g" (buf), "g" (nr) \ - : "a0", "a1", "d6"); + : "a0", "a1", "d6") #define outsw(port, buf, nr) \ - if (nr % 16) \ + if ((nr) % 16) \ __asm__ __volatile__ \ ("movel %0,%/a0; \ movel %1,%/a1; \ @@ -166,7 +256,128 @@ movew %/a1@+,%/a0@; \ dbra %/d6,1b" : \ : "g" (port), "g" (buf), "g" (nr) \ - : "a0", "a1", "d6"); + : "a0", "a1", "d6") + +#ifdef CONFIG_ATARI +#define insl_swapw(data_reg, buffer, wcount) \ + insw_swapw(data_reg, buffer, (wcount)<<1) +#define outsl_swapw(data_reg, buffer, wcount) \ + outsw_swapw(data_reg, buffer, (wcount)<<1) + +#define insw_swapw(port, buf, nr) \ + if ((nr) % 8) \ + __asm__ __volatile__ \ + ("movel %0,%/a0; \ + movel %1,%/a1; \ + movel %2,%/d6; \ + subql #1,%/d6; \ + 1:movew %/a0@,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a1@+; \ + dbra %/d6,1b" : \ + : "g" (port), "g" (buf), "g" (nr) \ + : "d0", "a0", "a1", "d6"); \ + else \ + __asm__ __volatile__ \ + ("movel %0,%/a0; \ + movel 
%1,%/a1; \ + movel %2,%/d6; \ + lsrl #3,%/d6; \ + subql #1,%/d6; \ + 1:movew %/a0@,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a1@+; \ + movew %/a0@,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a1@+; \ + movew %/a0@,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a1@+; \ + movew %/a0@,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a1@+; \ + movew %/a0@,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a1@+; \ + movew %/a0@,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a1@+; \ + movew %/a0@,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a1@+; \ + movew %/a0@,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a1@+; \ + dbra %/d6,1b" : \ + : "g" (port), "g" (buf), "g" (nr) \ + : "d0", "a0", "a1", "d6") + +#define outsw_swapw(port, buf, nr) \ + if ((nr) % 8) \ + __asm__ __volatile__ \ + ("movel %0,%/a0; \ + movel %1,%/a1; \ + movel %2,%/d6; \ + subql #1,%/d6; \ + 1:movew %/a1@+,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a0@; \ + dbra %/d6,1b" : \ + : "g" (port), "g" (buf), "g" (nr) \ + : "d0", "a0", "a1", "d6"); \ + else \ + __asm__ __volatile__ \ + ("movel %0,%/a0; \ + movel %1,%/a1; \ + movel %2,%/d6; \ + lsrl #3,%/d6; \ + subql #1,%/d6; \ + 1:movew %/a1@+,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a0@; \ + movew %/a1@+,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a0@; \ + movew %/a1@+,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a0@; \ + movew %/a1@+,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a0@; \ + movew %/a1@+,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a0@; \ + movew %/a1@+,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a0@; \ + movew %/a1@+,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a0@; \ + movew %/a1@+,%/d0; \ + rolw #8,%/d0; \ + movew %/d0,%/a0@; \ + dbra %/d6,1b" : \ + : "g" (port), "g" (buf), "g" (nr) \ + : "d0", "a0", "a1", "d6") + +#endif /* CONFIG_ATARI */ + +static __inline__ int ide_ack_intr (ide_ioreg_t base_port, ide_ioreg_t irq_port) +{ +#ifdef CONFIG_AMIGA + if (MACH_IS_AMIGA) { + unsigned char ch; + ch = inb(irq_port); + if (!(ch & 0x80)) + return(0); + if (AMIGAHW_PRESENT(A1200_IDE)) { + (void) inb(base_port); + 
outb(0x7c | (ch & 0x03), irq_port); + } + } +#endif /* CONFIG_AMIGA */ + return(1); +} #define T_CHAR (0x0000) /* char: don't touch */ #define T_SHORT (0x4000) /* short: 12 -> 21 */ @@ -181,6 +392,7 @@ #define D_INT(cnt) (T_INT | (cnt)) #define D_TEXT(cnt) (T_TEXT | (cnt)) +#ifdef CONFIG_AMIGA static u_short driveid_types[] = { D_SHORT(10), /* config - vendor2 */ D_TEXT(20), /* serial_no */ @@ -199,13 +411,17 @@ }; #define num_driveid_types (sizeof(driveid_types)/sizeof(*driveid_types)) +#endif /* CONFIG_AMIGA */ -static __inline__ void big_endianize_driveid(struct hd_driveid *id) +static __inline__ void ide_fix_driveid(struct hd_driveid *id) { +#ifdef CONFIG_AMIGA u_char *p = (u_char *)id; int i, j, cnt; u_char t; + if (!MACH_IS_AMIGA) + return; for (i = 0; i < num_driveid_types; i++) { cnt = driveid_types[i] & T_MASK_COUNT; switch (driveid_types[i] & T_MASK_TYPE) { @@ -241,6 +457,66 @@ break; } } +#endif /* CONFIG_AMIGA */ +} + +static __inline__ void ide_release_lock (int *ide_lock) +{ +#ifdef CONFIG_ATARI + if (MACH_IS_ATARI) { + if (*ide_lock == 0) { + printk("ide_release_lock: bug\n"); + return; + } + *ide_lock = 0; + stdma_release(); + } +#endif /* CONFIG_ATARI */ } + +static __inline__ void ide_get_lock (int *ide_lock, void (*handler)(int, void *, struct pt_regs *), void *data) +{ +#ifdef CONFIG_ATARI + if (MACH_IS_ATARI) { + if (*ide_lock == 0) { + if (intr_count > 0) + panic( "Falcon IDE hasn't ST-DMA lock in interrupt" ); + stdma_lock(handler, data); + *ide_lock = 1; + } + } +#endif /* CONFIG_ATARI */ +} + +/* + * On the Atari, we sometimes can't enable interrupts: + */ + +/* MSch: changed sti() to STI() wherever possible in ide.c; moved STI() def. + * to asm/ide.h + */ +/* The Atari interrupt structure strictly requires that the IPL isn't lowered + * uncontrolled in an interrupt handler. 
In the concrete case, the IDE + * interrupt is already a slow int, so the irq is already disabled at the time + * the handler is called, and the IPL has been lowered to the minimum value + * possible. To avoid going below that, STI() checks for being called inside + * an interrupt, and in that case it does nothing. Hope that is reasonable and + * works. (Roman) + */ +#if defined(CONFIG_ATARI) && !defined(CONFIG_AMIGA) +#define ide_sti() \ + do { \ + if (!intr_count) sti(); \ + } while(0) +#elif defined(CONFIG_ATARI) +#define ide_sti() \ + do { \ + if (!MACH_IS_ATARI || !intr_count) sti(); \ + } while(0) +#else /* !defined(CONFIG_ATARI) */ +#define ide_sti() sti() +#endif + +#endif /* __KERNEL__ */ #endif /* _M68K_IDE_H */ diff -u --recursive --new-file v2.1.7/linux/include/asm-mips/checksum.h linux/include/asm-mips/checksum.h --- v2.1.7/linux/include/asm-mips/checksum.h Wed Dec 13 12:39:45 1995 +++ linux/include/asm-mips/checksum.h Sun Nov 3 11:04:41 1996 @@ -176,4 +176,76 @@ return sum; } +#define _HAVE_ARCH_IPV6_CSUM +static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr, + struct in6_addr *daddr, + __u16 len, + unsigned short proto, + unsigned int sum) +{ + unsigned long scratch; + + __asm__(" + .set noreorder + .set noat + addu %0,%5 # proto (long in network byte order) + sltu $1,%0,%5 + addu %0,$1 + + addu %0,%6 # csum + sltu $1,%0,%6 + lw %1,0(%2) # four words source address + addu %0,$1 + addu %0,%1 + sltu $1,%0,$1 + + lw %1,4(%2) + addu %0,$1 + addu %0,%1 + sltu $1,%0,$1 + + lw %1,8(%2) + addu %0,$1 + addu %0,%1 + sltu $1,%0,$1 + + lw %1,12(%2) + addu %0,$1 + addu %0,%1 + sltu $1,%0,$1 + + lw %1,0(%3) + addu %0,$1 + addu %0,%1 + sltu $1,%0,$1 + + lw %1,4(%3) + addu %0,$1 + addu %0,%1 + sltu $1,%0,$1 + + lw %1,8(%3) + addu %0,$1 + addu %0,%1 + sltu $1,%0,$1 + + lw %1,12(%3) + addu %0,$1 + addu %0,%1 + sltu $1,%0,$1 + .set noat + .set noreorder + " + : "=r" (sum), + "=r" (scratch) + : "r" (saddr), + "r" (daddr), + "0" (htonl((__u32) 
(len))), + "r" (htonl(proto)), + "r"(sum) + : "$1"); + + return csum_fold(sum); +} + #endif /* __ASM_MIPS_CHECKSUM_H */ diff -u --recursive --new-file v2.1.7/linux/include/asm-sparc/checksum.h linux/include/asm-sparc/checksum.h --- v2.1.7/linux/include/asm-sparc/checksum.h Sun Apr 21 12:30:33 1996 +++ linux/include/asm-sparc/checksum.h Sun Nov 3 11:04:41 1996 @@ -11,307 +11,161 @@ * derived from: * Alpha checksum c-code * ix86 inline assembly + * RFC1071 Computing the Internet Checksum */ /* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary */ +extern unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum); -extern inline unsigned short csum_tcpudp_magic(unsigned long saddr, - unsigned long daddr, - unsigned short len, - unsigned short proto, - unsigned int sum) -{ - __asm__ __volatile__(" - addcc %0, %1, %0 - addxcc %0, %4, %0 - addxcc %0, %5, %0 - addx %0, %%g0, %0 - - ! We need the carry from the addition of 16-bit - ! significant addition, so we zap out the low bits - ! in one half, zap out the high bits in another, - ! shift them both up to the top 16-bits of a word - ! and do the carry producing addition, finally - ! shift the result back down to the low 16-bits. - - ! Actually, we can further optimize away two shifts - ! because we know the low bits of the original - ! value will be added to zero-only bits so cannot - ! affect the addition result nor the final carry - ! bit. - - sll %0, 16, %1 - addcc %0, %1, %0 ! add and set carry, neat eh? - srl %0, 16, %0 ! shift back down the result - addx %0, %%g0, %0 ! 
get remaining carry bit - xnor %%g0, %0, %0 ! negate, sparc is cool - " - : "=&r" (sum), "=&r" (saddr) - : "0" (daddr), "1" (saddr), "r" (len+proto), "r" (sum)); - return ((unsigned short) sum); -} - -extern inline unsigned short from32to16(unsigned long x) -{ - __asm__ __volatile__(" - addcc %0, %1, %0 - srl %0, 16, %0 - addx %%g0, %0, %0 - " - : "=r" (x) - : "r" (x << 16), "0" (x)); - return x; -} - -extern inline unsigned long do_csum(unsigned char * buff, int len) -{ - int odd, count; - unsigned long result = 0; +/* + * the same as csum_partial, but copies from fs:src while it + * checksums + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +extern unsigned int csum_partial_copy(char *src, char *dst, int len, int sum); - if (len <= 0) - goto out; - odd = 1 & (unsigned long) buff; - if (odd) { - result = *buff; - len--; - buff++; - } - count = len >> 1; /* nr of 16-bit words.. */ - if (count) { - if (2 & (unsigned long) buff) { - result += *(unsigned short *) buff; - count--; - len -= 2; - buff += 2; - } - count >>= 1; /* nr of 32-bit words.. */ - if (count) { - unsigned long carry = 0; - do { - unsigned long w = *(unsigned long *) buff; - count--; - buff += 4; - result += carry; - result += w; - carry = (w > result); - } while (count); - result += carry; - result = (result & 0xffff) + (result >> 16); - } - if (len & 2) { - result += *(unsigned short *) buff; - buff += 2; - } - } - if (len & 1) - result += (*buff << 8); - result = from32to16(result); - if (odd) - result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); -out: - return result; -} +#define csum_partial_copy_fromuser(s, d, l, w) \ + csum_partial_copy((char *) (s), (d), (l), (w)) /* ihl is always 5 or greater, almost always is 5, iph is always word * aligned but can fail to be dword aligned very often. 
*/ extern inline unsigned short ip_fast_csum(const unsigned char *iph, unsigned int ihl) { - unsigned int sum; + unsigned long tmp1, tmp2; + unsigned short sum; __asm__ __volatile__(" - ld [%1], %0 + ld [%1 + 0x00], %0 + ld [%1 + 0x04], %3 sub %2, 4, %2 - ld [%1 + 0x4], %%g1 - ld [%1 + 0x8], %%g2 - addcc %%g1, %0, %0 - addxcc %%g2, %0, %0 - ld [%1 + 0xc], %%g1 - ld [%1 + 0x10], %%g2 - addxcc %%g1, %0, %0 - addxcc %0, %%g0, %0 -1: - addcc %%g2, %0, %0 - add %1, 0x4, %1 + addcc %3, %0, %0 + ld [%1 + 0x08], %4 + addxcc %4, %0, %0 + ld [%1 + 0x0c], %3 + addxcc %3, %0, %0 + ld [%1 + 0x10], %4 + addx %0, %%g0, %0 + 1: + addcc %4, %0, %0 + add %1, 4, %1 addxcc %0, %%g0, %0 - subcc %2, 0x1, %2 - bne,a 1b - ld [%1 + 0x10], %%g2 - - sll %0, 16, %2 - addcc %0, %2, %2 - srl %2, 16, %0 - addx %0, %%g0, %2 - xnor %%g0, %2, %0 -2: - " - : "=&r" (sum), "=&r" (iph), "=&r" (ihl) - : "1" (iph), "2" (ihl) - : "g1", "g2"); + subcc %2, 1, %2 + be,a 2f + sll %0, 16, %3 + + b 1b + ld [%1 + 0x10], %4 + 2: + addcc %0, %3, %3 + srl %3, 16, %0 + addx %0, %%g0, %0 + xnor %%g0, %0, %0 + " : "=r" (sum), "=&r" (iph), "=&r" (ihl), "=r" (tmp1), "=r" (tmp2) + : "1" (iph), "2" (ihl)); + return sum; } /* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented */ -extern inline unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum) +extern inline unsigned short csum_tcpudp_magic(unsigned long saddr, unsigned long daddr, + unsigned int len, unsigned short proto, + unsigned int sum) { __asm__ __volatile__(" - mov 0, %%g5 ! g5 = result - cmp %1, 0 - bgu,a 1f - andcc %0, 1, %%g7 ! 
g7 = odd - - b,a 9f - -1: - be,a 1f - srl %1, 1, %%g6 ! g6 = count = (len >> 1) - - sub %1, 1, %1 ! if(odd) { result = *buff; - ldub [%0], %%g5 ! len--; - add %0, 1, %0 ! buff++ } - - srl %1, 1, %%g6 -1: - cmp %%g6, 0 ! if (count) { - be,a 8f - andcc %1, 1, %%g0 - - andcc %0, 2, %%g0 ! if (2 & buff) { - be,a 1f - srl %%g6, 1, %%g6 - - sub %1, 2, %1 ! result += *(unsigned short *) buff; - lduh [%0], %%g1 ! count--; - sub %%g6, 1, %%g6 ! len -= 2; - add %%g1, %%g5, %%g5! buff += 2; - add %0, 2, %0 ! } - - srl %%g6, 1, %%g6 -1: - cmp %%g6, 0 ! if (count) { - be,a 2f - andcc %1, 2, %%g0 - - ld [%0], %%g1 ! csum aligned 32bit words -1: - add %0, 4, %0 - addcc %%g1, %%g5, %%g5 - addx %%g5, %%g0, %%g5 - subcc %%g6, 1, %%g6 - bne,a 1b - ld [%0], %%g1 - - sethi %%hi(0xffff), %%g3 - srl %%g5, 16, %%g2 - or %%g3, %%lo(0xffff), %%g3 - and %%g5, %%g3, %%g5 - add %%g2, %%g5, %%g5! } - - andcc %1, 2, %%g0 -2: - be,a 8f ! if (len & 2) { - andcc %1, 1, %%g0 - - lduh [%0], %%g1 ! result += *(unsigned short *) buff; - add %%g5, %%g1, %%g5! buff += 2; - add %0, 2, %0 ! } - - - andcc %1, 1, %%g0 -8: - be,a 1f ! if (len & 1) { - sll %%g5, 16, %%g1 - - ldub [%0], %%g1 - sll %%g1, 8, %%g1 ! result += (*buff << 8); - add %%g5, %%g1, %%g5! } - - sll %%g5, 16, %%g1 -1: - addcc %%g1, %%g5, %%g5! result = from32to16(result); - srl %%g5, 16, %%g1 - addx %%g0, %%g1, %%g5 - - orcc %%g7, %%g0, %%g0! if(odd) { - be 9f - srl %%g5, 8, %%g1 - - and %%g5, 0xff, %%g2! result = ((result >> 8) & 0xff) | - and %%g1, 0xff, %%g1! ((result & 0xff) << 8); - sll %%g2, 8, %%g2 - or %%g2, %%g1, %%g5! } -9: - addcc %2, %%g5, %2 ! 
add result and sum with carry - addx %%g0, %2, %2 - " : - "=&r" (buff), "=&r" (len), "=&r" (sum) : - "0" (buff), "1" (len), "2" (sum) : - "g1", "g2", "g3", "g5", "g6", "g7"); + addcc %1, %0, %0 + addxcc %2, %0, %0 + addxcc %3, %0, %0 + addx %0, %%g0, %0 + sll %0, 16, %1 + addcc %1, %0, %0 + srl %0, 16, %0 + addx %0, %%g0, %0 + xnor %%g0, %0, %0 + " : "=r" (sum), "=r" (saddr) + : "r" (daddr), "r" ((proto<<16)+len), "0" (sum), "1" (saddr)); return sum; } /* - * the same as csum_partial, but copies from fs:src while it - * checksums - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary + * Fold a partial checksum without adding pseudo headers */ -extern inline unsigned int csum_partial_copy(char *src, char *dst, int len, int sum) +extern inline unsigned int csum_fold(unsigned int sum) { - /* - * The whole idea is to do the copy and the checksum at - * the same time, but we do it the easy way now. - * - * At least csum on the source, not destination, for cache - * reasons.. 
- */ - sum = csum_partial(src, len, sum); - memcpy(dst, src, len); + unsigned int tmp; + + __asm__ __volatile__(" + addcc %0, %1, %1 + srl %1, 16, %1 + addx %1, %%g0, %1 + xnor %%g0, %1, %0 + " : "=&r" (sum), "=r" (tmp) + : "0" (sum), "1" (sum<<16)); + return sum; } +#define _HAVE_ARCH_IPV6_CSUM + +static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr, + struct in6_addr *daddr, + __u16 len, + unsigned short proto, + unsigned int sum) +{ + __asm__ __volatile__ (" + addcc %3, %4, %%g4 + addxcc %5, %%g4, %%g4 + ld [%2 + 0x0c], %%g2 + ld [%2 + 0x08], %%g3 + addxcc %%g2, %%g4, %%g4 + ld [%2 + 0x04], %%g2 + addxcc %%g3, %%g4, %%g4 + ld [%2 + 0x00], %%g3 + addxcc %%g2, %%g4, %%g4 + ld [%1 + 0x0c], %%g2 + addxcc %%g3, %%g4, %%g4 + ld [%1 + 0x08], %%g3 + addxcc %%g2, %%g4, %%g4 + ld [%1 + 0x04], %%g2 + addxcc %%g3, %%g4, %%g4 + ld [%1 + 0x00], %%g3 + addxcc %%g2, %%g4, %%g4 + addxcc %%g3, %%g4, %0 + addx 0, %0, %0 + " + : "=&r" (sum) + : "r" (saddr), "r" (daddr), + "r"(htonl((__u32) (len))), "r"(htonl(proto)), "r"(sum) + : "g2", "g3", "g4"); + + return csum_fold(sum); +} + /* * this routine is used for miscellaneous IP-like checksums, mainly * in icmp.c */ extern inline unsigned short ip_compute_csum(unsigned char * buff, int len) { - return ~from32to16(do_csum(buff,len)); -} - -#define csum_partial_copy_fromuser(s, d, l, w) \ - csum_partial_copy((char *) (s), (d), (l), (w)) - -/* - * Fold a partial checksum without adding pseudo headers - */ -extern inline unsigned int csum_fold(unsigned int sum) -{ - __asm__ __volatile__(" - addcc %0, %1, %0 - srl %0, 16, %0 - addx %%g0, %0, %0 - xnor %%g0, %0, %0 - " - : "=r" (sum) - : "r" (sum << 16), "0" (sum)); - return sum; + return csum_fold(csum_partial(buff, len, 0)); } #endif /* !(__SPARC_CHECKSUM_H) */ diff -u --recursive --new-file v2.1.7/linux/include/linux/hdreg.h linux/include/linux/hdreg.h --- v2.1.7/linux/include/linux/hdreg.h Mon Sep 30 11:19:00 1996 +++ linux/include/linux/hdreg.h Thu Nov 7 19:51:20 
1996 @@ -101,6 +101,7 @@ #define HDIO_SET_NOWERR 0x0325 /* change ignore-write-error flag */ #define HDIO_SET_DMA 0x0326 /* change use-dma flag */ #define HDIO_SET_PIO_MODE 0x0327 /* reconfig interface to new speed */ +#define HDIO_SCAN_HWIF 0x0328 /* register and (re)scan interface */ /* structure returned by HDIO_GET_IDENTITY, as per ANSI ATA2 rev.2f spec */ struct hd_driveid { @@ -162,15 +163,15 @@ #ifdef CONFIG_BLK_DEV_HD void hd_setup(char *, int *); #endif /* CONFIG_BLK_DEV_HD */ + #ifdef CONFIG_BLK_DEV_IDE void ide_setup(char *); +#endif /* CONFIG_BLK_DEV_IDE */ -#ifdef CONFIG_BLK_DEV_IDE_PCMCIA +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) int ide_register(int io_port, int ctl_port, int irq); void ide_unregister(unsigned int); -#endif /* CONFIG_BLK_DEV_IDE_PCMCIA */ - -#endif /* CONFIG_BLK_DEV_IDE */ +#endif /* CONFIG_BLK_DEV_IDE || CONFIG_BLK_DEV_IDE_MODULE */ #endif /* __KERNEL__ */ diff -u --recursive --new-file v2.1.7/linux/include/linux/icmpv6.h linux/include/linux/icmpv6.h --- v2.1.7/linux/include/linux/icmpv6.h Thu Jan 1 02:00:00 1970 +++ linux/include/linux/icmpv6.h Thu Nov 7 19:53:38 1996 @@ -0,0 +1,143 @@ +#ifndef _LINUX_ICMPV6_H +#define _LINUX_ICMPV6_H + +#include + +struct icmpv6hdr { + + __u8 type; + __u8 code; + __u16 checksum; + + + union { + struct icmpv6_echo { + __u16 identifier; + __u16 sequence; + } u_echo; + __u32 pointer; + __u32 mtu; + __u32 unused; + + struct icmpv6_nd_advt { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u32 reserved:5, + override:1, + solicited:1, + router:1, + reserved2:24; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u32 router:1, + solicited:1, + override:1, + reserved:29; +#else +#error "Please fix " +#endif + } u_nd_advt; + + struct icmpv6_nd_ra { + __u8 hop_limit; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 reserved:6, + other:1, + managed:1; + +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 managed:1, + other:1, + reserved:6; +#else +#error "Please fix " +#endif + __u16 rt_lifetime; + } 
u_nd_ra; + + } u; + +#define icmp6_identifier u.u_echo.identifier +#define icmp6_sequence u.u_echo.sequence +#define icmp6_pointer u.pointer +#define icmp6_mtu u.mtu +#define icmp6_unused u.unused +#define icmp6_router u.u_nd_advt.router +#define icmp6_solicited u.u_nd_advt.solicited +#define icmp6_override u.u_nd_advt.override +#define icmp6_ndiscreserved u.u_nd_advt.reserved +#define icmp6_hop_limit u.u_nd_ra.hop_limit +#define icmp6_addrconf_managed u.u_nd_ra.managed +#define icmp6_addrconf_other u.u_nd_ra.other +#define icmp6_rt_lifetime u.u_nd_ra.rt_lifetime +}; + + +#define ICMPV6_DEST_UNREACH 1 +#define ICMPV6_PKT_TOOBIG 2 +#define ICMPV6_TIME_EXCEEDED 3 +#define ICMPV6_PARAMETER_PROB 4 + +#define ICMPV6_ECHO_REQUEST 128 +#define ICMPV6_ECHO_REPLY 129 +#define ICMPV6_MEMBERSHIP_QUERY 130 +#define ICMPV6_MEMBERSHIP_REPORT 131 +#define ICMPV6_MEMBERSHIP_REDUCTION 132 + +/* + * Codes for Destination Unreachable + */ +#define ICMPV6_NOROUTE 0 +#define ICMPV6_ADM_PROHIBITED 1 +#define ICMPV6_NOT_NEIGHBOUR 2 +#define ICMPV6_ADDR_UNREACH 3 +#define ICMPV6_PORT_UNREACH 4 + +/* + * Codes for Time Exceeded + */ +#define ICMPV6_EXC_HOPLIMIT 0 +#define ICMPV6_EXC_FRAGTIME 1 + +/* + * Codes for Parameter Problem + */ +#define ICMPV6_HDR_FIELD 0 +#define ICMPV6_UNK_NEXTHDR 1 +#define ICMPV6_UNK_OPTION 2 + +/* + * constants for (set|get)sockopt + */ + +#define RAW_CHECKSUM 1 +#define ICMPV6_FILTER 256 + +/* + * ICMPV6 filter + */ + +#define ICMPV6_FILTER_BLOCK 1 +#define ICMPV6_FILTER_PASS 2 +#define ICMPV6_FILTER_BLOCKOTHERS 3 +#define ICMPV6_FILTER_PASSONLY 4 + +struct icmp6_filter { + __u32 data[8]; +}; + +#ifdef __KERNEL__ + +#include +#include + + +extern void icmpv6_send(struct sk_buff *skb, + int type, int code, + __u32 info, + struct device *dev); + +extern void icmpv6_init(struct proto_ops *ops); +extern int icmpv6_err_convert(int type, int code, + int *err); +#endif + +#endif diff -u --recursive --new-file v2.1.7/linux/include/linux/if_arp.h 
linux/include/linux/if_arp.h --- v2.1.7/linux/include/linux/if_arp.h Mon Sep 30 11:21:31 1996 +++ linux/include/linux/if_arp.h Thu Nov 7 19:53:37 1996 @@ -52,6 +52,7 @@ #define ARPHRD_SKIP 771 /* SKIP vif */ #define ARPHRD_LOOPBACK 772 /* Loopback device */ #define ARPHRD_LOCALTLK 773 /* Localtalk device */ +#define ARPHRD_SIT 774 /* sit0 device - IPv6-in-IPv4 */ /* ARP protocol opcodes. */ #define ARPOP_REQUEST 1 /* ARP request */ diff -u --recursive --new-file v2.1.7/linux/include/linux/in.h linux/include/linux/in.h --- v2.1.7/linux/include/linux/in.h Wed Oct 16 10:48:29 1996 +++ linux/include/linux/in.h Thu Nov 7 19:51:20 1996 @@ -32,7 +32,10 @@ IPPROTO_UDP = 17, /* User Datagram Protocol */ IPPROTO_IDP = 22, /* XNS IDP protocol */ - IPPROTO_RAW = 255, /* Raw IP packets */ + IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */ + IPPROTO_ICMPV6 = 58, /* ICMPv6 */ + + IPPROTO_RAW = 255, /* Raw IP packets */ IPPROTO_MAX }; @@ -54,7 +57,7 @@ /* Structure describing an Internet (IP) socket address. */ #define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */ struct sockaddr_in { - short int sin_family; /* Address family */ + unsigned short int sin_family; /* Address family */ unsigned short int sin_port; /* Port number */ struct in_addr sin_addr; /* Internet address */ @@ -125,24 +128,5 @@ #define MULTICAST(x) (((x) & htonl(0xf0000000)) == htonl(0xe0000000)) #endif - -/* - * IPv6 definitions as we start to include them. 
This is just - * a beginning -- don't get excited 8) - */ - -struct in_addr6 -{ - unsigned char s6_addr[16]; -}; - -struct sockaddr_in6 -{ - unsigned short sin6_family; - unsigned short sin6_port; - unsigned long sin6_flowinfo; - struct in_addr6 sin6_addr; -}; - #endif /* _LINUX_IN_H */ diff -u --recursive --new-file v2.1.7/linux/include/linux/in6.h linux/include/linux/in6.h --- v2.1.7/linux/include/linux/in6.h Thu Jan 1 02:00:00 1970 +++ linux/include/linux/in6.h Sun Nov 3 11:04:42 1996 @@ -0,0 +1,99 @@ +/* + * Types and definitions for AF_INET6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * Source: + * IPv6 Program Interfaces for BSD Systems + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_IN6_H +#define _LINUX_IN6_H + + +/* + * IPv6 address structure + */ + +struct in6_addr +{ + union + { + unsigned char u6_addr8[16]; + __u32 u6_addr32[4]; + } in6_u; +#define s6_addr32 in6_u.u6_addr32 +#define s6_addr in6_u.u6_addr8 +}; + +struct sockaddr_in6 { + unsigned short int sin6_family; /* AF_INET6 */ + __u16 sin6_port; /* Transport layer port # */ + __u32 sin6_flowinfo; /* IPv6 flow information */ + struct in6_addr sin6_addr; /* IPv6 address */ +}; + + +struct ipv6_mreq { + /* IPv6 multicast address of group */ + struct in6_addr ipv6mr_multiaddr; + + /* local IPv6 address of interface */ + struct in6_addr ipv6mr_interface; +}; + +/* + * Bitmask constant declarations to help applications select out the + * flow label and priority fields. + * + * Note that this are in host byte order while the flowinfo field of + * sockaddr_in6 is in network byte order. 
+ */ + +#define IPV6_FLOWINFO_FLOWLABEL 0x00ff +#define IPV6_FLOWINFO_PRIORITY 0x0f00 + +#define IPV6_PRIORITY_UNCHARACTERIZED 0x0000 +#define IPV6_PRIORITY_FILLER 0x0100 +#define IPV6_PRIORITY_UNATTENDED 0x0200 +#define IPV6_PRIORITY_RESERVED1 0x0300 +#define IPV6_PRIORITY_BULK 0x0400 +#define IPV6_PRIORITY_RESERVED2 0x0500 +#define IPV6_PRIORITY_INTERACTIVE 0x0600 +#define IPV6_PRIORITY_CONTROL 0x0700 +#define IPV6_PRIORITY_8 0x0800 +#define IPV6_PRIORITY_9 0x0900 +#define IPV6_PRIORITY_10 0x0a00 +#define IPV6_PRIORITY_11 0x0b00 +#define IPV6_PRIORITY_12 0x0c00 +#define IPV6_PRIORITY_13 0x0d00 +#define IPV6_PRIORITY_14 0x0e00 +#define IPV6_PRIORITY_15 0x0f00 + +/* + * IPV6 socket options + */ + +#define IPV6_ADDRFORM 1 +#define IPV6_RXINFO 2 +#define IPV6_TXINFO IPV6_RXINFO +#define SCM_SRCINFO IPV6_TXINFO +#define SCM_SRCRT 4 +#define IPV6_UNICAST_HOPS 5 + + +#define IPV6_MULTICAST_IF 17 +#define IPV6_MULTICAST_HOPS 18 +#define IPV6_MULTICAST_LOOP 19 +#define IPV6_ADD_MEMBERSHIP 20 +#define IPV6_DROP_MEMBERSHIP 21 + +#endif diff -u --recursive --new-file v2.1.7/linux/include/linux/ipv6.h linux/include/linux/ipv6.h --- v2.1.7/linux/include/linux/ipv6.h Thu Jan 1 02:00:00 1970 +++ linux/include/linux/ipv6.h Thu Nov 7 19:53:38 1996 @@ -0,0 +1,105 @@ +#ifndef _IPV6_H +#define _IPV6_H + +#include +#include + +/* + * IPv6 fixed header + */ + +struct ipv6hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 priority:4, + version:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 version:4, + priority:4; +#else +#error "Please fix " +#endif + __u8 flow_lbl[3]; + + __u16 payload_len; + __u8 nexthdr; + __u8 hop_limit; + + struct in6_addr saddr; + struct in6_addr daddr; +}; + +struct in6_ifreq { + struct in6_addr addr; + __u32 prefix_len; + char devname[8]; +}; + +/* + * Advanced API + * source interface/address selection, source routing, etc... 
+ * *under construction* + */ + + +struct in6_pktinfo { + int ipi6_ifindex; + struct in6_addr ipi6_addr; +}; + +#define IPV6_SRCRT_STRICT 0x01 /* this hop must be a neighbor */ +#define IPV6_SRCRT_TYPE_0 0 /* IPv6 type 0 Routing Header */ + +/* + * routing header + */ +struct ipv6_rt_hdr { + __u8 nexthdr; + __u8 hdrlen; + __u8 type; + __u8 segments_left; + + /* + * type specific data + * variable length field + */ +}; + +/* + * routing header type 0 (used in cmsghdr struct) + */ + +struct rt0_hdr { + struct ipv6_rt_hdr rt_hdr; + __u32 bitmap; /* strict/loose bit map */ + struct in6_addr addr[0]; + +#define rt0_type rt_hdr.type; +}; + +#ifdef __KERNEL__ + +/* + * The length of this struct cannot be greater than the length of + * the proto_priv field in a sk_buff which is currently + * defined to be 16 bytes. + * Pointers take upto 8 bytes (sizeof(void *) is 8 on the alpha). + */ +struct ipv6_options +{ + /* length of extension headers */ + + __u16 opt_flen; /* after fragment hdr */ + __u16 opt_nflen; /* before fragment hdr */ + + /* + * protocol options + * usualy carried in IPv6 extension headers + */ + + struct ipv6_rt_hdr *srcrt; /* Routing Header */ + +}; + +#endif + +#endif diff -u --recursive --new-file v2.1.7/linux/include/linux/ipv6_route.h linux/include/linux/ipv6_route.h --- v2.1.7/linux/include/linux/ipv6_route.h Thu Jan 1 02:00:00 1970 +++ linux/include/linux/ipv6_route.h Sun Nov 3 11:04:42 1996 @@ -0,0 +1,41 @@ +/* + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _LINUX_IPV6_ROUTE_H +#define _LINUX_IPV6_ROUTE_H + +#include + +#define RTI_DEVRT 0x00010000 /* route lookup, dev must match */ +#define RTI_ALLONLINK 0x00020000 /* all destinations on link */ +#define RTI_DCACHE RTF_DCACHE /* rt6_info is a dcache entry */ +#define RTI_INVALID RTF_INVALID /* invalid route/dcache entry */ + +#define RTI_DYNAMIC RTF_DYNAMIC /* rt6_info created dynamicly */ +#define RTI_GATEWAY RTF_GATEWAY +#define RTI_DYNMOD RTF_MODIFIED /* more specific route may exist*/ + +#define DCF_PMTU RTF_MSS /* dest cache has valid PMTU */ +#define DCF_INVALID RTF_INVALID + +struct in6_rtmsg { + __u32 rtmsg_type; + struct in6_addr rtmsg_dst; + struct in6_addr rtmsg_gateway; + __u16 rtmsg_prefixlen; + __u16 rtmsg_metric; + char rtmsg_device[16]; + __u16 rtmsg_flags; + unsigned long rtmsg_info; +}; + +#endif diff -u --recursive --new-file v2.1.7/linux/include/linux/keyboard.h linux/include/linux/keyboard.h --- v2.1.7/linux/include/linux/keyboard.h Sun Oct 8 12:37:07 1995 +++ linux/include/linux/keyboard.h Thu Nov 7 11:25:56 1996 @@ -352,8 +352,9 @@ #define K_DCIRCM K(KT_DEAD,2) #define K_DTILDE K(KT_DEAD,3) #define K_DDIERE K(KT_DEAD,4) +#define K_DCEDIL K(KT_DEAD,5) -#define NR_DEAD 5 +#define NR_DEAD 6 #define K_DOWN K(KT_CUR,0) #define K_LEFT K(KT_CUR,1) diff -u --recursive --new-file v2.1.7/linux/include/linux/limits.h linux/include/linux/limits.h --- v2.1.7/linux/include/linux/limits.h Wed Jul 17 15:10:03 1996 +++ linux/include/linux/limits.h Fri Nov 8 15:36:54 1996 @@ -11,7 +11,7 @@ #define MAX_CANON 255 /* size of the canonical input queue */ #define MAX_INPUT 255 /* size of the type-ahead buffer */ #define NAME_MAX 255 /* # chars in a file name */ -#define PATH_MAX 1024 /* # chars in a path name */ +#define PATH_MAX 4095 /* # chars in a path name */ #define PIPE_BUF 4096 /* # bytes in atomic write to a pipe */ #endif diff -u --recursive --new-file v2.1.7/linux/include/linux/locks.h linux/include/linux/locks.h --- 
v2.1.7/linux/include/linux/locks.h Mon Sep 30 11:19:00 1996 +++ linux/include/linux/locks.h Thu Nov 7 19:51:20 1996 @@ -28,7 +28,7 @@ extern inline void lock_buffer(struct buffer_head * bh) { - if (set_bit(BH_Lock, &bh->b_state)) + while (set_bit(BH_Lock, &bh->b_state)) __wait_on_buffer(bh); } diff -u --recursive --new-file v2.1.7/linux/include/linux/module.h linux/include/linux/module.h --- v2.1.7/linux/include/linux/module.h Mon Sep 30 11:19:11 1996 +++ linux/include/linux/module.h Thu Nov 7 19:51:31 1996 @@ -58,6 +58,10 @@ /* * Note: The string table follows immediately after the symbol table in memory! */ +struct _exceptinfo{ + struct exception_table_entry *start; + struct exception_table_entry *stop; +}; struct module { struct module *next; @@ -68,11 +72,26 @@ void* addr; /* address of module */ int state; void (*cleanup)(void); /* cleanup routine */ + struct _exceptinfo exceptinfo; }; +/* + prior to modules-2.1 there were no real way to identify + which insmod is talking to us Now a special signature must + be written here. + + The new module utilities knows about older kernel and write + the init in the signature and the cleanup in the init. + This is to make sure newer utilities work with older kernel + so it is simple for people to upgrade. +*/ +#define MODULE_2_1_7_SIG ((void*)0x00000217) + struct mod_routines { + void *signature; int (*init)(void); /* initialization routine */ void (*cleanup)(void); /* cleanup routine */ + struct _exceptinfo exceptinfo; }; /* @@ -104,6 +123,7 @@ #define MOD_INC_USE_COUNT do { } while (0) #define MOD_DEC_USE_COUNT do { } while (0) #define MOD_IN_USE 1 +extern struct module *module_list; #endif diff -u --recursive --new-file v2.1.7/linux/include/linux/netdevice.h linux/include/linux/netdevice.h --- v2.1.7/linux/include/linux/netdevice.h Mon Sep 30 11:21:29 1996 +++ linux/include/linux/netdevice.h Thu Nov 7 19:53:34 1996 @@ -31,19 +31,23 @@ /* for future expansion when we will have different priorities. 
*/ #define DEV_NUMBUFFS 3 #define MAX_ADDR_LEN 7 -#ifndef CONFIG_AX25 -#ifndef CONFIG_TR -#ifndef CONFIG_NET_IPIP -#define MAX_HEADER 32 /* We really need about 18 worst case .. so 32 is aligned */ + +#if !defined(CONFIG_AX25) && !defined(CONFIG_TR) +#define LL_MAX_HEADER 32 #else -#define MAX_HEADER 80 /* We need to allow for having tunnel headers */ -#endif /* IPIP */ +#if defined(CONFIG_AX25) +#define LL_MAX_HEADER 96 #else -#define MAX_HEADER 48 /* Token Ring header needs 40 bytes ... 48 is aligned */ -#endif /* TR */ +#define LL_MAX_HEADER 48 +#endif +#endif + +#if !defined(CONFIG_NET_IPIP) && \ + !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) +#define MAX_HEADER LL_MAX_HEADER #else -#define MAX_HEADER 96 /* AX.25 + NetROM */ -#endif /* AX25 */ +#define MAX_HEADER (LL_MAX_HEADER + 48) +#endif #define IS_MYADDR 1 /* address is (one of) our own */ #define IS_LOOPBACK 2 /* address is for LOOPBACK */ @@ -144,7 +148,13 @@ unsigned char pad; /* make dev_addr aligned to 8 bytes */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ unsigned char addr_len; /* hardware address length */ +#if 0 + __u32 pa_addr_arr[4]; + __u16 pa_prefix_len; +#define pa_addr pa_addr_arr[3]; +#else unsigned long pa_addr; /* protocol address */ +#endif unsigned long pa_brdaddr; /* protocol broadcast addr */ unsigned long pa_dstaddr; /* protocol P-P other side addr */ unsigned long pa_mask; /* protocol netmask */ diff -u --recursive --new-file v2.1.7/linux/include/linux/pci.h linux/include/linux/pci.h --- v2.1.7/linux/include/linux/pci.h Wed Oct 16 10:48:30 1996 +++ linux/include/linux/pci.h Wed Nov 6 12:12:53 1996 @@ -265,6 +265,7 @@ #define PCI_DEVICE_ID_DEC_FDDI 0x000F #define PCI_DEVICE_ID_DEC_TULIP_PLUS 0x0014 #define PCI_DEVICE_ID_DEC_21052_AB 0x0021 +#define PCI_DEVICE_ID_DEC_21152_AA 0x0024 #define PCI_VENDOR_ID_CIRRUS 0x1013 #define PCI_DEVICE_ID_CIRRUS_5430 0x00a0 diff -u --recursive --new-file v2.1.7/linux/include/linux/ppp_defs.h linux/include/linux/ppp_defs.h --- 
v2.1.7/linux/include/linux/ppp_defs.h Tue Mar 5 10:01:28 1996 +++ linux/include/linux/ppp_defs.h Sun Nov 3 11:04:42 1996 @@ -69,6 +69,7 @@ #define PPP_IPX 0x2b /* IPX protocol */ #define PPP_VJC_COMP 0x2d /* VJ compressed TCP */ #define PPP_VJC_UNCOMP 0x2f /* VJ uncompressed TCP */ +#define PPP_IPV6 0x57 /* Internet Protocol Version 6 */ #define PPP_COMP 0xfd /* compressed packet */ #define PPP_IPCP 0x8021 /* IP Control Protocol */ #define PPP_IPXCP 0x802b /* IPX Control Protocol */ diff -u --recursive --new-file v2.1.7/linux/include/linux/proc_fs.h linux/include/linux/proc_fs.h --- v2.1.7/linux/include/linux/proc_fs.h Tue Oct 29 19:58:47 1996 +++ linux/include/linux/proc_fs.h Thu Nov 7 19:51:31 1996 @@ -102,6 +102,8 @@ PROC_NET_ALIAS_TYPES, PROC_NET_ALIASES, PROC_NET_IP_MASQ_APP, + PROC_NET_RT6, + PROC_NET_RT6_STATS, PROC_NET_STRIP_STATUS, PROC_NET_STRIP_TRACE, PROC_NET_Z8530, diff -u --recursive --new-file v2.1.7/linux/include/linux/route.h linux/include/linux/route.h --- v2.1.7/linux/include/linux/route.h Sun Aug 4 14:38:04 1996 +++ linux/include/linux/route.h Sun Nov 3 11:14:54 1996 @@ -53,6 +53,12 @@ #define RTF_IRTT 0x0100 /* Initial round trip time */ #define RTF_REJECT 0x0200 /* Reject route */ +#define RTF_ADDRCONF 0x0800 /* announced on link prefix */ +#define RTF_INVALID 0x1000 +#define RTF_DCACHE 0x2000 +#define RTF_DEFAULT 0x4000 /* Route is a default route */ +#define RTF_NEXTHOP 0x8000 /* Non gateway route with nexthop */ + /* * This structure is passed from the kernel to user space by netlink * routing/device announcements diff -u --recursive --new-file v2.1.7/linux/include/linux/serial.h linux/include/linux/serial.h --- v2.1.7/linux/include/linux/serial.h Tue Sep 24 14:07:29 1996 +++ linux/include/linux/serial.h Sun Nov 3 10:16:46 1996 @@ -44,7 +44,19 @@ #define PORT_16550A 4 #define PORT_CIRRUS 5 #define PORT_16650 6 -#define PORT_MAX 6 +#define PORT_16650V2 7 +#define PORT_16750 8 +#define PORT_MAX 8 + +struct serial_uart_config { + char *name; + 
int dfl_xmit_fifo_size; + int flags; +}; + +#define UART_CLEAR_FIFO 0x01 +#define UART_USE_FIFO 0x02 +#define UART_STARTECH 0x04 /* * Definitions for async_struct (and serial_struct) flags field @@ -55,7 +67,7 @@ #define ASYNC_SAK 0x0004 /* Secure Attention Key (Orange book) */ #define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */ -#define ASYNC_SPD_MASK 0x0030 +#define ASYNC_SPD_MASK 0x1030 #define ASYNC_SPD_HI 0x0010 /* Use 56000 instead of 38400 bps */ #define ASYNC_SPD_VHI 0x0020 /* Use 115200 instead of 38400 bps */ @@ -67,8 +79,13 @@ #define ASYNC_PGRP_LOCKOUT 0x0200 /* Lock out cua opens based on pgrp */ #define ASYNC_CALLOUT_NOHUP 0x0400 /* Don't do hangups for cua device */ -#define ASYNC_FLAGS 0x0FFF /* Possible legal async flags */ -#define ASYNC_USR_MASK 0x0430 /* Legal flags that non-privileged +#define ASYNC_HARDPPS_CD 0x0800 /* Call hardpps when CD goes high */ + +#define ASYNC_SPD_SHI 0x1000 /* Use 230400 instead of 38400 bps */ +#define ASYNC_SPD_WARP 0x1010 /* Use 460800 instead of 38400 bps */ + +#define ASYNC_FLAGS 0x1FFF /* Possible legal async flags */ +#define ASYNC_USR_MASK 0x1430 /* Legal flags that non-privileged * users can set or reset */ /* Internal flags used only by kernel/chr_drv/serial.c */ @@ -81,6 +98,8 @@ #define ASYNC_CHECK_CD 0x02000000 /* i.e., CLOCAL */ #define ASYNC_SHARE_IRQ 0x01000000 /* for multifunction cards */ +#define ASYNC_INTERNAL_FLAGS 0xFF000000 /* Internal flags */ + /* * Multiport serial configuration structure --- external structure */ @@ -128,20 +147,37 @@ __u32 cts, dsr, rng, dcd; }; +struct serial_state { + int magic; + int baud_base; + int port; + int irq; + int flags; + int hub6; + int type; + int line; + int xmit_fifo_size; + int custom_divisor; + int count; + unsigned short close_delay; + unsigned short closing_wait; /* time to wait before closing */ + struct async_icount icount; + struct termios normal_termios; + struct termios callout_termios; + struct async_struct *info; +}; + 
struct async_struct { int magic; - int baud_base; int port; - int irq; - int flags; /* defined in tty.h */ - int hub6; /* HUB6 plus one */ - int type; /* UART type */ + int hub6; + int flags; + int xmit_fifo_size; + struct serial_state *state; struct tty_struct *tty; int read_status_mask; int ignore_status_mask; int timeout; - int xmit_fifo_size; - int custom_divisor; int x_char; /* xon/xoff character */ int close_delay; unsigned short closing_wait; @@ -152,7 +188,6 @@ unsigned long event; unsigned long last_active; int line; - int count; /* # of fd on device */ int blocked_open; /* # of blocked opens */ long session; /* Session of opening process */ long pgrp; /* pgrp of opening process */ @@ -162,17 +197,15 @@ int xmit_cnt; struct tq_struct tqueue; struct tq_struct tqueue_hangup; - struct termios normal_termios; - struct termios callout_termios; struct wait_queue *open_wait; struct wait_queue *close_wait; struct wait_queue *delta_msr_wait; - struct async_icount icount; /* kernel counters for the 4 input interrupts */ struct async_struct *next_port; /* For the linked list */ struct async_struct *prev_port; }; #define SERIAL_MAGIC 0x5301 +#define SSTATE_MAGIC 0x5302 /* * The size of the serial xmit buffer is 1 page, or 4096 bytes diff -u --recursive --new-file v2.1.7/linux/include/linux/serial_reg.h linux/include/linux/serial_reg.h --- v2.1.7/linux/include/linux/serial_reg.h Mon Feb 20 21:29:53 1995 +++ linux/include/linux/serial_reg.h Sun Nov 3 10:16:46 1996 @@ -51,6 +51,8 @@ #define UART_FCR6_T_TRIGGER_8 0x10 /* Mask for transmit trigger set at 8 */ #define UART_FCR6_T_TRIGGER_24 0x20 /* Mask for transmit trigger set at 24 */ #define UART_FCR6_T_TRIGGER_30 0x30 /* Mask for transmit trigger set at 30 */ +/* TI 16750 definitions */ +#define UART_FCR7_64BYTE 0x20 /* Go into 64 byte mode */ /* * These are the definitions for the Line Control Register @@ -98,6 +100,11 @@ #define UART_IER_RLSI 0x04 /* Enable receiver line status interrupt */ #define UART_IER_THRI 0x02 
/* Enable Transmitter holding register int. */ #define UART_IER_RDI 0x01 /* Enable receiver data interrupt */ +/* + * Sleep mode for ST16650 and TI16750. + * Note that for 16650, EFR-bit 4 must be selected as well. + */ +#define UART_IERX_SLEEP 0x10 /* Enable sleep mode */ /* * These are the definitions for the Modem Control Register @@ -128,7 +135,7 @@ #define UART_EFR_CTS 0x80 /* CTS flow control */ #define UART_EFR_RTS 0x40 /* RTS flow control */ #define UART_EFR_SCD 0x20 /* Special character detect */ -#define UART_EFR_ENI 0x10 /* Enhanced Interrupt */ +#define UART_EFR_ECB 0x10 /* Enhanced control bit */ /* * the low four bits control software flow control */ diff -u --recursive --new-file v2.1.7/linux/include/linux/skbuff.h linux/include/linux/skbuff.h --- v2.1.7/linux/include/linux/skbuff.h Mon Sep 30 11:19:11 1996 +++ linux/include/linux/skbuff.h Thu Nov 7 19:51:31 1996 @@ -77,6 +77,17 @@ } mac; struct iphdr *ip_hdr; /* For IPPROTO_RAW */ +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + struct ipv6hdr *ipv6_hdr; + + /* + * It would be inefficient to store the nexthop address in every + * skb. Instead we store a pointer to the respective neighbour + * cache entry. This might make ndisc cache management harder. + */ + + struct neighbour *nexthop; +#endif unsigned long len; /* Length of actual data */ unsigned long csum; /* Checksum */ __u32 saddr; /* IP source address */ @@ -100,6 +111,7 @@ #define PACKET_BROADCAST 1 /* To all */ #define PACKET_MULTICAST 2 /* To group */ #define PACKET_OTHERHOST 3 /* To someone else */ +#define PACKET_NDISC 17 /* Outgoing NDISC packet */ unsigned short users; /* User count - see datagram.c,tcp.c */ unsigned short protocol; /* Packet protocol from driver. 
*/ unsigned short truesize; /* Buffer size */ @@ -112,6 +124,7 @@ unsigned char *end; /* End pointer */ void (*destructor)(struct sk_buff *); /* Destruct function */ __u16 redirport; /* Redirect port */ + __u16 inclone; /* Inline clone */ }; #ifdef CONFIG_SKB_LARGE diff -u --recursive --new-file v2.1.7/linux/include/linux/socket.h linux/include/linux/socket.h --- v2.1.7/linux/include/linux/socket.h Mon Jul 8 16:09:16 1996 +++ linux/include/linux/socket.h Sun Nov 3 11:04:42 1996 @@ -33,6 +33,44 @@ int msg_flags; /* 4.4 BSD item we dont use */ }; +/* + * POSIX 1003.1g - ancillary data object information + * Ancillary data consits of a sequence of pairs of + * (cmsghdr, cmsg_data[]) + */ + +struct cmsghdr { + size_t cmsg_len; /* data byte count, including hdr */ + int cmsg_level; /* originating protocol */ + int cmsg_type; /* protocol-specific type */ + unsigned char cmsg_data[0]; +}; + +/* + * Ancilliary data object information MACROS + * Table 5-14 of POSIX 1003.1g + */ + +#define CMSG_DATA(cmsg) cmsg->cmsg_data +#define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr(mhdr, cmsg) +#define CMSG_FIRST(mhdr) ((struct cmsghdr *) (mhdr)->msg_control) + +extern __inline__ struct cmsghdr * cmsg_nxthdr(struct msghdr *mhdr, + struct cmsghdr *cmsg) +{ + void * ptr; + + if (cmsg->cmsg_len < sizeof(struct cmsghdr)) + { + return NULL; + } + ptr = ((unsigned char *) cmsg) + cmsg->cmsg_len; + if (ptr >= mhdr->msg_control + mhdr->msg_controllen) + return NULL; + + return ptr; +} + /* Control Messages */ #define SCM_RIGHTS 1 @@ -90,6 +128,9 @@ /* Setsockoptions(2) level. 
Thanks to BSD these must match IPPROTO_xxx */ #define SOL_IP 0 +#define SOL_IPV6 41 +#define SOL_ICMPV6 58 +#define SOL_RAW 255 #define SOL_IPX 256 #define SOL_AX25 257 #define SOL_ATALK 258 @@ -132,6 +173,13 @@ #ifdef __KERNEL__ extern void memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); +extern void memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, + int offset, int len); +extern unsigned int csum_partial_copy_fromiovecend(unsigned char *kdata, + struct iovec *iov, + int offset, + int len, int csum); + extern int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode); extern void memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); extern int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen); diff -u --recursive --new-file v2.1.7/linux/include/linux/sysctl.h linux/include/linux/sysctl.h --- v2.1.7/linux/include/linux/sysctl.h Mon Jun 3 14:04:03 1996 +++ linux/include/linux/sysctl.h Sun Nov 3 11:04:42 1996 @@ -80,6 +80,7 @@ #define NET_NETROM 8 #define NET_AX25 9 #define NET_BRIDGE 10 +#define NET_IPV6 11 /* /proc/sys/net/core */ @@ -97,7 +98,11 @@ #define NET_IPV4_ARP_CHECK_INTERVAL 5 #define NET_IPV4_ARP_CONFIRM_INTERVAL 6 #define NET_IPV4_ARP_CONFIRM_TIMEOUT 7 +#define NET_IPV4_TCP_VEGAS_CONG_AVOID 8 +/* /proc/sys/net/ipv6 */ +#define NET_IPV6_FORWARDING 1 +#define NET_IPV6_HOPLIMIT 2 /* /proc/sys/net/ipx */ /* /proc/sys/net/appletalk */ diff -u --recursive --new-file v2.1.7/linux/include/linux/tty.h linux/include/linux/tty.h --- v2.1.7/linux/include/linux/tty.h Fri Nov 1 17:13:19 1996 +++ linux/include/linux/tty.h Sat Nov 9 11:17:31 1996 @@ -267,6 +267,8 @@ #define TTY_DO_WRITE_WAKEUP 5 #define TTY_PUSH 6 #define TTY_CLOSING 7 +#define TTY_HW_COOK_OUT 14 +#define TTY_HW_COOK_IN 15 #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) diff -u --recursive --new-file v2.1.7/linux/include/linux/tty_driver.h linux/include/linux/tty_driver.h --- v2.1.7/linux/include/linux/tty_driver.h Mon Sep 
30 11:19:00 1996 +++ linux/include/linux/tty_driver.h Thu Nov 7 19:51:20 1996 @@ -91,7 +91,16 @@ * * This routine notifies the tty driver that it should hangup the * tty device. + * + * void (*wait_until_sent)(struct tty_struct *tty, int timeout); * + * This routine waits until the device has written out all of the + * characters in its transmitter FIFO. + * + * void (*send_xchar)(struct tty_struct *tty, char ch); + * + * This routine is used to send a high-priority XON/XOFF + * character to the device. */ #include @@ -139,6 +148,8 @@ void (*hangup)(struct tty_struct *tty); void (*flush_buffer)(struct tty_struct *tty); void (*set_ldisc)(struct tty_struct *tty); + void (*wait_until_sent)(struct tty_struct *tty, int timeout); + void (*send_xchar)(struct tty_struct *tty, char ch); /* * linked list pointers diff -u --recursive --new-file v2.1.7/linux/include/linux/tty_ldisc.h linux/include/linux/tty_ldisc.h --- v2.1.7/linux/include/linux/tty_ldisc.h Mon Sep 30 11:19:00 1996 +++ linux/include/linux/tty_ldisc.h Thu Nov 7 19:51:20 1996 @@ -2,7 +2,100 @@ #define _LINUX_TTY_LDISC_H /* - * Definitions for the tty line discipline + * This structure defines the interface between the tty line discpline + * implementation and the tty routines. The following routines can be + * defined; unless noted otherwise, they are optional, and can be + * filled in with a null pointer. + * + * int (*open)(struct tty_struct *); + * + * This function is called when the line discpline is associated + * with the tty. The line discpline can use this as an + * opportunity to initialize any state needed by the ldisc routines. 
+ * + * void (*close)(struct tty_struct *); + * + * This function is called when the line discpline is being + * shutdown, either because the tty is being closed or because + * the tty is being changed to use a new line discpline + * + * void (*flush_buffer)(struct tty_struct *tty); + * + * This function instructs the line discipline to clear its + * buffers of any input characters it may have queued to be + * delivered to the user mode process. + * + * int (*chars_in_buffer)(struct tty_struct *tty); + * + * This function returns the number of input characters the line + * iscpline may have queued up to be delivered to the user mode + * process. + * + * int (*read)(struct tty_struct * tty, struct file * file, + * unsigned char * buf, unsigned int nr); + * + * This function is called when the user requests to read from + * the tty. The line discpline will return whatever characters + * it has buffered up for the user. If this function is not + * defined, the user will receive an EIO error. + * + * int (*write)(struct tty_struct * tty, struct file * file, + * const unsigned char * buf, unsigned int nr); + * + * This function is called when the user requests to write to the + * tty. The line discpline will deliver the characters to the + * low-level tty device for transmission, optionally performing + * some processing on the characters first. If this function is + * not defined, the user will receive an EIO error. + * + * int (*ioctl)(struct tty_struct * tty, struct file * file, + * unsigned int cmd, unsigned long arg); + * + * This function is called when the user requests an ioctl which + * is not handled by the tty layer or the low-level tty driver. + * It is intended for ioctls which affect line discpline + * operation. Not that the search order for ioctls is (1) tty + * layer, (2) tty low-level driver, (3) line discpline. So a + * low-level driver can "grab" an ioctl request before the line + * discpline has a chance to see it. 
+ * + * void (*set_termios)(struct tty_struct *tty, struct termios * old); + * + * This function notifies the line discpline that a change has + * been made to the termios stucture. + * + * int (*select)(struct tty_struct * tty, struct inode * inode, + * struct file * file, int sel_type, + * struct select_table_struct *wait); + * + * This function is called when a user attempts to select on a + * tty device. It is solely the responsibility of the line + * discipline to handle select requests. + * + * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, + * char *fp, int count); + * + * This function is called by the low-level tty driver to send + * characters received by the hardware to the line discpline for + * processing. is a pointer to the buffer of input + * character received by the device. is a pointer to a + * pointer of flag bytes which indicate whether a character was + * received with a parity error, etc. + * + * int (*receive_room)(struct tty_struct *); + * + * This function is called by the low-level tty driver to + * determine how many characters the line discpline can accept. + * The low-level driver must not send more characters than was + * indicated by receive_room, or the line discpline may drop + * those characters. + * + * void (*write_wakeup)(struct tty_struct *); + * + * This function is called by the low-level tty driver to signal + * that line discpline should try to send more characters to the + * low-level driver for transmission. If the line discpline does + * not have any more data to send, it can just return. 
*/ #include diff -u --recursive --new-file v2.1.7/linux/include/linux/un.h linux/include/linux/un.h --- v2.1.7/linux/include/linux/un.h Mon Mar 4 09:16:40 1996 +++ linux/include/linux/un.h Sun Nov 3 11:04:42 1996 @@ -8,11 +8,4 @@ char sun_path[UNIX_PATH_MAX]; /* pathname */ }; -struct cmsghdr { - unsigned int cmsg_len; - int cmsg_level; - int cmsg_type; - unsigned char cmsg_data[0]; -}; - #endif /* _LINUX_UN_H */ diff -u --recursive --new-file v2.1.7/linux/include/net/addrconf.h linux/include/net/addrconf.h --- v2.1.7/linux/include/net/addrconf.h Thu Jan 1 02:00:00 1970 +++ linux/include/net/addrconf.h Sun Nov 3 11:04:42 1996 @@ -0,0 +1,135 @@ +#ifndef _ADDRCONF_H +#define _ADDRCONF_H + +#define RETRANS_TIMER HZ + +#define MAX_RTR_SOLICITATIONS 3 +#define RTR_SOLICITATION_INTERVAL (4*HZ) + +#define ADDR_CHECK_FREQUENCY (120*HZ) + +struct prefix_info { + __u8 type; + __u8 length; + __u8 prefix_len; + +#if defined(__BIG_ENDIAN_BITFIELD) + __u8 onlink : 1, + autoconf : 1, + reserved : 6; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + __u8 reserved : 6, + autoconf : 1, + onlink : 1; +#else +#error "Please fix " +#endif + __u32 valid; + __u32 prefered; + __u32 reserved2; + + struct in6_addr prefix; +}; + + +#ifdef __KERNEL__ + +#include +#include +#include + +extern struct inet6_ifaddr *inet6_addr_lst[16]; +extern struct ipv6_mc_list *inet6_mcast_lst[16]; +extern struct inet6_dev *inet6_dev_lst; + +extern void addrconf_init(void); +extern void addrconf_cleanup(void); + +extern int addrconf_notify(struct notifier_block *this, + unsigned long event, + void * data); + +extern int addrconf_add_ifaddr(void *arg); +extern int addrconf_set_dstaddr(void *arg); + +extern struct inet6_ifaddr * ipv6_chk_addr(struct in6_addr *addr); +extern struct inet6_ifaddr * ipv6_get_saddr(struct rt6_info *rt, + struct in6_addr *daddr); +extern struct inet6_ifaddr * ipv6_get_lladdr(struct device *dev); + +/* + * multicast prototypes (mcast.c) + */ +extern int ipv6_sock_mc_join(struct sock *sk, + 
struct device *dev, + struct in6_addr *addr); +extern int ipv6_sock_mc_drop(struct sock *sk, + struct device *dev, + struct in6_addr *addr); +extern void ipv6_sock_mc_close(struct sock *sk); + +extern int ipv6_dev_mc_inc(struct device *dev, + struct in6_addr *addr); +extern int ipv6_dev_mc_dec(struct device *dev, + struct in6_addr *addr); + +extern int ipv6_chk_mcast_addr(struct device *dev, + struct in6_addr *addr); + +extern void addrconf_prefix_rcv(struct device *dev, + u8 *opt, int len); + +extern struct inet6_dev * ipv6_dev_by_index(int index); +extern struct inet6_dev * ipv6_get_idev(struct device *dev); + +extern void addrconf_forwarding_on(void); +/* + * Hash function taken from net_alias.c + */ + +static __inline__ u8 ipv6_addr_hash(struct in6_addr *addr) +{ + __u32 word; + unsigned tmp; + + /* + * We perform the hash function over the last 64 bits of the address + * This will include the IEEE address token on links that support it. + */ + + word = addr->s6_addr[2] ^ addr->s6_addr32[3]; + tmp = word ^ (word>>16); + tmp ^= (tmp >> 8); + + return ((tmp ^ (tmp >> 4)) & 0x0f); +} + +/* + * compute link-local solicited-node multicast address + */ + +static __inline__ void addrconf_addr_solict_mult(struct in6_addr *addr, + struct in6_addr *solicited) +{ + ipv6_addr_set(solicited, + __constant_htonl(0xFF020000), 0, + __constant_htonl(0x1), addr->s6_addr32[3]); +} + +static __inline__ void ipv6_addr_all_nodes(struct in6_addr *addr) +{ + ipv6_addr_set(addr, + __constant_htonl(0xFF020000), 0, 0, + __constant_htonl(0x1)); +} + +static __inline__ void ipv6_addr_all_routers(struct in6_addr *addr) +{ + ipv6_addr_set(addr, + __constant_htonl(0xFF020000), 0, 0, + __constant_htonl(0x2)); +} + + +#endif +#endif diff -u --recursive --new-file v2.1.7/linux/include/net/checksum.h linux/include/net/checksum.h --- v2.1.7/linux/include/net/checksum.h Mon Sep 30 11:24:39 1996 +++ linux/include/net/checksum.h Thu Nov 7 19:56:46 1996 @@ -15,11 +15,79 @@ * as published by the Free 
Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +/* + * Fixes: + * + * Ralf Baechle : generic ipv6 checksum + * + */ + #ifndef _CHECKSUM_H #define _CHECKSUM_H #include #include #include + +#ifndef _HAVE_ARCH_IPV6_CSUM + +static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr, + struct in6_addr *daddr, + __u16 len, + unsigned short proto, + unsigned int csum) +{ + + int carry; + __u32 ulen; + __u32 uproto; + + csum += saddr->s6_addr32[0]; + carry = (csum < saddr->s6_addr32[0]); + csum += carry; + + csum += saddr->s6_addr32[1]; + carry = (csum < saddr->s6_addr32[1]); + csum += carry; + + csum += saddr->s6_addr32[2]; + carry = (csum < saddr->s6_addr32[2]); + csum += carry; + + csum += saddr->s6_addr32[3]; + carry = (csum < saddr->s6_addr32[3]); + csum += carry; + + csum += daddr->s6_addr32[0]; + carry = (csum < daddr->s6_addr32[0]); + csum += carry; + + csum += daddr->s6_addr32[1]; + carry = (csum < daddr->s6_addr32[1]); + csum += carry; + + csum += daddr->s6_addr32[2]; + carry = (csum < daddr->s6_addr32[2]); + csum += carry; + + csum += daddr->s6_addr32[3]; + carry = (csum < daddr->s6_addr32[3]); + csum += carry; + + ulen = htonl((__u32) len); + csum += ulen; + carry = (csum < ulen); + csum += carry; + + uproto = htonl(proto); + csum += proto; + carry = (csum < proto); + csum += carry; + + return csum_fold(csum); +} + +#endif #endif diff -u --recursive --new-file v2.1.7/linux/include/net/if_inet6.h linux/include/net/if_inet6.h --- v2.1.7/linux/include/net/if_inet6.h Thu Jan 1 02:00:00 1970 +++ linux/include/net/if_inet6.h Sun Nov 3 11:04:42 1996 @@ -0,0 +1,109 @@ +/* + * inet6 interface/address list definitions + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, 
or (at your option) any later version. + */ + +#ifndef _NET_IF_INET6_H +#define _NET_IF_INET6_H + +#define DAD_COMPLETE 0x00 +#define DAD_INCOMPLETE 0x01 +#define DAD_STATUS 0x01 + +#define ADDR_STATUS 0x06 +#define ADDR_DEPRECATED 0x02 +#define ADDR_INVALID 0x04 + +#define ADDR_PERMANENT 0x80 + +#define IF_RA_RCVD 0x20 +#define IF_RS_SENT 0x10 + +#ifdef __KERNEL__ + +struct inet6_ifaddr +{ + struct in6_addr addr; + __u32 prefix_len; + + __u32 valid_lft; + __u32 prefered_lft; + unsigned long tstamp; + + __u8 probes; + __u8 flags; + + __u16 scope; + + struct timer_list timer; + + struct inet6_dev *idev; + + struct inet6_ifaddr *lst_next; /* next addr in addr_lst */ + struct inet6_ifaddr *if_next; /* next addr in inet6_dev */ +}; + + +struct ipv6_mc_socklist { + struct in6_addr addr; + struct device *dev; + struct ipv6_mc_socklist *next; +}; + +struct ipv6_mc_list { + struct in6_addr addr; + struct device *dev; + struct ipv6_mc_list *next; + struct ipv6_mc_list *if_next; + struct timer_list timer; + int tm_running; + atomic_t users; +}; + +#define IFA_HOST IPV6_ADDR_LOOPBACK +#define IFA_LINK IPV6_ADDR_LINKLOCAL +#define IFA_SITE IPV6_ADDR_SITELOCAL +#define IFA_GLOBAL 0x0000U + +extern int in6_ifnum; + +struct inet6_dev +{ + struct device *dev; + + struct inet6_ifaddr *addr_list; + struct ipv6_mc_list *mc_list; + + __u32 if_index; + __u32 if_flags; + __u32 router:1, + unused:31; + + struct inet6_dev *next; +}; + + +extern __inline__ void ipv6_mc_map(struct in6_addr *addr, char *buf) +{ + /* + * +-------+-------+-------+-------+-------+-------+ + * | 33 | 33 | DST13 | DST14 | DST15 | DST16 | + * +-------+-------+-------+-------+-------+-------+ + */ + + buf[0]= 0x33; + buf[1]= 0x33; + + memcpy(buf + 2, &addr->s6_addr32[3], sizeof(__u32)); +} +#endif +#endif diff -u --recursive --new-file v2.1.7/linux/include/net/inet_common.h linux/include/net/inet_common.h --- v2.1.7/linux/include/net/inet_common.h Thu Jan 1 02:00:00 1970 +++ linux/include/net/inet_common.h Sun Nov 
3 11:04:42 1996 @@ -0,0 +1,47 @@ +#ifndef _INET_COMMON_H +#define _INET_COMMON_H + +extern struct proto_ops inet_proto_ops; +extern struct sock * tcp_sock_array[SOCK_ARRAY_SIZE]; +extern struct sock * udp_sock_array[SOCK_ARRAY_SIZE]; + + +/* + * INET4 prototypes used by INET6 + */ + +extern void inet_remove_sock(struct sock *sk1); +extern void inet_put_sock(unsigned short num, + struct sock *sk); +extern int inet_release(struct socket *sock, + struct socket *peer); +extern int inet_connect(struct socket *sock, + struct sockaddr * uaddr, + int addr_len, int flags); +extern int inet_accept(struct socket *sock, + struct socket *newsock, int flags); +extern int inet_recvmsg(struct socket *sock, + struct msghdr *ubuf, + int size, int noblock, + int flags, int *addr_len ); +extern int inet_sendmsg(struct socket *sock, + struct msghdr *msg, + int size, int noblock, + int flags); +extern int inet_shutdown(struct socket *sock, int how); +extern int inet_select(struct socket *sock, int sel_type, + select_table *wait); +extern int inet_setsockopt(struct socket *sock, int level, + int optname, char *optval, + int optlen); +extern int inet_getsockopt(struct socket *sock, int level, + int optname, char *optval, + int *optlen); +extern int inet_fcntl(struct socket *sock, + unsigned int cmd, + unsigned long arg); +extern int inet_listen(struct socket *sock, int backlog); + +#endif + + diff -u --recursive --new-file v2.1.7/linux/include/net/ipv6.h linux/include/net/ipv6.h --- v2.1.7/linux/include/net/ipv6.h Thu Jan 1 02:00:00 1970 +++ linux/include/net/ipv6.h Thu Nov 7 19:56:49 1996 @@ -0,0 +1,296 @@ +/* + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * $Id: ipv6.h,v 1.19 1996/09/24 17:04:20 roque Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _NET_IPV6_H +#define _NET_IPV6_H + +#include +#include + +/* + * NextHeader field of IPv6 header + */ + +#define NEXTHDR_HOP 0 /* Hop-by-hop option header. */ +#define NEXTHDR_TCP 6 /* TCP segment. */ +#define NEXTHDR_UDP 17 /* UDP message. */ +#define NEXTHDR_IPV6 41 /* IPv6 in IPv6 */ +#define NEXTHDR_ROUTING 43 /* Routing header. */ +#define NEXTHDR_FRAGMENT 44 /* Fragmentation/reassembly header. */ +#define NEXTHDR_ESP 50 /* Encapsulating security payload. */ +#define NEXTHDR_AUTH 51 /* Authentication header. */ +#define NEXTHDR_ICMP 58 /* ICMP for IPv6. */ +#define NEXTHDR_NONE 59 /* No next header */ +#define NEXTHDR_DEST 60 /* Destination options header. */ + +#define NEXTHDR_MAX 255 + + + +#define IPV6_DEFAULT_HOPLIMIT 64 +#define IPV6_DEFAULT_MCASTHOPS 1 + +/* + * Addr type + * + * type - unicast | multicast | anycast + * scope - local | site | global + * v4 - compat + * v4mapped + * any + * loopback + */ + +#define IPV6_ADDR_ANY 0x0000U + +#define IPV6_ADDR_UNICAST 0x0001U +#define IPV6_ADDR_MULTICAST 0x0002U +#define IPV6_ADDR_ANYCAST 0x0004U + +#define IPV6_ADDR_LOOPBACK 0x0010U +#define IPV6_ADDR_LINKLOCAL 0x0020U +#define IPV6_ADDR_SITELOCAL 0x0040U + +#define IPV6_ADDR_COMPATv4 0x0080U + +#define IPV6_ADDR_SCOPE_MASK 0x00f0U + +#define IPV6_ADDR_MAPPED 0x1000U +#define IPV6_ADDR_RESERVED 0x2000U /* reserved address space */ + +/* + * fragmentation header + */ + +struct frag_hdr { + unsigned char nexthdr; + unsigned char reserved; + unsigned short frag_off; + __u32 identification; +}; + +#ifdef __KERNEL__ + +#include + +extern struct ipv6_mib ipv6_statistics; + +extern int ipv6_forwarding; /* host/router switch */ +extern int ipv6_hop_limit; /* default hop limit */ + +struct ipv6_frag { + __u16 offset; + __u16 len; + struct sk_buff *skb; + + struct frag_hdr *fhdr; + + struct ipv6_frag *next; +}; + +/* + * Equivalent of ipv4 struct ipq + */ + +struct frag_queue { + + struct frag_queue *next; + struct frag_queue *prev; + + __u32 id; /* 
fragment id */ + struct timer_list timer; /* expire timer */ + struct ipv6_frag *fragments; + struct device *dev; + __u8 last_in; /* has last segment arrived? */ + __u8 nexthdr; + __u8 *nhptr; +}; + +extern int ipv6_routing_header(struct sk_buff **skb, + struct device *dev, + __u8 *nhptr, + struct ipv6_options *opt); + +extern int ipv6_reassembly(struct sk_buff **skb, + struct device *dev, + __u8 *nhptr, + struct ipv6_options *opt); + +#define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */ + +/* + * Function prototype for build_xmit + */ + +typedef void (*inet_getfrag_t) (const void *data, + struct in6_addr *addr, + char *, + unsigned int, unsigned int); + + +extern int ipv6_addr_type(struct in6_addr *addr); + +extern __inline__ int ipv6_addr_cmp(struct in6_addr *a1, struct in6_addr *a2) +{ + return memcmp((void *) a1, (void *) a2, sizeof(struct in6_addr)); +} + +extern __inline__ void ipv6_addr_copy(struct in6_addr *a1, struct in6_addr *a2) +{ + memcpy((void *) a1, (void *) a2, sizeof(struct in6_addr)); +} + +#ifndef __HAVE_ARCH_ADDR_SET +extern __inline__ void ipv6_addr_set(struct in6_addr *addr, + __u32 w1, __u32 w2, + __u32 w3, __u32 w4) +{ + addr->s6_addr32[0] = w1; + addr->s6_addr32[1] = w2; + addr->s6_addr32[2] = w3; + addr->s6_addr32[3] = w4; +} +#endif + +extern __inline__ int ipv6_addr_any(struct in6_addr *a) +{ + return ((a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | a->s6_addr32[3] ) == 0); +} + +/* + * Prototypes exported by ipv6 + */ + +#if 0 +extern int ipv6_build_header(struct sk_buff *skb, + struct device *dev, + struct in6_addr *saddr_in, + struct in6_addr *daddr_in, + int proto, int len, + struct ipv6_pinfo *np); +#endif + +extern void ipv6_redo_mac_hdr(struct sk_buff *skb, + struct neighbour *neigh, + int len); + +extern int ipv6_bld_hdr_2(struct sock *sk, + struct sk_buff *skb, + struct device *dev, + struct neighbour *neigh, + struct in6_addr *saddr, + struct in6_addr *daddr, + int proto, int len); + +extern int ipv6_xmit(struct sock 
*sk, + struct sk_buff *skb, + struct in6_addr *saddr, + struct in6_addr *daddr, + struct ipv6_options *opt, + int proto); + +extern void ipv6_queue_xmit(struct sock *sk, + struct device *dev, + struct sk_buff *skb, + int free); + +extern int ipv6_build_xmit(struct sock *sk, + inet_getfrag_t getfrag, + const void * data, + struct in6_addr * daddr, + unsigned short int length, + struct in6_addr * saddr, + struct device *dev, + struct ipv6_options *opt, + int proto, int noblock); + +/* + * rcv function (called from netdevice level) + */ + +extern int ipv6_rcv(struct sk_buff *skb, + struct device *dev, + struct packet_type *pt); + +extern void ipv6_forward(struct sk_buff *skb, + struct device *dev, + int flags); + +#define IP6_FW_SRCRT 0x1 +#define IP6_FW_STRICT 0x2 + +/* + * Extension header (options) processing + */ +extern int ipv6opt_bld_rthdr(struct sk_buff *skb, + struct ipv6_options *opt, + struct in6_addr *addr, + int proto); + +extern int ipv6opt_srcrt_co(struct sockaddr_in6 *sin6, + int len, + struct ipv6_options *opt); + +extern int ipv6opt_srcrt_cl(struct sockaddr_in6 *sin6, + int num_addrs, + struct ipv6_options *opt); + +extern int ipv6opt_srt_tosin(struct ipv6_options *opt, + struct sockaddr_in6 *sin6, + int len); + +extern void ipv6opt_free(struct ipv6_options *opt); + + +/* + * socket lookup (af_inet6.c) + */ + +extern struct sock * inet6_get_sock(struct proto *prot, + struct in6_addr *loc_addr, + struct in6_addr *rmt_addr, + unsigned short loc_port, + unsigned short rmt_port); + +extern struct sock * inet6_get_sock_raw(struct sock *sk, + unsigned short num, + struct in6_addr *loc_addr, + struct in6_addr *rmt_addr); + +extern struct sock * inet6_get_sock_mcast(struct sock *sk, + unsigned short num, + unsigned short rmt_port, + struct in6_addr *loc_addr, + struct in6_addr *rmt_addr); + +/* + * socket options (ipv6_sockglue.c) + */ + +extern int ipv6_setsockopt(struct sock *sk, int level, + int optname, char *optval, + int optlen); +extern int 
ipv6_getsockopt(struct sock *sk, int level, + int optname, char *optval, + int *optlen); + + +extern void ipv6_init(void); +extern void ipv6_cleanup(void); +#endif +#endif + + + diff -u --recursive --new-file v2.1.7/linux/include/net/ipv6_route.h linux/include/net/ipv6_route.h --- v2.1.7/linux/include/net/ipv6_route.h Thu Jan 1 02:00:00 1970 +++ linux/include/net/ipv6_route.h Sun Nov 3 11:04:42 1996 @@ -0,0 +1,196 @@ +/* + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_IPV6_ROUTE_H +#define _NET_IPV6_ROUTE_H + +#include + + +#ifdef __KERNEL__ + + +struct fib6_node { + struct fib6_node *parent; + struct fib6_node *left; + struct fib6_node *right; + + struct rt6_info *leaf; + + __u16 fn_bit; /* bit key */ + __u16 fn_flags; + __u32 fn_sernum; +}; + + +struct rt6_info; + +typedef void (*rt6_output_method_t) (struct sk_buff *skb, struct rt6_info *rt); + +struct rt6_info { + struct fib6_node *fib_node; + struct rt6_info *next; + + struct in6_addr rt_dst; + + atomic_t rt_use; /* dcache references */ + atomic_t rt_ref; /* fib references */ + + struct neighbour *rt_nexthop; + struct device *rt_dev; + + rt6_output_method_t rt_output_method; + + __u16 rt_metric; + __u16 rt_prefixlen; + __u32 rt_flags; + unsigned long rt_expires; +}; + +extern struct rt6_info *default_rt_list; +extern struct rt6_info *last_resort_rt; + +struct dest_entry { + struct rt6_info rt; + + __u32 dc_irtt; + __u32 dc_window; + __u16 dc_pmtu; + + unsigned long dc_tstamp; /* for garbage collection */ + +#define dc_addr rt.rt_dst +#define dc_usecnt rt.rt_use +#define dc_nexthop rt.rt_nexthop +#define dc_flags rt.rt_flags +}; + +/* + * Structure for assync processing of operations on the routing + * table + */ + 
+struct rt6_req { + int operation; + struct rt6_info *ptr; + + struct rt6_req *next; + struct rt6_req *prev; + +#define RT_OPER_ADD 1 +#define RT_OPER_DEL 2 +}; + +struct rt6_statistics { + __u32 fib_nodes; + __u32 fib_route_nodes; + __u32 fib_rt_alloc; + __u32 fib_rt_entries; + __u32 fib_dc_alloc; +}; + +#define RTN_ROOT 0x0001 /* root node */ +#define RTN_BACKTRACK 0x0002 /* backtrack point */ +#define RTN_TAG 0x0010 + +/* + * Values for destination cache garbage colection + * These are wild guesses for now... + */ + +#define DC_WATER_MARK 512 +#define DC_SHORT_TIMEOUT (5*HZ) +#define DC_LONG_TIMEOUT (15*HZ) + +#define DC_TIME_RUN (5*HZ) +#define DC_TIME_RETRY HZ + +/* + * Prototypes + */ + +/* + * check/obtain destination cache from routing table + */ + +extern struct dest_entry * ipv6_dst_check(struct dest_entry *dc, + struct in6_addr * daddr, + __u32 sernum, int flags); + +extern struct dest_entry * ipv6_dst_route(struct in6_addr * daddr, + struct device *src_dev, + int flags); + +extern void ipv6_dst_unlock(struct dest_entry *dest); + +extern struct rt6_info * fibv6_lookup(struct in6_addr *addr, + struct device *dev, + int flags); + +/* + * user space set/del route + */ + +extern int ipv6_route_ioctl(unsigned int cmd, void *arg); + + +extern void ipv6_route_init(void); +extern void ipv6_route_cleanup(void); + +extern int ipv6_route_add(struct in6_rtmsg *rt); + +extern int fib6_del_rt(struct rt6_info *rt); + +extern void rt6_sndmsg(__u32 type, struct in6_addr *dst, + struct in6_addr *gw, __u16 plen, + __u16 metric, char *devname, + __u16 flags); +/* + * ICMP interface + */ + +extern struct rt6_info * ipv6_rt_redirect(struct device *dev, + struct in6_addr *dest, + struct in6_addr *target, + int on_link); + +extern void rt6_handle_pmtu(struct in6_addr *addr, + int pmtu); +/* + * + */ + +extern struct fib6_node routing_table; +extern struct rt6_statistics rt6_stats; + +static __inline__ void rt_release(struct rt6_info *rt) +{ + atomic_dec(&rt->rt_ref); + if 
((rt->rt_use | rt->rt_ref) == 0) + { + if (rt->rt_nexthop) + { + ndisc_dec_neigh(rt->rt_nexthop); + } + + if (rt->rt_flags & RTI_DCACHE) + { + rt6_stats.fib_dc_alloc--; + } + rt6_stats.fib_rt_alloc--; + kfree(rt); + } +} + +#endif + +#endif diff -u --recursive --new-file v2.1.7/linux/include/net/ndisc.h linux/include/net/ndisc.h --- v2.1.7/linux/include/net/ndisc.h Thu Jan 1 02:00:00 1970 +++ linux/include/net/ndisc.h Thu Nov 7 19:56:49 1996 @@ -0,0 +1,184 @@ +#ifndef _NDISC_H +#define _NDISC_H + + +/* + * Neighbor Cache Entry States (7.3.2.) + */ + +/* + * The lsb is set for states that have a timer associated + */ + +#define NUD_NONE 0x00 +#define NUD_INCOMPLETE 0x11 +#define NUD_REACHABLE 0x20 +#define NUD_STALE 0x30 +#define NUD_DELAY 0x41 +#define NUD_PROBE 0x51 +#define NUD_FAILED 0x60 /* neighbour discovery failed */ + +#define NUD_IN_TIMER 0x01 + +#define NDISC_QUEUE_LEN 3 + +#define NCF_NOARP 0x01 /* no ARP needed on this device */ +#define NCF_SUBNET 0x02 /* NC entry for subnet */ +#define NCF_INVALID 0x04 +#define NCF_DELAY_EXPIRED 0x08 /* time to move to PROBE */ +#define NCF_ROUTER 0x10 /* neighbour is a router */ +#define NCF_HHVALID 0x20 /* Hardware header is valid */ + +/* + * ICMP codes for neighbour discovery messages + */ + +#define NDISC_ROUTER_SOLICITATION 133 +#define NDISC_ROUTER_ADVERTISEMENT 134 +#define NDISC_NEIGHBOUR_SOLICITATION 135 +#define NDISC_NEIGHBOUR_ADVERTISEMENT 136 +#define NDISC_REDIRECT 137 + +/* + * ndisc options + */ + +#define ND_OPT_SOURCE_LL_ADDR 1 +#define ND_OPT_TARGET_LL_ADDR 2 +#define ND_OPT_PREFIX_INFO 3 +#define ND_OPT_REDIRECT_HDR 4 +#define ND_OPT_MTU 5 + +#define MAX_RTR_SOLICITATION_DELAY HZ + +#define RECHABLE_TIME (30*HZ) +#define RETRANS_TIMER HZ + +#define MIN_RANDOM_FACTOR (1/2) +#define MAX_RANDOM_FACTOR (3/2) + +#define REACH_RANDOM_INTERVAL (60*60*HZ) /* 1 hour */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +/* + * neighbour cache entry + * used by neighbour discovery module + * 
as similar functions of "struct hh_cache" used in ipv4 + */ +struct neighbour { + struct in6_addr addr; /* next hop addr */ + __u8 len; /* prefix len */ + __u8 type; /* {unicast, multicast} */ + + struct device * dev; + + __u8 flags; + + + __u8 hh_data[MAX_ADDR_LEN]; /* cached hdr */ + __u8 *h_dest; /* dest addr */ + + struct sk_buff_head arp_queue; /* packets waiting for ND to + finish */ + atomic_t refcnt; + __u8 nud_state; + __u8 probes; + __u32 tstamp; /* last reachable conf */ + + unsigned long expires; /* timer expires at */ + + struct neighbour *next; /* for hash chaining */ + struct neighbour *prev; /* for hash chaining */ +}; + +struct nd_msg { + struct icmpv6hdr icmph; + struct in6_addr target; + struct { + __u8 opt_type; + __u8 opt_len; + __u8 link_addr[MAX_ADDR_LEN]; + } opt; +}; + +struct ra_msg { + struct icmpv6hdr icmph; + __u32 reachable_time; + __u32 retrans_timer; +}; + +struct ndisc_statistics { + __u32 allocs; /* allocated entries */ + __u32 free_delayed; /* zombie entries */ + __u32 snt_probes_ucast; /* ns probes sent (ucast) */ + __u32 snt_probes_mcast; /* ns probes sent (mcast) */ + __u32 rcv_probes_ucast; /* ns probes rcv (ucast) */ + __u32 rcv_probes_mcast; /* ns probes rcv (mcast) */ + __u32 rcv_upper_conf; /* confirmations from upper layers */ + __u32 res_failed; /* address resolution failures */ +}; + +extern struct neighbour * ndisc_get_neigh(struct device *dev, + struct in6_addr *addr); + +extern void ndisc_validate(struct neighbour *neigh); + +extern void ndisc_init(struct proto_ops *ops); +extern void ndisc_cleanup(void); + +extern int ndisc_eth_resolv(unsigned char *, + struct device *, + struct sk_buff *); + +extern int ndisc_rcv(struct sk_buff *skb, + struct device *dev, + struct in6_addr *saddr, + struct in6_addr *daddr, + struct ipv6_options *opt, + unsigned short len); + +extern void ndisc_event_send(struct neighbour *neigh, + struct sk_buff *skb); + +extern void ndisc_send_ns(struct device *dev, + struct neighbour *neigh, + 
struct in6_addr *solicit, + struct in6_addr *daddr, + struct in6_addr *saddr); + +extern void ndisc_send_rs(struct device *dev, + struct in6_addr *saddr, + struct in6_addr *daddr); + +extern int (*ndisc_eth_hook) (unsigned char *, + struct device *, + struct sk_buff *); + +extern void ndisc_forwarding_on(void); +extern void ndisc_forwarding_off(void); + +extern void ndisc_send_redirect(struct sk_buff *skb, + struct neighbour *neigh, + struct in6_addr *target); + +struct rt6_info * dflt_rt_lookup(void); + +extern unsigned long nd_rand_seed; +extern int ipv6_random(void); + + +static __inline__ void ndisc_dec_neigh(struct neighbour *neigh) +{ + atomic_dec(&neigh->refcnt); +} + +#endif /* __KERNEL__ */ + + +#endif diff -u --recursive --new-file v2.1.7/linux/include/net/netlink.h linux/include/net/netlink.h --- v2.1.7/linux/include/net/netlink.h Mon Sep 30 11:25:08 1996 +++ linux/include/net/netlink.h Thu Nov 7 19:57:13 1996 @@ -2,9 +2,10 @@ #define __NET_NETLINK_H #define NET_MAJOR 36 /* Major 18 is reserved for networking */ -#define MAX_LINKS 11 /* 18,0 for route updates, 18,1 for SKIP, 18,2 debug tap 18,3 PPP reserved */ +#define MAX_LINKS 12 /* 18,0 for route updates, 18,1 for SKIP, 18,2 debug tap 18,3 PPP reserved */ /* 4-7 are psi0-psi3 8 is arpd 9 is ppp */ /* 10 is for IPSEC */ + /* 11 IPv6 route updates */ #define MAX_QBYTES 32768 /* Maximum bytes in the queue */ #include @@ -23,6 +24,7 @@ #define NETLINK_ARPD 8 #define NETLINK_NET_PPP 9 /* Non tty PPP devices */ #define NETLINK_IPSEC 10 /* IPSEC */ +#define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */ #ifdef CONFIG_RTNETLINK extern void ip_netlink_msg(unsigned long, __u32, __u32, __u32, short, short, char *); diff -u --recursive --new-file v2.1.7/linux/include/net/protocol.h linux/include/net/protocol.h --- v2.1.7/linux/include/net/protocol.h Sun Feb 11 13:28:37 1996 +++ linux/include/net/protocol.h Thu Nov 7 19:53:38 1996 @@ -18,11 +18,18 @@ * Alan Cox : Added a name field and a frag handler * field 
for later. * Alan Cox : Cleaned up, and sorted types. + * Pedro Roque : inet6 protocols */ #ifndef _PROTOCOL_H #define _PROTOCOL_H +#include +#include +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#include +#endif + #define MAX_INET_PROTOS 32 /* Must be a power of 2 */ @@ -33,8 +40,7 @@ unsigned short len, __u32 saddr, int redo, struct inet_protocol *protocol); void (*err_handler)(int type, int code, unsigned char *buff, - __u32 daddr, - __u32 saddr, + __u32 info, __u32 daddr, __u32 saddr, struct inet_protocol *protocol); struct inet_protocol *next; unsigned char protocol; @@ -43,13 +49,41 @@ const char *name; }; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +struct inet6_protocol { + int (*handler)(struct sk_buff *skb, struct device *dev, + struct in6_addr *saddr, + struct in6_addr *daddr, + struct ipv6_options *opt, + unsigned short len, + int redo, struct inet6_protocol *protocol); + + void (*err_handler)(int type, int code, unsigned char *buff, + __u32 info, struct in6_addr *saddr, + struct in6_addr *daddr, + struct inet6_protocol *protocol); + struct inet6_protocol *next; + unsigned char protocol; + unsigned char copy:1; + void *data; + const char *name; +}; +#endif extern struct inet_protocol *inet_protocol_base; extern struct inet_protocol *inet_protos[MAX_INET_PROTOS]; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +extern struct inet6_protocol *inet6_protocol_base; +extern struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; +#endif extern void inet_add_protocol(struct inet_protocol *prot); extern int inet_del_protocol(struct inet_protocol *prot); +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +extern void inet6_add_protocol(struct inet6_protocol *prot); +extern int inet6_del_protocol(struct inet6_protocol *prot); +#endif #endif /* _PROTOCOL_H */ diff -u --recursive --new-file v2.1.7/linux/include/net/rawv6.h linux/include/net/rawv6.h --- v2.1.7/linux/include/net/rawv6.h Thu Jan 1 02:00:00 1970 +++ 
linux/include/net/rawv6.h Sun Nov 3 11:04:42 1996 @@ -0,0 +1,21 @@ +#ifndef _NET_RAWV6_H +#define _NET_RAWV6_H + +#ifdef __KERNEL__ +extern int rawv6_rcv(struct sk_buff *skb, + struct device *dev, + struct in6_addr *saddr, + struct in6_addr *daddr, + struct ipv6_options *opt, + unsigned short len); + + +extern void rawv6_err(struct sock *sk, + int type, int code, + unsigned char *buff, + struct in6_addr *saddr, + struct in6_addr *daddr); + +#endif + +#endif diff -u --recursive --new-file v2.1.7/linux/include/net/sit.h linux/include/net/sit.h --- v2.1.7/linux/include/net/sit.h Thu Jan 1 02:00:00 1970 +++ linux/include/net/sit.h Sun Nov 3 11:04:42 1996 @@ -0,0 +1,39 @@ +/* + * SIT tunneling device - definitions + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _NET_SIT_H +#define _NET_SIT_H + +struct sit_mtu_info { + __u32 addr; /* IPv4 destination */ + unsigned long tstamp; /* last use tstamp */ + __u32 mtu; /* Path MTU */ + struct sit_mtu_info *next; +}; + +struct sit_vif { + char name[8]; + struct device *dev; + struct sit_vif *next; +}; + +extern int sit_init(void); +extern void sit_cleanup(void); + +extern struct device * sit_add_tunnel(__u32 dstaddr); + +#define SIT_GC_TIMEOUT (3*60*HZ) +#define SIT_GC_FREQUENCY (2*60*HZ) + +#endif diff -u --recursive --new-file v2.1.7/linux/include/net/snmp.h linux/include/net/snmp.h --- v2.1.7/linux/include/net/snmp.h Tue Jun 6 11:22:18 1995 +++ linux/include/net/snmp.h Sun Nov 3 11:04:42 1996 @@ -48,6 +48,26 @@ unsigned long IpFragCreates; }; +struct ipv6_mib +{ + unsigned long Ip6InReceives; + unsigned long Ip6InHdrErrors; + unsigned long Ip6InAddrErrors; + unsigned long Ip6ForwDatagrams; + unsigned long Ip6InUnknownProtos; + unsigned long Ip6InDiscards; + unsigned long Ip6InDelivers; + unsigned long Ip6OutRequests; + unsigned long Ip6OutDiscards; + unsigned long Ip6OutNoRoutes; + unsigned long Ip6ReasmTimeout; + unsigned long Ip6ReasmReqds; + unsigned long Ip6ReasmOKs; + unsigned long Ip6ReasmFails; + unsigned long Ip6FragOKs; + unsigned long Ip6FragFails; + unsigned long Ip6FragCreates; +}; struct icmp_mib { diff -u --recursive --new-file v2.1.7/linux/include/net/sock.h linux/include/net/sock.h --- v2.1.7/linux/include/net/sock.h Thu Oct 10 19:10:57 1996 +++ linux/include/net/sock.h Thu Nov 7 19:53:38 1996 @@ -31,9 +31,18 @@ #ifndef _SOCK_H #define _SOCK_H +#include #include #include /* struct options */ #include /* struct sockaddr_in */ + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#include /* struct sockaddr_in6 */ +#include /* dest_cache, inet6_options */ +#include +#include /* struct ipv6_mc_socklist */ +#endif + #include /* struct tcphdr */ #include @@ -116,7 +125,53 @@ }; #endif -#ifdef CONFIG_NUTCP +#if defined(CONFIG_IPV6) || defined 
(CONFIG_IPV6_MODULE) +struct ipv6_pinfo +{ + struct in6_addr saddr; + struct in6_addr rcv_saddr; + struct in6_addr daddr; + + __u32 flow_lbl; + __u8 priority; + __u8 hop_limit; + + __u8 mcast_hops; + + /* sockopt flags */ + + __u8 recvsrcrt:1, + rxinfo:1, + mc_loop:1, + unused:4; + + /* device for outgoing mcast packets */ + + struct device *mc_if; + + struct ipv6_mc_socklist *ipv6_mc_list; + /* + * destination cache entry pointer + * contains a pointer to neighbour cache + * and other info related to network level + * (ex. PMTU) + */ + + struct dest_entry *dest; + __u32 dc_sernum; + + struct ipv6_options *opt; +}; + +struct raw6_opt { + __u32 checksum; /* perform checksum */ + __u32 offset; /* checksum offset */ + + struct icmp6_filter filter; +}; + +#endif /* IPV6 */ + struct tcp_opt { /* @@ -132,6 +187,14 @@ __u32 snd_up; /* Outgoing urgent pointer */ __u32 snd_wl1; /* Sequence for window update */ __u32 snd_wl2; /* Ack sequence for update */ + + __u32 rcv_wup; /* rcv_nxt on last window update sent */ + + + __u32 srtt; /* smothed round trip time << 3 */ + __u32 mdev; /* medium deviation */ + __u32 rto; /* retransmit timeout */ + __u32 backoff; /* backoff */ /* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ @@ -145,14 +208,85 @@ struct timer_list completion_timer; /* Up/Down timer */ struct timer_list probe_timer; /* Probes */ struct timer_list retransmit_timer; /* Resend (no ack) */ + + __u32 basertt; /* Vegas baseRTT */ + + __u8 delayed_acks; + __u8 dup_acks; + + __u32 lrcvtime; /* timestamp of last received data packet */ + __u32 rcv_tstamp; /* timestamp of last received packet */ + __u32 iat_mdev; /* interarrival time medium deviation */ + __u32 iat; /* interarrival time */ + __u32 ato; /* delayed ack timeout */ + + __u32 high_seq; +/* + * new send pointers + */ + struct sk_buff * send_head; + struct sk_buff * retrans_head; /* retrans head can be + * different to the head of + * write queue if we are doing + * fast retransmit + 
*/ +/* + * pending events + */ + __u8 pending; + +/* + * Header prediction flags + * 0x5?10 << 16 + snd_wnd in net byte order + */ + __u32 pred_flags; + __u32 snd_wnd; /* The window we expect to receive */ + + __u32 probes_out; /* unanswered 0 window probes */ + + struct open_request *syn_wait_queue; + struct tcp_func *af_specific; }; -#endif + /* * This structure really needs to be cleaned up. * Most of it is for TCP, and not used by any of * the other protocols. */ + +/* + * The idea is to start moving to a newer struct gradualy + * + * IMHO the newer struct should have the following format: + * + * struct sock { + * sockmem [mem, proto, callbacks] + * + * union or struct { + * netrom; + * ax_25; + * } ll_pinfo; + * + * union { + * ipv4; + * ipv6; + * ipx; + * } net_pinfo; + * + * union { + * tcp; + * udp; + * spx; + * } tp_pinfo; + * + * } + */ + +/* + * TCP will start to use the new protinfo while *still using the old* fields + */ + struct sock { struct options *opt; @@ -160,17 +294,15 @@ atomic_t rmem_alloc; unsigned long allocation; /* Allocation mode */ __u32 write_seq; - __u32 sent_seq; - __u32 acked_seq; __u32 copied_seq; - __u32 rcv_ack_seq; - unsigned short rcv_ack_cnt; /* count of same ack */ - __u32 window_seq; __u32 fin_seq; __u32 syn_seq; __u32 urg_seq; __u32 urg_data; int users; /* user count */ + + unsigned char delayed_acks, + dup_acks; /* * Not all are volatile, but some are, so we * might as well say they all are. @@ -183,7 +315,6 @@ reuse, keepopen, linger, - delay_acks, destroy, ack_timed, no_check, @@ -196,52 +327,68 @@ struct sock *next; struct sock *prev; /* Doubly linked chain.. 
*/ struct sock *pair; - struct sk_buff * volatile send_head; - struct sk_buff * volatile send_next; - struct sk_buff * volatile send_tail; + + struct sk_buff * send_head; + struct sk_buff * send_tail; + struct sk_buff_head back_log; struct sk_buff *partial; struct timer_list partial_timer; - long retransmits; + atomic_t retransmits; + struct sk_buff_head write_queue, - receive_queue; + receive_queue, + out_of_order_queue; + + unsigned short family; struct proto *prot; struct wait_queue **sleep; + __u32 daddr; __u32 saddr; /* Sending source */ __u32 rcv_saddr; /* Bound address */ + unsigned short max_unacked; - unsigned short window; - __u32 lastwin_seq; /* sequence number when we last updated the window we offer */ - __u32 high_seq; /* sequence number when we did current fast retransmit */ - volatile unsigned long ato; /* ack timeout */ - volatile unsigned long lrcvtime; /* jiffies at last data rcv */ - volatile unsigned long idletime; /* jiffies at last rcv */ + + unsigned short bytes_rcv; /* * mss is min(mtu, max_window) */ unsigned short mtu; /* mss negotiated in the syn's */ - volatile unsigned short mss; /* current eff. mss - can change */ - volatile unsigned short user_mss; /* mss requested by user in ioctl */ - volatile unsigned short max_window; + unsigned short mss; /* current eff. 
mss - can change */ + unsigned short user_mss; /* mss requested by user in ioctl */ + unsigned short max_window; unsigned long window_clamp; unsigned int ssthresh; unsigned short num; - volatile unsigned short cong_window; - volatile unsigned short cong_count; - volatile unsigned short packets_out; - volatile unsigned short shutdown; - volatile unsigned long rtt; - volatile unsigned long mdev; - volatile unsigned long rto; + unsigned short cong_window; + unsigned short cong_count; + atomic_t packets_out; + unsigned short shutdown; + + unsigned short window; /* used by netrom/ax.25 */ + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + union { + struct ipv6_pinfo af_inet6; + } net_pinfo; +#endif + + union { + struct tcp_opt af_tcp; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + struct raw6_opt tp_raw; +#endif + } tp_pinfo; /* * currently backoff isn't used, but I'm maintaining it in case * we want to go back to a backoff formula that needs it */ - - volatile unsigned short backoff; +/* + unsigned short backoff; + */ int err, err_soft; /* Soft holds errors that don't cause failure but are the cause of a persistent failure not just @@ -252,8 +399,8 @@ unsigned char max_ack_backlog; unsigned char priority; unsigned char debug; - unsigned short rcvbuf; - unsigned short sndbuf; + int rcvbuf; + int sndbuf; unsigned short type; unsigned char localroute; /* Route locally only */ #ifdef CONFIG_AX25 @@ -328,63 +475,82 @@ void (*data_ready)(struct sock *sk,int bytes); void (*write_space)(struct sock *sk); void (*error_report)(struct sock *sk); - + + int (*backlog_rcv) (struct sock *sk, + struct sk_buff *skb); }; +#if 0 +/* + * Inet protocol options + */ +struct inet_options { + __u8 version; + union { + struct options opt_v4; + struct ipv6_options opt_v6; + } u; +}; +#endif + /* * IP protocol blocks we attach to sockets. 
+ * socket layer -> transport layer interface + * transport -> network interface is defined by struct inet_proto */ struct proto { - void (*close)(struct sock *sk, unsigned long timeout); - int (*build_header)(struct sk_buff *skb, - __u32 saddr, - __u32 daddr, - struct device **dev, int type, - struct options *opt, int len, - int tos, int ttl, struct rtable ** rp); + void (*close)(struct sock *sk, + unsigned long timeout); int (*connect)(struct sock *sk, - struct sockaddr_in *usin, int addr_len); + struct sockaddr *uaddr, + int addr_len); + struct sock * (*accept) (struct sock *sk, int flags); - void (*queue_xmit)(struct sock *sk, - struct device *dev, struct sk_buff *skb, - int free); void (*retransmit)(struct sock *sk, int all); void (*write_wakeup)(struct sock *sk); void (*read_wakeup)(struct sock *sk); - int (*rcv)(struct sk_buff *buff, struct device *dev, - struct options *opt, __u32 daddr, - unsigned short len, __u32 saddr, - int redo, struct inet_protocol *protocol); + int (*select)(struct sock *sk, int which, select_table *wait); + int (*ioctl)(struct sock *sk, int cmd, unsigned long arg); int (*init)(struct sock *sk); + int (*destroy)(struct sock *sk); void (*shutdown)(struct sock *sk, int how); - int (*setsockopt)(struct sock *sk, int level, int optname, - char *optval, int optlen); - int (*getsockopt)(struct sock *sk, int level, int optname, - char *optval, int *option); - int (*sendmsg)(struct sock *sk, struct msghdr *msg, int len, - int noblock, int flags); - int (*recvmsg)(struct sock *sk, struct msghdr *msg, int len, - int noblock, int flags, int *addr_len); - int (*bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len); + int (*setsockopt)(struct sock *sk, int level, + int optname, char *optval, int optlen); + int (*getsockopt)(struct sock *sk, int level, + int optname, char *optval, + int *option); + int (*sendmsg)(struct sock *sk, struct msghdr *msg, + int len, int noblock, int flags); + int (*recvmsg)(struct sock *sk, struct msghdr *msg, + 
int len, int noblock, int flags, + int *addr_len); + int (*bind)(struct sock *sk, + struct sockaddr *uaddr, int addr_len); + + int (*backlog_rcv) (struct sock *sk, + struct sk_buff *skb); + unsigned short max_header; unsigned long retransmits; char name[32]; int inuse, highestinuse; - struct sock * sock_array[SOCK_ARRAY_SIZE]; + struct sock ** sock_array; }; -#define TIME_WRITE 1 -#define TIME_CLOSE 2 -#define TIME_KEEPOPEN 3 -#define TIME_DESTROY 4 -#define TIME_DONE 5 /* Used to absorb those last few packets */ -#define TIME_PROBE0 6 +#define TIME_WRITE 1 /* Not yet used */ +#define TIME_RETRANS 2 /* Retransmit timer */ +#define TIME_DACK 3 /* Delayed ack timer */ +#define TIME_CLOSE 4 +#define TIME_KEEPOPEN 5 +#define TIME_DESTROY 6 +#define TIME_DONE 7 /* Used to absorb those last few packets */ +#define TIME_PROBE0 8 /* * About 10 seconds @@ -445,13 +611,32 @@ __release_sock(sk); } +/* + * This might not be the most apropriate place for this two + * but since they are used by a lot of the net related code + * at least they get declared on a include that is common to all + */ + +static __inline__ int min(unsigned int a, unsigned int b) +{ + if (a > b) + a = b; + return a; +} + +static __inline__ int max(unsigned int a, unsigned int b) +{ + if (a < b) + a = b; + return a; +} extern struct sock * sk_alloc(int priority); extern void sk_free(struct sock *sk); extern void destroy_sock(struct sock *sk); extern unsigned short get_new_socknum(struct proto *, unsigned short); -extern void put_sock(unsigned short, struct sock *); +extern void inet_put_sock(unsigned short, struct sock *); extern struct sock *get_sock(struct proto *, unsigned short, unsigned long, unsigned short, unsigned long, @@ -537,8 +722,8 @@ extern struct sock *timer_base; -extern void delete_timer (struct sock *); -extern void reset_timer (struct sock *, int, unsigned long); +extern void net_delete_timer (struct sock *); +extern void net_reset_timer (struct sock *, int, unsigned long); extern void 
net_timer (unsigned long); diff -u --recursive --new-file v2.1.7/linux/include/net/tcp.h linux/include/net/tcp.h --- v2.1.7/linux/include/net/tcp.h Wed Oct 9 08:55:24 1996 +++ linux/include/net/tcp.h Thu Nov 7 19:56:46 1996 @@ -18,24 +18,33 @@ #ifndef _TCP_H #define _TCP_H +#include #include #include + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#define NETHDR_SIZE sizeof(struct ipv6hdr) +#else +#define NETHDR_SIZE sizeof(struct iphdr) + 40 +#endif + /* * 40 is maximal IP options size * 4 is TCP option size (MSS) */ -#define MAX_SYN_SIZE (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + 4 + MAX_HEADER + 15) -#define MAX_FIN_SIZE (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + MAX_HEADER + 15) -#define MAX_ACK_SIZE (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + MAX_HEADER + 15) -#define MAX_RESET_SIZE (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + MAX_HEADER + 15) + +#define MAX_SYN_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + 4 + MAX_HEADER + 15) +#define MAX_FIN_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + MAX_HEADER + 15) +#define MAX_ACK_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + MAX_HEADER + 15) +#define MAX_RESET_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + MAX_HEADER + 15) #define MAX_WINDOW 32767 /* Never offer a window over 32767 without using window scaling (not yet supported). Some poor stacks do signed 16bit maths! 
*/ #define MIN_WINDOW 2048 #define MAX_ACK_BACKLOG 2 -#define MAX_DUP_ACKS 2 +#define MAX_DELAY_ACK 2 #define MIN_WRITE_SPACE 2048 #define TCP_WINDOW_DIFF 2048 @@ -58,7 +67,8 @@ #define TCP_TIMEOUT_LEN (15*60*HZ) /* should be about 15 mins */ #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to successfully * close the socket, about 60 seconds */ -#define TCP_FIN_TIMEOUT (3*60*HZ) /* BSD style FIN_WAIT2 deadlock breaker */ +#define TCP_FIN_TIMEOUT (3*60*HZ) /* BSD style FIN_WAIT2 deadlock breaker */ + #define TCP_ACK_TIME (3*HZ) /* time to delay before sending an ACK */ #define TCP_DONE_TIME (5*HZ/2)/* maximum time to wait before actually * destroying a socket */ @@ -70,10 +80,13 @@ #define TCP_PROBEWAIT_LEN (1*HZ)/* time to wait between probes when * I've got something to write and * there is no window */ - +#define TCP_KEEPALIVE_TIME (180*60*HZ) /* two hours */ +#define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ +#define TCP_KEEPALIVE_PERIOD (75*HZ) /* period of keepalive check */ #define TCP_NO_CHECK 0 /* turn to one if you want the default * to be no checksum */ +#define TCP_SYNACK_PERIOD (HZ/2) /* * TCP option @@ -88,6 +101,115 @@ #define TCPOPT_WINDOW 3 /* Window scaling */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ +/* + * TCP option lengths + */ + +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_TIMESTAMP 10 + + +/* + * TCP Vegas constants + */ + +#define TCP_VEGAS_ALPHA 2 /* v_cong_detect_top_nseg */ +#define TCP_VEGAS_BETA 4 /* v_cong_detect_bot_nseg */ +#define TCP_VEGAS_GAMMA 1 /* v_exp_inc_nseg */ + +struct open_request; + +struct or_calltable { + void (*rtx_syn_ack) (struct sock *sk, struct open_request *req); + void (*destructor) (struct open_request *req); +}; + +struct open_request { + struct open_request *dl_next; + struct open_request *dl_prev; + __u32 rcv_isn; + __u32 snt_isn; + __u16 mss; + __u16 rmt_port; + unsigned long expires; + int retrans; + struct or_calltable *class; + struct sock *sk; 
+}; + +struct tcp_v4_open_req { + struct open_request req; + __u32 loc_addr; + __u32 rmt_addr; + struct options *opt; +}; + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +struct tcp_v6_open_req { + struct open_request req; + struct in6_addr loc_addr; + struct in6_addr rmt_addr; + struct ipv6_options *opt; + struct device *dev; +}; +#endif + +/* + * Pointers to address related TCP functions + * (i.e. things that depend on the address family) + */ + +struct tcp_func { + int (*build_net_header) (struct sock *sk, + struct sk_buff *skb); + + void (*queue_xmit) (struct sock *sk, + struct device *dev, + struct sk_buff *skb, + int free); + + void (*send_check) (struct sock *sk, + struct tcphdr *th, + int len, + struct sk_buff *skb); + + int (*rebuild_header) (struct sock *sk, + struct sk_buff *skb); + + int (*conn_request) (struct sock *sk, + struct sk_buff *skb, + void *opt, + __u32 isn); + + struct sock * (*syn_recv_sock) (struct sock *sk, + struct sk_buff *skb, + struct open_request *req); + + __u32 (*init_sequence) (struct sock *sk, + struct sk_buff *skb); + + struct sock * (*get_sock) (struct sk_buff *skb, + struct tcphdr *th); + + int (*setsockopt) (struct sock *sk, + int level, + int optname, + char *optval, + int optlen); + + int (*getsockopt) (struct sock *sk, + int level, + int optname, + char *optval, + int *optlen); + + + void (*addr2sockaddr) (struct sock *sk, + struct sockaddr *); + + int sockaddr_len; +}; /* * The next routines deal with comparing 32 bit unsigned ints @@ -111,40 +233,95 @@ return (after(seq1+1, seq2) && before(seq1, seq3+1)); } -static __inline__ int min(unsigned int a, unsigned int b) -{ - if (a > b) - a = b; - return a; -} - -static __inline__ int max(unsigned int a, unsigned int b) -{ - if (a < b) - a = b; - return a; -} extern struct proto tcp_prot; extern struct tcp_mib tcp_statistics; -extern void tcp_err(int type, int code, unsigned char *header, __u32 daddr, - __u32, struct inet_protocol *protocol); -extern void 
tcp_shutdown (struct sock *sk, int how); -extern int tcp_rcv(struct sk_buff *skb, struct device *dev, - struct options *opt, __u32 daddr, - unsigned short len, __u32 saddr, int redo, - struct inet_protocol *protocol); +extern void tcp_v4_err(int type, int code, + unsigned char *header, __u32 info, + __u32 daddr, __u32 saddr, + struct inet_protocol *protocol); + +extern void tcp_shutdown (struct sock *sk, int how); + +extern int tcp_v4_rcv(struct sk_buff *skb, + struct device *dev, + struct options *opt, __u32 daddr, + unsigned short len, __u32 saddr, + int redo, + struct inet_protocol *protocol); + +extern int tcp_do_sendmsg(struct sock *sk, + int iovlen, struct iovec *iov, + int len, int nonblock, + int flags); + +extern int tcp_ioctl(struct sock *sk, + int cmd, + unsigned long arg); + +extern int tcp_rcv_state_process(struct sock *sk, + struct sk_buff *skb, + struct tcphdr *th, + void *opt, __u16 len); + +extern void tcp_rcv_established(struct sock *sk, + struct sk_buff *skb, + struct tcphdr *th, + __u16 len); + +extern void tcp_close(struct sock *sk, + unsigned long timeout); +extern struct sock * tcp_accept(struct sock *sk, int flags); +extern int tcp_select(struct sock *sk, int sel_type, + select_table *wait); +extern int tcp_getsockopt(struct sock *sk, int level, + int optname, char *optval, + int *optlen); +extern int tcp_setsockopt(struct sock *sk, int level, + int optname, char *optval, + int optlen); +extern void tcp_set_keepalive(struct sock *sk, int val); +extern int tcp_recvmsg(struct sock *sk, + struct msghdr *msg, + int len, int nonblock, + int flags, int *addr_len); + +extern int tcp_parse_options(struct tcphdr *th); + +/* + * TCP v4 functions exported for the inet6 API + */ + +extern int tcp_v4_rebuild_header(struct sock *sk, + struct sk_buff *skb); + +extern int tcp_v4_build_header(struct sock *sk, + struct sk_buff *skb); + +extern void tcp_v4_send_check(struct sock *sk, + struct tcphdr *th, int len, + struct sk_buff *skb); + +extern int 
tcp_v4_conn_request(struct sock *sk, + struct sk_buff *skb, + void *ptr, __u32 isn); + +extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, + struct sk_buff *skb, + struct open_request *req); + +extern int tcp_v4_backlog_rcv(struct sock *sk, + struct sk_buff *skb); +extern int tcp_v4_connect(struct sock *sk, + struct sockaddr *uaddr, + int addr_len); -extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern void tcp_read_wakeup(struct sock *); extern void tcp_write_xmit(struct sock *); extern void tcp_time_wait(struct sock *); -extern void tcp_retransmit(struct sock *, int); extern void tcp_do_retransmit(struct sock *, int); -extern void tcp_send_check(struct tcphdr *th, unsigned long saddr, - unsigned long daddr, int len, struct sk_buff *skb); /* tcp_output.c */ @@ -152,16 +329,10 @@ extern void tcp_send_partial(struct sock *); extern void tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); -extern void tcp_send_synack(struct sock *, struct sock *, struct sk_buff *); -extern void tcp_send_skb(struct sock *, struct sk_buff *); +extern int tcp_send_synack(struct sock *); +extern int tcp_send_skb(struct sock *, struct sk_buff *); extern void tcp_send_ack(struct sock *sk); -extern void tcp_send_delayed_ack(struct sock *sk, int max_timeout, unsigned long timeout); -extern void tcp_send_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th, - struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl); - -extern void tcp_enqueue_partial(struct sock *, struct sk_buff *); -extern struct sk_buff * tcp_dequeue_partial(struct sock *); -extern void tcp_shrink_skb(struct sock *,struct sk_buff *,u32); +extern void tcp_send_delayed_ack(struct sock *sk, int max_timeout); /* tcp_input.c */ extern void tcp_cache_zap(void); @@ -170,44 +341,152 @@ extern int tcp_chkaddr(struct sk_buff *); /* tcp_timer.c */ -#define tcp_reset_msl_timer(x,y,z) reset_timer(x,y,z) +#define tcp_reset_msl_timer(x,y,z) 
net_reset_timer(x,y,z) extern void tcp_reset_xmit_timer(struct sock *, int, unsigned long); -extern void tcp_delack_timer(unsigned long); +extern void tcp_clear_xmit_timer(struct sock *, int); +extern int tcp_timer_is_set(struct sock *, int); +extern void tcp_init_xmit_timers(struct sock *); +extern void tcp_clear_xmit_timers(struct sock *); + extern void tcp_retransmit_timer(unsigned long); +extern void tcp_delack_timer(unsigned long); +extern void tcp_probe_timer(unsigned long); -static __inline__ int tcp_old_window(struct sock * sk) -{ - return sk->window - (sk->acked_seq - sk->lastwin_seq); -} -extern int tcp_new_window(struct sock *); +/* + * TCP slow timer + */ +extern struct timer_list tcp_slow_timer; + +struct tcp_sl_timer { + atomic_t count; + unsigned long period; + unsigned long last; + void (*handler) (unsigned long); +}; + +#define TCP_SLT_SYNACK 0 +#define TCP_SLT_KEEPALIVE 1 +#define TCP_SLT_MAX 2 +extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX]; + /* - * Return true if we should raise the window when we - * have cleaned up the receive queue. We don't want to - * do this normally, only if it makes sense to avoid - * zero window probes.. - * - * We do this only if we can raise the window noticeably. + * This function returns the amount that we can raise the + * usable window based on the following constraints + * + * 1. The window can never be shrunk once it is offered (RFC 793) + * 2. We limit memory per socket */ -static __inline__ int tcp_raise_window(struct sock * sk) + +static __inline__ unsigned short tcp_raise_window(struct sock *sk) { - int new = tcp_new_window(sk); - return new && (new >= 2*tcp_old_window(sk)); + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + long free_space = sock_rspace(sk); + long window; + + if (free_space > 1024) + free_space &= ~0x3FF; + + if(sk->window_clamp) + free_space = min(sk->window_clamp, free_space); + + /* + * compute the actual window i.e. 
+ * old_window - received_bytes_on_that_win + */ + + + window = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup); + + + /* + * We need to send an ack right away if + * our rcv window is blocking the sender and + * we have more free space to offer. + */ + + if (window < (sk->mss << 1) && free_space > window) + return 1; + + return 0; } static __inline__ unsigned short tcp_select_window(struct sock *sk) { - int window = tcp_new_window(sk); - int oldwin = tcp_old_window(sk); + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + long free_space = sock_rspace(sk); + long window; + + if (sk->window_clamp) + free_space = min(sk->window_clamp, free_space); + + + /* + * compute the actual window i.e. + * old_window - received_bytes_on_that_win + */ + + window = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup); + + if ( window < 0 ) + { + window = 0; + printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n", + tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup); + } + + /* + * RFC 1122: + * "the suggested [SWS] avoidance algoritm for the receiver is to keep + * RECV.NEXT + RCV.WIN fixed until: + * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" + * + * i.e. don't raise the right edge of the window until you can't raise + * it MSS bytes + */ + + /* + * It would be a good idea if it didn't break header prediction. + * and BSD made the header predition standard... + * It expects the same value in the header i.e. th->window to be + * constant [in fact it's a good idea but they could document it + * couldn't they ?] [PR]. + */ + + /* + * If the actual window is blocking the sender then try + * to raise it. 
+ */ + + if (window < (sk->mss << 1)) + { + long usable; + + usable = free_space - window; + + if (usable < 0) + { + /* shouldn't happen */ + usable = 0; + } - /* Don't allow a shrinking window */ - if (window > oldwin) { - sk->window = window; - sk->lastwin_seq = sk->acked_seq; - oldwin = window; + tp->rcv_wnd += (min(usable, sk->mss) + 0x3FF) & ~0x3FF; } - return oldwin; + +#if 0 + if (tp->rcv_wnd > free_space) + { + tp->rcv_wnd = free_space & ~0x3FF; + } +#endif + if (tp->rcv_wnd < window) + { + tp->rcv_wnd = (window + 0x3FF) & ~0x3FF; + } + + tp->rcv_wup = tp->rcv_nxt; + return tp->rcv_wnd; } /* @@ -227,12 +506,14 @@ /* * Calculate(/check) TCP checksum */ -static __inline__ u16 tcp_check(struct tcphdr *th, int len, - unsigned long saddr, unsigned long daddr, unsigned long base) +static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len, + unsigned long saddr, unsigned long daddr, + unsigned long base) { return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); } + #undef STATE_TRACE #ifdef STATE_TRACE @@ -245,6 +526,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; int oldstate = sk->state; sk->state = state; @@ -264,7 +546,7 @@ case TCP_CLOSE: tcp_cache_zap(); /* Should be about 2 rtt's */ - reset_timer(sk, TIME_DONE, min(sk->rtt * 2, TCP_DONE_TIME)); + net_reset_timer(sk, TIME_DONE, min(tp->srtt * 2, TCP_DONE_TIME)); /* fall through */ default: if (oldstate==TCP_ESTABLISHED) @@ -272,4 +554,79 @@ } } +extern __inline__ void tcp_synq_unlink(struct tcp_opt *tp, struct open_request *req) +{ + if (req->dl_next == req) + { + tp->syn_wait_queue = NULL; + } + else + { + req->dl_prev->dl_next = req->dl_next; + req->dl_next->dl_prev = req->dl_prev; + + if (tp->syn_wait_queue == req) + { + tp->syn_wait_queue = req->dl_next; + } + } + + req->dl_prev = req->dl_next = NULL; +} + +extern __inline__ void tcp_synq_queue(struct tcp_opt *tp, struct open_request *req) +{ + if (!tp->syn_wait_queue) + { + 
req->dl_next = req; + req->dl_prev = req; + tp->syn_wait_queue = req; + } + else + { + struct open_request *list = tp->syn_wait_queue; + + req->dl_next = list; + req->dl_prev = list->dl_prev; + list->dl_prev->dl_next = req; + list->dl_prev = req; + } + +} + +extern __inline__ void tcp_inc_slow_timer(int timer) +{ + struct tcp_sl_timer *slt = &tcp_slt_array[timer]; + + if (slt->count == 0) + { + unsigned long now = jiffies; + unsigned long when; + unsigned long next; + + slt->last = now; + + when = now + slt->period; + next = del_timer(&tcp_slow_timer); + + if (next && ((long)(next - when) < 0)) + { + when = next; + } + + tcp_slow_timer.expires = when; + add_timer(&tcp_slow_timer); + } + + atomic_inc(&slt->count); +} + +extern __inline__ void tcp_dec_slow_timer(int timer) +{ + struct tcp_sl_timer *slt = &tcp_slt_array[timer]; + + atomic_dec(&slt->count); +} + #endif /* _TCP_H */ + diff -u --recursive --new-file v2.1.7/linux/include/net/transp_v6.h linux/include/net/transp_v6.h --- v2.1.7/linux/include/net/transp_v6.h Thu Jan 1 02:00:00 1970 +++ linux/include/net/transp_v6.h Thu Nov 7 19:58:14 1996 @@ -0,0 +1,42 @@ +#ifndef _TRANSP_V6_H +#define _TRANSP_V6_H + +#include + +/* + * IPv6 transport protocols + */ + +#ifdef __KERNEL__ + +extern struct proto rawv6_prot; +extern struct proto udpv6_prot; +extern struct proto tcpv6_prot; + +extern void rawv6_init(void); +extern void udpv6_init(void); +extern void tcpv6_init(void); + +extern int udpv6_connect(struct sock *sk, + struct sockaddr *uaddr, + int addr_len); + +extern int datagram_recv_ctl(struct sock *sk, + struct msghdr *msg, + struct sk_buff *skb); + +extern int datagram_send_ctl(struct msghdr *msg, + struct device **src_dev, + struct in6_addr **src_addr, + struct ipv6_options *opt); + +#define LOOPBACK4_IPV6 __constant_htonl(0x7f000006) + +/* + * address family specific functions + */ +extern struct tcp_func ipv4_specific; + +#endif + +#endif diff -u --recursive --new-file v2.1.7/linux/include/net/udp.h 
linux/include/net/udp.h --- v2.1.7/linux/include/net/udp.h Mon May 20 08:32:19 1996 +++ linux/include/net/udp.h Sun Nov 3 11:04:43 1996 @@ -31,8 +31,9 @@ extern struct proto udp_prot; -extern void udp_err(int type, int code, unsigned char *header, __u32 daddr, - __u32 saddr, struct inet_protocol *protocol); +extern void udp_err(int type, int code, unsigned char *header, + __u32 info, __u32 daddr, __u32 saddr, + struct inet_protocol *protocol); extern void udp_send_check(struct udphdr *uh, __u32 saddr, __u32 daddr, int len, struct sock *sk); extern int udp_recvfrom(struct sock *sk, unsigned char *to, @@ -41,7 +42,11 @@ extern int udp_read(struct sock *sk, unsigned char *buff, int len, int noblock, unsigned flags); extern int udp_connect(struct sock *sk, - struct sockaddr_in *usin, int addr_len); + struct sockaddr *usin, int addr_len); + +extern int udp_sendmsg(struct sock *sk, struct msghdr *msg, + int len, int noblock, int flags); + extern int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, __u32 daddr, unsigned short len, __u32 saddr, int redo, diff -u --recursive --new-file v2.1.7/linux/init/main.c linux/init/main.c --- v2.1.7/linux/init/main.c Fri Nov 1 17:13:19 1996 +++ linux/init/main.c Sat Nov 9 11:27:40 1996 @@ -838,6 +838,7 @@ check_bugs(); printk(linux_banner); + printk("POSIX conformance testing by UNIFIX\n"); #ifdef __SMP__ smp_init(); #endif diff -u --recursive --new-file v2.1.7/linux/kernel/fork.c linux/kernel/fork.c --- v2.1.7/linux/kernel/fork.c Tue Oct 29 19:58:48 1996 +++ linux/kernel/fork.c Thu Nov 7 11:25:56 1996 @@ -252,7 +252,7 @@ p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; init_timer(&p->real_timer); p->real_timer.data = (unsigned long) p; - p->leader = 0; /* process leadership doesn't inherit */ + p->leader = 0; /* session leadership doesn't inherit */ p->tty_old_pgrp = 0; p->utime = p->stime = 0; p->cutime = p->cstime = 0; diff -u --recursive --new-file v2.1.7/linux/kernel/ksyms.c linux/kernel/ksyms.c --- 
v2.1.7/linux/kernel/ksyms.c Wed Oct 16 10:48:30 1996 +++ linux/kernel/ksyms.c Wed Nov 6 14:49:33 1996 @@ -49,6 +49,9 @@ #include extern unsigned char aux_device_present, kbd_read_mask; +#ifdef __i386__ + extern struct drive_info_struct drive_info; +#endif #ifdef CONFIG_PCI #include @@ -186,6 +189,10 @@ X(blkdev_release), X(gendisk_head), X(resetup_one_dev), + X(unplug_device), +#ifdef __i386__ + X(drive_info), +#endif #ifdef CONFIG_SERIAL /* Module creation of serial units */ @@ -219,6 +226,11 @@ /* sysctl table registration */ X(register_sysctl_table), X(unregister_sysctl_table), + X(sysctl_string), + X(sysctl_intvec), + X(proc_dostring), + X(proc_dointvec), + X(proc_dointvec_minmax), /* interrupt handling */ X(request_irq), @@ -285,6 +297,7 @@ X(sys_call_table), X(hard_reset_now), X(_ctype), + X(secure_tcp_sequence_number), /* Signal interfaces */ X(send_sig), @@ -329,11 +342,6 @@ /* psaux mouse */ X(aux_device_present), X(kbd_read_mask), - -#ifdef CONFIG_BLK_DEV_IDE_PCMCIA - X(ide_register), - X(ide_unregister), -#endif #ifdef CONFIG_BLK_DEV_MD X(disk_name), /* for md.c */ diff -u --recursive --new-file v2.1.7/linux/kernel/module.c linux/kernel/module.c --- v2.1.7/linux/kernel/module.c Tue Oct 29 19:58:48 1996 +++ linux/kernel/module.c Wed Nov 6 14:24:43 1996 @@ -49,7 +49,7 @@ #ifdef CONFIG_MODULES /* a *big* #ifdef block... 
*/ static struct module kernel_module; -static struct module *module_list = &kernel_module; +struct module *module_list = &kernel_module; static int freeing_modules; /* true if some modules are marked for deletion */ @@ -117,6 +117,8 @@ mp->addr = addr; mp->state = MOD_UNINITIALIZED; mp->cleanup = NULL; + mp->exceptinfo.start = NULL; + mp->exceptinfo.stop = NULL; * (long *) addr = 0; /* set use count to zero */ module_list = mp; /* link it in */ @@ -173,7 +175,12 @@ mp->size * PAGE_SIZE - (codesize + sizeof (long))); pr_debug("module init entry = 0x%08lx, cleanup entry = 0x%08lx\n", (unsigned long) rt.init, (unsigned long) rt.cleanup); + if (rt.signature != MODULE_2_1_7_SIG){ + printk ("Older insmod used with kernel 2.1.7 +\n"); + return -EINVAL; + } mp->cleanup = rt.cleanup; + mp->exceptinfo = rt.exceptinfo; /* update kernel symbol table */ if (symtab) { /* symtab == NULL means no new entries to handle */ diff -u --recursive --new-file v2.1.7/linux/kernel/sched.c linux/kernel/sched.c --- v2.1.7/linux/kernel/sched.c Tue Oct 29 19:58:48 1996 +++ linux/kernel/sched.c Thu Nov 7 16:08:32 1996 @@ -1192,20 +1192,19 @@ #endif -static struct task_struct *find_process_by_pid(pid_t pid) { - struct task_struct *p, *q; +static struct task_struct *find_process_by_pid(pid_t pid) +{ + struct task_struct *p; - if (pid == 0) - p = current; - else { - p = 0; - for_each_task(q) { - if (q && q->pid == pid) { - p = q; - break; - } + p = current; + if (pid) { + for_each_task(p) { + if (p->pid == pid) + goto found; } + p = NULL; } +found: return p; } diff -u --recursive --new-file v2.1.7/linux/kernel/sysctl.c linux/kernel/sysctl.c --- v2.1.7/linux/kernel/sysctl.c Tue Oct 29 19:58:48 1996 +++ linux/kernel/sysctl.c Sun Nov 3 11:04:43 1996 @@ -6,6 +6,7 @@ * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas. * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver. * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver. 
+ * Dynamic registration fixes, Stephen Tweedie. */ #include @@ -203,7 +204,7 @@ do { context = 0; error = parse_table(name, nlen, oldval, oldlenp, - newval, newlen, root_table, &context); + newval, newlen, tmp->ctl_table, &context); if (context) kfree(context); if (error != -ENOTDIR) @@ -401,9 +402,11 @@ /* Scan the sysctl entries in table and add them all into /proc */ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) { - struct proc_dir_entry *de; + struct proc_dir_entry *de, *tmp; + int exists; for (; table->ctl_name; table++) { + exists = 0; /* Can't do anything without a proc name. */ if (!table->procname) continue; @@ -432,12 +435,24 @@ } /* Otherwise it's a subdir */ else { - de->ops = &proc_dir_inode_operations; - de->nlink++; - de->mode |= S_IFDIR; + /* First check to see if it already exists */ + for (tmp = root->subdir; tmp; tmp = tmp->next) { + if (tmp->namelen == de->namelen && + !memcmp(tmp->name,de->name,de->namelen)) { + exists = 1; + kfree (de); + de = tmp; + } + } + if (!exists) { + de->ops = &proc_dir_inode_operations; + de->nlink++; + de->mode |= S_IFDIR; + } } table->de = de; - proc_register_dynamic(root, de); + if (!exists) + proc_register_dynamic(root, de); if (de->mode & S_IFDIR ) register_proc_table(table->child, de); } @@ -456,8 +471,12 @@ } unregister_proc_table(table->child, de); } - proc_unregister(root, de->low_ino); - kfree(de); + /* Don't unregister proc directories which still have + entries... 
*/ + if (!((de->mode & S_IFDIR) && de->subdir)) { + proc_unregister(root, de->low_ino); + kfree(de); + } } } diff -u --recursive --new-file v2.1.7/linux/net/Config.in linux/net/Config.in --- v2.1.7/linux/net/Config.in Mon Jul 8 10:21:46 1996 +++ linux/net/Config.in Sun Nov 3 11:04:43 1996 @@ -8,7 +8,12 @@ bool 'TCP/IP networking' CONFIG_INET if [ "$CONFIG_INET" = "y" ]; then source net/ipv4/Config.in + + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + tristate 'The IPv6 protocol' CONFIG_IPV6 + fi fi + comment ' ' tristate 'The IPX protocol' CONFIG_IPX if [ "$CONFIG_IPX" != "n" ]; then diff -u --recursive --new-file v2.1.7/linux/net/Makefile linux/net/Makefile --- v2.1.7/linux/net/Makefile Thu May 16 16:35:55 1996 +++ linux/net/Makefile Sun Nov 3 11:04:43 1996 @@ -8,18 +8,27 @@ # Note 2! The CFLAGS definition is now in the main makefile... MOD_SUB_DIRS := ipv4 -ALL_SUB_DIRS := 802 ax25 bridge core ethernet ipv4 ipx unix appletalk netrom #decnet +ALL_SUB_DIRS := 802 ax25 bridge core ethernet ipv4 ipv6 ipx unix appletalk \ + netrom #decnet SUB_DIRS := core ethernet unix MOD_LIST_NAME := NET_MISC_MODULES ifeq ($(CONFIG_NET),y) SUB_DIRS += 802 -endif +endif ifeq ($(CONFIG_INET),y) SUB_DIRS += ipv4 endif +ifeq ($(CONFIG_IPV6),y) +SUB_DIRS += ipv6 +else + ifeq ($(CONFIG_IPV6),m) + MOD_SUB_DIRS += ipv6 + endif +endif + ifeq ($(CONFIG_BRIDGE),y) SUB_DIRS += bridge endif @@ -58,10 +67,29 @@ M_OBJS := -ifeq ($(CONFIG_NETLINK),y) +CONFIG_NETLINK_BUILTIN := +CONFIG_NETLINK_MODULE := + +ifeq ($(CONFIG_NETLINK), y) + CONFIG_NETLINK_BUILTIN = y +endif + +ifeq ($(CONFIG_IPV6), y) + CONFIG_NETLINK_BUILTIN = y +endif + +ifeq ($(CONFIG_NETLINK), m) + CONFIG_NETLINK_MODULE = y +endif + +ifeq ($(CONFIG_IPV6), m) + CONFIG_NETLINK_MODULE = y +endif + +ifdef CONFIG_NETLINK_BUILTIN L_OBJS += netlink.o else - ifeq ($(CONFIG_NETLINK),m) + ifdef CONFIG_NETLINK_MODULE M_OBJS += netlink.o endif endif diff -u --recursive --new-file v2.1.7/linux/net/bridge/br.c linux/net/bridge/br.c --- 
v2.1.7/linux/net/bridge/br.c Tue Oct 29 19:58:49 1996 +++ linux/net/bridge/br.c Fri Nov 8 08:44:12 1996 @@ -267,7 +267,7 @@ (((port_info[port_no].designated_cost + port_info[port_no].path_cost ) - == + < (port_info[root_port].designated_cost + port_info[root_port].path_cost ) /* (4.6.8.3.1(2)) */ diff -u --recursive --new-file v2.1.7/linux/net/core/dev.c linux/net/core/dev.c --- v2.1.7/linux/net/core/dev.c Fri Nov 1 17:13:19 1996 +++ linux/net/core/dev.c Wed Nov 6 14:41:20 1996 @@ -45,6 +45,7 @@ * Alan Cox : Cleaned up the backlog initialise. * Craig Metz : SIOCGIFCONF fix if space for under * 1 device. + * Molnar Ingo : skb->stamp hack for the Pentium * Thomas Bogendoerfer : Return ENODEV for dev_open, if there * is no device open function. * @@ -118,14 +119,6 @@ static int backlog_size = 0; -/* - * Return the lesser of the two values. - */ - -static __inline__ unsigned long min(unsigned long a, unsigned long b) -{ - return (a < b)? a : b; -} /****************************************************************************************** @@ -418,6 +411,10 @@ /* copy outgoing packets to any sniffer packet handlers */ if (dev_nit) { struct packet_type *ptype; +#ifdef CONFIG_M586 + struct timeval dummy_tv; + do_gettimeofday( &dummy_tv ); +#endif skb->stamp=xtime; for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next) { @@ -489,7 +486,13 @@ skb->sk = NULL; skb->free = 1; if(skb->stamp.tv_sec==0) + { +#ifdef CONFIG_M586 + struct timeval dummy_tv; + do_gettimeofday( &dummy_tv ); +#endif skb->stamp = xtime; + } /* * Check that we aren't overdoing things. @@ -1074,7 +1077,7 @@ goto rarok; case SIOCSIFADDR: /* Set interface address (and family) */ - + /* * BSDism. SIOCSIFADDR family=AF_UNSPEC sets the * physical address. We can cope with this now. 
@@ -1107,7 +1110,7 @@ #ifdef CONFIG_NET_ALIAS if (net_alias_is(dev)) - net_alias_dev_rehash(dev ,&ifr.ifr_addr); + net_alias_dev_rehash(dev ,&ifr.ifr_addr); #endif dev->pa_addr = new_pa_addr; dev->family = new_family; @@ -1204,7 +1207,7 @@ return -EINVAL; if (dev->change_mtu) - ret = (*dev->change_mtu)(dev, ifr.ifr_mtu); + ret = dev->change_mtu(dev, ifr.ifr_mtu); else { dev->mtu = ifr.ifr_mtu; diff -u --recursive --new-file v2.1.7/linux/net/core/iovec.c linux/net/core/iovec.c --- v2.1.7/linux/net/core/iovec.c Tue Oct 29 19:58:49 1996 +++ linux/net/core/iovec.c Sun Nov 3 11:04:43 1996 @@ -9,6 +9,8 @@ * * Fixes: * Andrew Lunn : Errors in iovec copying. + * Pedro Roque : Added memcpy_fromiovecend and + * csum_..._fromiovecend. */ @@ -17,8 +19,10 @@ #include #include #include +#include #include - +#include +#include extern inline int min(int x, int y) { @@ -103,4 +107,131 @@ } iov++; } +} + + +/* + * For use with ip_build_xmit + */ + +void memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, + int len) +{ + while(offset>0) + { + if (offset > iov->iov_len) + { + offset -= iov->iov_len; + + } + else + { + u8 *base; + int copy; + + base = iov->iov_base + offset; + copy = min(len, iov->iov_len - offset); + offset = 0; + + copy_from_user(kdata, base, copy); + len-=copy; + kdata+=copy; + } + iov++; + } + + while (len>0) + { + int copy=min(len, iov->iov_len); + copy_from_user(kdata, iov->iov_base, copy); + len-=copy; + kdata+=copy; + iov++; + } +} + +/* + * And now for the all-in-one: copy and checksum from a user iovec + * directly to a datagram + * Calls to csum_partial but the last must be in 32 bit chunks + * + * ip_build_xmit must ensure that when fragmenting only the last + * call to this function will be unaligned also. 
+ */ + +unsigned int csum_partial_copy_fromiovecend(unsigned char *kdata, + struct iovec *iov, int offset, + int len, int csum) +{ + __u32 partial; + __u32 partial_cnt = 0; + + while(offset>0) + { + if (offset > iov->iov_len) + { + offset -= iov->iov_len; + + } + else + { + u8 *base; + int copy; + + base = iov->iov_base + offset; + copy = min(len, iov->iov_len - offset); + offset = 0; + + partial_cnt = copy % 4; + if (partial_cnt) + { + copy -= partial_cnt; + copy_from_user(&partial, base + copy, + partial_cnt); + } + + csum = csum_partial_copy_fromuser(base, kdata, + copy, csum); + + len -= copy + partial_cnt; + kdata += copy + partial_cnt; + } + iov++; + } + + while (len>0) + { + u8 *base = iov->iov_base; + int copy=min(len, iov->iov_len); + + if (partial_cnt) + { + int par_len = 4 - partial_cnt; + + copy_from_user(&partial, base + partial_cnt, par_len); + csum = csum_partial((u8*) &partial, 4, csum); + base += par_len; + copy -= par_len; + partial_cnt = 0; + } + + if (len - copy > 0) + { + partial_cnt = copy % 4; + if (partial_cnt) + { + copy -= partial_cnt; + copy_from_user(&partial, base + copy, + partial_cnt); + } + } + + csum = csum_partial_copy_fromuser(base, kdata, + copy, csum); + len -= copy + partial_cnt; + kdata += copy + partial_cnt; + iov++; + } + + return csum; } diff -u --recursive --new-file v2.1.7/linux/net/core/skbuff.c linux/net/core/skbuff.c --- v2.1.7/linux/net/core/skbuff.c Tue Oct 29 19:58:49 1996 +++ linux/net/core/skbuff.c Thu Nov 7 11:02:38 1996 @@ -40,12 +40,13 @@ #include #include #include -#include #include +#include #include #include #include +#include #include #include #include @@ -702,6 +703,7 @@ skb->end=bptr+len; skb->len=0; skb->destructor=NULL; + skb->inclone = 0; return skb; } @@ -729,7 +731,8 @@ addr = skb; __kfree_skbmem(skb->data_skb); } - kfree(addr); + if (!skb->inclone) + kfree(addr); atomic_dec(&net_skbcount); } } @@ -742,11 +745,21 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int priority) { struct sk_buff *n; - + 
int inbuff = 0; + IS_SKB(skb); - n = kmalloc(sizeof(*n), priority); - if (!n) - return NULL; + if (skb_tailroom(skb) >= sizeof(struct sk_buff)) + { + n = ((struct sk_buff *) skb->end) - 1; + skb->end -= sizeof(struct sk_buff); + inbuff = 1; + } + else + { + n = kmalloc(sizeof(*n), priority); + if (!n) + return NULL; + } memcpy(n, skb, sizeof(*n)); n->count = 1; if (skb->data_skb) @@ -762,6 +775,7 @@ n->tries = 0; n->lock = 0; n->users = 0; + n->inclone = inbuff; return n; } @@ -804,6 +818,10 @@ n->h.raw=skb->h.raw+offset; n->mac.raw=skb->mac.raw+offset; n->ip_hdr=(struct iphdr *)(((char *)skb->ip_hdr)+offset); +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + n->ipv6_hdr=(struct ipv6hdr *)(((char *)skb->ipv6_hdr)+offset); + n->nexthop = skb->nexthop; +#endif n->saddr=skb->saddr; n->daddr=skb->daddr; n->raddr=skb->raddr; diff -u --recursive --new-file v2.1.7/linux/net/core/sock.c linux/net/core/sock.c --- v2.1.7/linux/net/core/sock.c Tue Oct 29 19:58:49 1996 +++ linux/net/core/sock.c Sun Nov 3 11:04:43 1996 @@ -192,6 +192,10 @@ return(0); case SO_KEEPALIVE: + if (sk->protocol == IPPROTO_TCP) + { + tcp_set_keepalive(sk, valbool); + } sk->keepopen = valbool; return(0); @@ -536,7 +540,7 @@ void __release_sock(struct sock *sk) { #ifdef CONFIG_INET - if (!sk->prot || !sk->prot->rcv) + if (!sk->prot || !sk->backlog_rcv) return; /* See if we have any packets built up. */ @@ -544,10 +548,7 @@ while (!skb_queue_empty(&sk->back_log)) { struct sk_buff * skb = sk->back_log.next; __skb_unlink(skb, &sk->back_log); - sk->prot->rcv(skb, skb->dev, (struct options*)skb->proto_priv, - skb->saddr, skb->len, skb->daddr, 1, - /* Only used for/by raw sockets. 
*/ - (struct inet_protocol *)sk->pair); + sk->backlog_rcv(sk, skb); } end_bh_atomic(); #endif diff -u --recursive --new-file v2.1.7/linux/net/ethernet/eth.c linux/net/ethernet/eth.c --- v2.1.7/linux/net/ethernet/eth.c Tue Oct 29 19:58:49 1996 +++ linux/net/ethernet/eth.c Sun Nov 3 11:04:43 1996 @@ -53,8 +53,21 @@ #include #include #include +#include + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#include +#include +#endif + #include + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +int (*ndisc_eth_hook) (unsigned char *, struct device *, + struct sk_buff *) = NULL; +#endif + void eth_setup(char *str, int *ints) { struct device *d = dev_base; @@ -143,24 +156,43 @@ struct ethhdr *eth = (struct ethhdr *)buff; /* - * Only ARP/IP is currently supported + * Only ARP/IP and NDISC/IPv6 are currently supported */ - - if(eth->h_proto != htons(ETH_P_IP)) + + switch (eth->h_proto) { - printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",dev->name,(int)eth->h_proto); +#ifdef CONFIG_INET + case __constant_htons(ETH_P_IP): + + /* + * Try to get ARP to resolve the header. + */ + + return (arp_find(eth->h_dest, dst, dev, dev->pa_addr, skb) ? + 1 : 0); + break; +#endif + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + case __constant_htons(ETH_P_IPV6): +#ifdef CONFIG_IPV6 + return (ndisc_eth_resolv(eth->h_dest, dev, skb)); +#else + if (ndisc_eth_hook) + return (ndisc_eth_hook(eth->h_dest, dev, skb)); +#endif +#endif + default: + printk(KERN_DEBUG + "%s: unable to resolve type %X addresses.\n", + dev->name, (int)eth->h_proto); + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); return 0; + break; } - /* - * Try to get ARP to resolve the header. - */ -#ifdef CONFIG_INET - return arp_find(eth->h_dest, dst, dev, dev->pa_addr, skb)? 
1 : 0; -#else return 0; -#endif } diff -u --recursive --new-file v2.1.7/linux/net/ipv4/Makefile linux/net/ipv4/Makefile --- v2.1.7/linux/net/ipv4/Makefile Fri May 31 13:46:27 1996 +++ linux/net/ipv4/Makefile Sun Nov 3 11:04:43 1996 @@ -11,7 +11,7 @@ IPV4_OBJS := utils.o route.o proc.o timer.o protocol.o packet.o \ ip_input.o ip_fragment.o ip_forward.o ip_options.o \ ip_output.o ip_sockglue.o \ - tcp.o tcp_input.o tcp_output.o tcp_timer.o \ + tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o\ raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o ip_fw.o \ sysctl_net_ipv4.o diff -u --recursive --new-file v2.1.7/linux/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c --- v2.1.7/linux/net/ipv4/af_inet.c Tue Oct 29 19:58:49 1996 +++ linux/net/ipv4/af_inet.c Sun Nov 3 11:04:43 1996 @@ -114,6 +114,11 @@ extern int tcp_get_info(char *, char **, off_t, int, int); extern int udp_get_info(char *, char **, off_t, int, int); + +struct sock * tcp_sock_array[SOCK_ARRAY_SIZE]; +struct sock * udp_sock_array[SOCK_ARRAY_SIZE]; +struct sock * raw_sock_array[SOCK_ARRAY_SIZE]; + #ifdef CONFIG_DLCI extern int dlci_ioctl(unsigned int, void*); #endif @@ -205,7 +210,7 @@ * Add a socket into the socket tables by number. */ -void put_sock(unsigned short num, struct sock *sk) +void inet_put_sock(unsigned short num, struct sock *sk) { struct sock **skp, *tmp; int mask; @@ -266,7 +271,7 @@ * Remove a socket from the socket tables. */ -static void remove_sock(struct sock *sk1) +void inet_remove_sock(struct sock *sk1) { struct sock **p; unsigned long flags; @@ -309,35 +314,16 @@ lock_sock(sk); /* just to be safe. */ - remove_sock(sk); /* * Now we can no longer get new packets or once the * timers are killed, send them. 
*/ - delete_timer(sk); - del_timer(&sk->delack_timer); - del_timer(&sk->retransmit_timer); - - /* - * Drain any partial frames - */ - - while ((skb = tcp_dequeue_partial(sk)) != NULL) - { - IS_SKB(skb); - kfree_skb(skb, FREE_WRITE); - } + net_delete_timer(sk); - /* - * Cleanup up the write buffer. - */ - - while((skb = skb_dequeue(&sk->write_queue)) != NULL) { - IS_SKB(skb); - kfree_skb(skb, FREE_WRITE); - } + if (sk->prot->destroy) + sk->prot->destroy(sk); /* * Clean up the read buffer. @@ -358,34 +344,6 @@ kfree_skb(skb, FREE_READ); } - /* - * Now we need to clean up the send head. - */ - - cli(); - for(skb = sk->send_head; skb != NULL; ) - { - struct sk_buff *skb2; - - /* - * We need to remove skb from the transmit queue, - * or maybe the arp queue. - */ - if (skb->next && skb->prev) - { - IS_SKB(skb); - skb_unlink(skb); - } - skb->dev = NULL; - skb2 = skb->link3; - kfree_skb(skb, FREE_WRITE); - skb = skb2; - } - sk->send_head = NULL; - sk->send_tail = NULL; - sk->send_next = NULL; - sti(); - /* * Now the backlog. */ @@ -415,6 +373,8 @@ if (sk->rmem_alloc == 0 && sk->wmem_alloc == 0) { + inet_remove_sock(sk); + if(sk->opt) kfree(sk->opt); ip_rt_put(sk->ip_route_cache); @@ -429,12 +389,19 @@ { /* this should never happen. */ /* actually it can if an ack has just been sent. */ - NETDEBUG(printk("Socket destroy delayed (r=%d w=%d)\n", - sk->rmem_alloc, sk->wmem_alloc)); + /* + * It's more normal than that... + * It can happen because a skb is still in the device queues + * [PR] + */ + + printk("Socket destroy delayed (r=%d w=%d)\n", + sk->rmem_alloc, sk->wmem_alloc); + sk->destroy = 1; sk->ack_backlog = 0; release_sock(sk); - reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME); + net_reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME); } } @@ -444,7 +411,7 @@ * the work. 
*/ -static int inet_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) +int inet_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk; @@ -473,7 +440,7 @@ * Set socket options on an inet socket. */ -static int inet_setsockopt(struct socket *sock, int level, int optname, +int inet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) { struct sock *sk = (struct sock *) sock->data; @@ -489,7 +456,7 @@ * Get a socket option on an AF_INET socket. */ -static int inet_getsockopt(struct socket *sock, int level, int optname, +int inet_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { struct sock *sk = (struct sock *) sock->data; @@ -515,7 +482,7 @@ return(-EAGAIN); udp_cache_zap(); tcp_cache_zap(); - put_sock(sk->num, sk); + inet_put_sock(sk->num, sk); sk->dummy_th.source = ntohs(sk->num); } return 0; @@ -525,7 +492,7 @@ * Move a socket into listening state. */ -static int inet_listen(struct socket *sock, int backlog) +int inet_listen(struct socket *sock, int backlog) { struct sock *sk = (struct sock *) sock->data; @@ -666,38 +633,38 @@ #ifdef CONFIG_TCP_NAGLE_OFF sk->nonagle = 1; #endif + sk->family = AF_INET; sk->type = sock->type; sk->protocol = protocol; sk->allocation = GFP_KERNEL; sk->sndbuf = SK_WMEM_MAX; sk->rcvbuf = SK_RMEM_MAX; - sk->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/ - sk->cong_window = 1; /* start with only sending one packet at a time. */ - sk->ssthresh = 0x7fffffff; sk->priority = 1; + + sk->prot = prot; + sk->backlog_rcv = prot->backlog_rcv; + + sk->sleep = sock->wait; + sock->data =(void *) sk; + sk->state = TCP_CLOSE; - /* this is how many unacked bytes we will accept for this socket. */ - sk->max_unacked = 2048; /* needs to be at most 2 full packets. 
*/ - sk->delay_acks = 1; - sk->max_ack_backlog = SOMAXCONN; skb_queue_head_init(&sk->write_queue); skb_queue_head_init(&sk->receive_queue); - sk->mtu = 576; - sk->prot = prot; - sk->sleep = sock->wait; - init_timer(&sk->timer); - init_timer(&sk->delack_timer); - init_timer(&sk->retransmit_timer); + skb_queue_head_init(&sk->back_log); + + sk->timer.data = (unsigned long)sk; sk->timer.function = &net_timer; - skb_queue_head_init(&sk->back_log); + sock->data =(void *) sk; sk->ip_ttl=ip_statistics.IpDefaultTTL; + if(sk->type==SOCK_RAW && protocol==IPPROTO_RAW) sk->ip_hdrincl=1; else sk->ip_hdrincl=0; + #ifdef CONFIG_IP_MULTICAST sk->ip_mc_loop=1; sk->ip_mc_ttl=1; @@ -709,9 +676,6 @@ * if TCP uses it (maybe move to tcp_init later) */ - sk->dummy_th.ack=1; - sk->dummy_th.doff=sizeof(struct tcphdr)>>2; - sk->state_change = def_callback1; sk->data_ready = def_callback2; sk->write_space = def_callback3; @@ -725,7 +689,7 @@ * creation time automatically * shares. */ - put_sock(sk->num, sk); + inet_put_sock(sk->num, sk); sk->dummy_th.source = ntohs(sk->num); } @@ -757,7 +721,7 @@ * should refer to it. */ -static int inet_release(struct socket *sock, struct socket *peer) +int inet_release(struct socket *sock, struct socket *peer) { unsigned long timeout; struct sock *sk = (struct sock *) sock->data; @@ -934,12 +898,12 @@ } sti(); - remove_sock(sk); + inet_remove_sock(sk); if(sock->type==SOCK_DGRAM) udp_cache_zap(); if(sock->type==SOCK_STREAM) tcp_cache_zap(); - put_sock(snum, sk); + inet_put_sock(snum, sk); sk->dummy_th.source = ntohs(sk->num); sk->daddr = 0; sk->dummy_th.dest = 0; @@ -954,8 +918,8 @@ * TCP 'magic' in here. 
*/ -static int inet_connect(struct socket *sock, struct sockaddr * uaddr, - int addr_len, int flags) +int inet_connect(struct socket *sock, struct sockaddr * uaddr, + int addr_len, int flags) { struct sock *sk=(struct sock *)sock->data; int err; @@ -981,7 +945,7 @@ return(-EAGAIN); if (sk->prot->connect == NULL) return(-EOPNOTSUPP); - err = sk->prot->connect(sk, (struct sockaddr_in *)uaddr, addr_len); + err = sk->prot->connect(sk, uaddr, addr_len); if (err < 0) return(err); sock->state = SS_CONNECTING; @@ -1036,7 +1000,7 @@ * Accept a pending connection. The TCP layer now gives BSD semantics. */ -static int inet_accept(struct socket *sock, struct socket *newsock, int flags) +int inet_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk1, *sk2; int err; @@ -1153,8 +1117,8 @@ -static int inet_recvmsg(struct socket *sock, struct msghdr *ubuf, int size, int noblock, - int flags, int *addr_len ) +int inet_recvmsg(struct socket *sock, struct msghdr *ubuf, int size, + int noblock, int flags, int *addr_len) { struct sock *sk = (struct sock *) sock->data; @@ -1169,8 +1133,8 @@ } -static int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size, int noblock, - int flags) +int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size, + int noblock, int flags) { struct sock *sk = (struct sock *) sock->data; if (sk->shutdown & SEND_SHUTDOWN) @@ -1190,7 +1154,7 @@ } -static int inet_shutdown(struct socket *sock, int how) +int inet_shutdown(struct socket *sock, int how) { struct sock *sk=(struct sock*)sock->data; @@ -1214,7 +1178,7 @@ } -static int inet_select(struct socket *sock, int sel_type, select_table *wait ) +int inet_select(struct socket *sock, int sel_type, select_table *wait ) { struct sock *sk=(struct sock *) sock->data; if (sk->prot->select == NULL) @@ -1322,7 +1286,6 @@ #else return -ENOPKG; #endif - case SIOCADDDLCI: case SIOCDELDLCI: #ifdef CONFIG_DLCI @@ -1340,7 +1303,7 @@ return((*dlci_ioctl_hook)(cmd, (void *) arg)); 
#endif return -ENOPKG; - + default: if ((cmd >= SIOCDEVPRIVATE) && (cmd <= (SIOCDEVPRIVATE + 15))) @@ -1394,9 +1357,9 @@ */ struct sock *get_sock(struct proto *prot, unsigned short num, - unsigned long raddr, - unsigned short rnum, unsigned long laddr, - unsigned long paddr, unsigned short pnum) + unsigned long raddr, unsigned short rnum, + unsigned long laddr, unsigned long paddr, + unsigned short pnum) { struct sock *s = 0; struct sock *result = NULL; @@ -1570,7 +1533,7 @@ #endif -static struct proto_ops inet_proto_ops = { +struct proto_ops inet_proto_ops = { AF_INET, inet_create, @@ -1673,16 +1636,20 @@ for(i = 0; i < SOCK_ARRAY_SIZE; i++) { - tcp_prot.sock_array[i] = NULL; - udp_prot.sock_array[i] = NULL; - raw_prot.sock_array[i] = NULL; + tcp_sock_array[i] = NULL; + udp_sock_array[i] = NULL; + raw_sock_array[i] = NULL; } + tcp_prot.inuse = 0; tcp_prot.highestinuse = 0; + tcp_prot.sock_array = tcp_sock_array; udp_prot.inuse = 0; udp_prot.highestinuse = 0; + udp_prot.sock_array = udp_sock_array; raw_prot.inuse = 0; raw_prot.highestinuse = 0; + raw_prot.sock_array = raw_sock_array; printk("IP Protocols: "); for(p = inet_protocol_base; p != NULL;) @@ -1693,14 +1660,17 @@ p = tmp; } + /* * Set the ARP module up */ arp_init(); + /* * Set the IP module up */ ip_init(); + /* * Set the ICMP layer up */ diff -u --recursive --new-file v2.1.7/linux/net/ipv4/icmp.c linux/net/ipv4/icmp.c --- v2.1.7/linux/net/ipv4/icmp.c Tue Oct 29 19:58:49 1996 +++ linux/net/ipv4/icmp.c Sun Nov 3 11:04:43 1996 @@ -639,7 +639,8 @@ int hash; struct inet_protocol *ipprot; unsigned char *dp; - + __u32 info = 0; + iph = (struct iphdr *) (icmph + 1); dp= ((unsigned char *)iph)+(iph->ihl<<2); @@ -723,11 +724,7 @@ */ new_mtu = 68; } - /* - * Ugly trick to pass MTU to protocol layer. - * Really we should add argument "info" to error handler. 
- */ - iph->id = htons(new_mtu); + info = new_mtu; break; } #endif @@ -777,7 +774,7 @@ if (iph->protocol == ipprot->protocol && ipprot->err_handler) { - ipprot->err_handler(icmph->type, icmph->code, dp, + ipprot->err_handler(icmph->type, icmph->code, dp, info, iph->daddr, iph->saddr, ipprot); } diff -u --recursive --new-file v2.1.7/linux/net/ipv4/ip_masq.c linux/net/ipv4/ip_masq.c --- v2.1.7/linux/net/ipv4/ip_masq.c Thu Oct 10 19:10:58 1996 +++ linux/net/ipv4/ip_masq.c Sun Nov 3 11:04:44 1996 @@ -566,7 +566,8 @@ else timeout = ip_masq_expire->tcp_timeout; skb->csum = csum_partial((void *)(th + 1), size - sizeof(*th), 0); - tcp_send_check(th,iph->saddr,iph->daddr,size,skb); + tcp_v4_check(th, size, iph->saddr, iph->daddr, + skb->csum); } ip_masq_set_expire(ms, timeout); ip_send_check(iph); @@ -896,10 +897,14 @@ struct tcphdr *th; skb->csum = csum_partial((void *)(((struct tcphdr *)portptr) + 1), len - sizeof(struct tcphdr), 0); - tcp_send_check((struct tcphdr *)portptr,iph->saddr,iph->daddr,len,skb); + th = (struct tcphdr *) portptr; + th->check = 0; + tcp_v4_check(th, len, iph->saddr, iph->daddr, + skb->csum); + /* Check if TCP FIN or RST */ - th = (struct tcphdr *)portptr; + if (th->fin) { ms->flags |= IP_MASQ_F_SAW_FIN_IN; diff -u --recursive --new-file v2.1.7/linux/net/ipv4/ip_output.c linux/net/ipv4/ip_output.c --- v2.1.7/linux/net/ipv4/ip_output.c Tue Oct 29 19:58:49 1996 +++ linux/net/ipv4/ip_output.c Sun Nov 3 11:04:44 1996 @@ -321,42 +321,6 @@ } -/* - * If a sender wishes the packet to remain unfreed - * we add it to his send queue. This arguably belongs - * in the TCP level since nobody else uses it. BUT - * remember IPng might change all the rules. 
- */ -static inline void add_to_send_queue(struct sock * sk, struct sk_buff * skb) -{ - unsigned long flags; - - /* The socket now has more outstanding blocks */ - sk->packets_out++; - - /* Protect the list for a moment */ - save_flags(flags); - cli(); - - if (skb->link3 != NULL) - { - NETDEBUG(printk("ip.c: link3 != NULL\n")); - skb->link3 = NULL; - } - if (sk->send_head == NULL) - { - sk->send_tail = skb; - sk->send_head = skb; - sk->send_next = skb; - } - else - { - sk->send_tail->link3 = skb; - sk->send_tail = skb; - } - restore_flags(flags); -} - /* * Queues a packet to be sent, and starts the transmitter @@ -397,13 +361,11 @@ switch (free) { /* No reassigning numbers to fragments... */ - default: + case 2: free = 1; break; - case 0: - add_to_send_queue(sk, skb); - /* fall through */ - case 1: + default: + free = 1; iph->id = htons(ip_id_count++); } @@ -732,7 +694,7 @@ maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen; } - + /* * Start at the end of the frame by handling the remainder. */ diff -u --recursive --new-file v2.1.7/linux/net/ipv4/packet.c linux/net/ipv4/packet.c --- v2.1.7/linux/net/ipv4/packet.c Tue Oct 29 19:58:50 1996 +++ linux/net/ipv4/packet.c Sun Nov 3 11:04:44 1996 @@ -60,17 +60,6 @@ #include #include -/* - * We really ought to have a single public _inline_ min function! - */ - -static unsigned long min(unsigned long a, unsigned long b) -{ - if (a < b) - return(a); - return(b); -} - /* * This should be the easiest of all, all we do is copy it into a buffer. 
@@ -480,23 +469,22 @@ struct proto packet_prot = { packet_close, - ip_build_header, /* Not actually used */ NULL, + NULL, /* accept */ NULL, - ip_queue_xmit, /* These two are not actually used */ NULL, NULL, - NULL, - NULL, datagram_select, NULL, /* No ioctl */ packet_init, NULL, + NULL, NULL, /* No set/get socket options */ NULL, packet_sendmsg, /* Sendmsg */ packet_recvmsg, /* Recvmsg */ packet_bind, /* Bind */ + NULL, /* Backlog_rcv */ 128, 0, "PACKET", diff -u --recursive --new-file v2.1.7/linux/net/ipv4/proc.c linux/net/ipv4/proc.c --- v2.1.7/linux/net/ipv4/proc.c Mon Jun 3 14:23:42 1996 +++ linux/net/ipv4/proc.c Sun Nov 3 11:04:44 1996 @@ -59,6 +59,7 @@ { struct sock **s_array; struct sock *sp; + struct tcp_opt *tp; int i; int timer_active; int timer_active1; @@ -87,6 +88,7 @@ { cli(); sp = s_array[i]; + while(sp != NULL) { pos += 128; @@ -95,6 +97,9 @@ sp = sp->next; continue; } + + tp = &(sp->tp_pinfo.af_tcp); + dest = sp->daddr; src = sp->saddr; destp = sp->dummy_th.dest; @@ -122,13 +127,14 @@ sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld", i, src, srcp, dest, destp, sp->state, - format==0?sp->write_seq-sp->rcv_ack_seq:sp->wmem_alloc, - format==0?sp->acked_seq-sp->copied_seq:sp->rmem_alloc, + format==0?sp->write_seq-tp->snd_una:sp->wmem_alloc, + format==0?tp->rcv_nxt-sp->copied_seq:sp->rmem_alloc, timer_active, timer_expires-jiffies, (unsigned) sp->retransmits, (sp->socket&&SOCK_INODE(sp->socket))?SOCK_INODE(sp->socket)->i_uid:0, timer_active?sp->timeout:0, sp->socket && SOCK_INODE(sp->socket) ? 
SOCK_INODE(sp->socket)->i_ino : 0); + if (timer_active1) add_timer(&sp->retransmit_timer); if (timer_active2) add_timer(&sp->timer); len += sprintf(buffer+len, "%-127s\n", tmpbuf); diff -u --recursive --new-file v2.1.7/linux/net/ipv4/protocol.c linux/net/ipv4/protocol.c --- v2.1.7/linux/net/ipv4/protocol.c Tue Oct 29 19:58:50 1996 +++ linux/net/ipv4/protocol.c Sun Nov 3 11:04:44 1996 @@ -66,8 +66,8 @@ static struct inet_protocol tcp_protocol = { - tcp_rcv, /* TCP handler */ - tcp_err, /* TCP error control */ + tcp_v4_rcv, /* TCP handler */ + tcp_v4_err, /* TCP error control */ #if defined(CONFIG_NET_IPIP) && defined(CONFIG_IP_FORWARD) &ipip_protocol, #else diff -u --recursive --new-file v2.1.7/linux/net/ipv4/raw.c linux/net/ipv4/raw.c --- v2.1.7/linux/net/ipv4/raw.c Tue Oct 29 19:58:50 1996 +++ linux/net/ipv4/raw.c Sun Nov 3 11:04:44 1996 @@ -64,13 +64,6 @@ struct sock *mroute_socket=NULL; #endif -static inline unsigned long min(unsigned long a, unsigned long b) -{ - if (a < b) - return(a); - return(b); -} - /* * Raw_err does not currently get called by the icmp module - FIXME: @@ -109,7 +102,7 @@ return; } -static inline void raw_rcv_skb(struct sock * sk, struct sk_buff * skb) +static inline int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) { /* Charge it to the socket. */ @@ -118,22 +111,10 @@ ip_statistics.IpInDiscards++; skb->sk=NULL; kfree_skb(skb, FREE_READ); - return; + return 0; } ip_statistics.IpInDelivers++; -} - -/* - * This is the prot->rcv() function. It's called when we have - * backlogged packets from core/sock.c if we couldn't receive it - * when the packet arrived. 
- */ -static int raw_rcv_redo(struct sk_buff *skb, struct device *dev, struct options *opt, - __u32 daddr, unsigned short len, - __u32 saddr, int redo, struct inet_protocol * protocol) -{ - raw_rcv_skb(skb->sk, skb); return 0; } @@ -376,14 +357,11 @@ struct proto raw_prot = { raw_close, - ip_build_header, udp_connect, NULL, - ip_queue_xmit, NULL, NULL, NULL, - raw_rcv_redo, datagram_select, #ifdef CONFIG_IP_MROUTE ipmr_ioctl, @@ -392,14 +370,16 @@ #endif raw_init, NULL, + NULL, ip_setsockopt, ip_getsockopt, raw_sendmsg, raw_recvmsg, NULL, /* No special bind */ + raw_rcv_skb, 128, 0, "RAW", 0, 0, - {NULL,} + NULL }; diff -u --recursive --new-file v2.1.7/linux/net/ipv4/sysctl_net_ipv4.c linux/net/ipv4/sysctl_net_ipv4.c --- v2.1.7/linux/net/ipv4/sysctl_net_ipv4.c Mon Jun 3 14:07:09 1996 +++ linux/net/ipv4/sysctl_net_ipv4.c Sun Nov 3 11:04:44 1996 @@ -7,6 +7,20 @@ #include #include +#include + +/* + * TCP configuration parameters + */ + +#define TCP_PMTU_DISC 0x00000001 /* perform PMTU discovery */ +#define TCP_CONG_AVOID 0x00000002 /* congestion avoidance algorithm */ +#define TCP_DELAY_ACKS 0x00000003 /* delayed ack stategy */ + +#if 0 +static int boolean_min = 0; +static int boolean_max = 1; +#endif /* From arp.c */ extern int sysctl_arp_res_time; @@ -17,6 +31,8 @@ extern int sysctl_arp_confirm_interval; extern int sysctl_arp_confirm_timeout; +extern int sysctl_tcp_vegas_cong_avoidance; + ctl_table ipv4_table[] = { {NET_IPV4_ARP_RES_TIME, "arp_res_time", &sysctl_arp_res_time, sizeof(int), 0644, NULL, &proc_dointvec}, @@ -34,5 +50,15 @@ {NET_IPV4_ARP_CONFIRM_TIMEOUT, "arp_confirm_timeout", &sysctl_arp_confirm_timeout, sizeof(int), 0644, NULL, &proc_dointvec}, +#if 0 + {TCP_PMTU_DISC, "tcp_pmtu_discovery", + &ipv4_pmtu_discovery, sizeof(int), 644, + NULL, &proc_dointvec, &sysctl_intvec_minmax, + &boolean_min, &boolean_max}, +#endif + + {NET_IPV4_TCP_VEGAS_CONG_AVOID, "tcp_vegas_cong_avoid", + &sysctl_tcp_vegas_cong_avoidance, sizeof(int), 0644, + NULL, &proc_dointvec 
}, {0} }; diff -u --recursive --new-file v2.1.7/linux/net/ipv4/tcp.c linux/net/ipv4/tcp.c --- v2.1.7/linux/net/ipv4/tcp.c Tue Oct 29 19:58:50 1996 +++ linux/net/ipv4/tcp.c Thu Nov 7 11:02:38 1996 @@ -172,7 +172,7 @@ * ack if stat is TCP_CLOSED. * Alan Cox : Look up device on a retransmit - routes may * change. Doesn't yet cope with MSS shrink right - * but it's a start! + * but its a start! * Marc Tamsky : Closing in closing fixes. * Mike Shaver : RFC1122 verifications. * Alan Cox : rcv_saddr errors. @@ -194,15 +194,8 @@ * against machines running Solaris, * and seems to result in general * improvement. - * Eric Schenk : Changed receiver side silly window - * avoidance algorithm to BSD style - * algorithm. This doubles throughput - * against machines running Solaris, - * and seems to result in general - * improvement. * Stefan Magdalinski : adjusted tcp_readable() to fix FIONREAD * Willy Konynenberg : Transparent proxying support. - * Theodore Ts'o : Do secure TCP sequence numbers. * * To Fix: * Fast path the code. Two things here - fix the window calculation @@ -425,10 +418,8 @@ * (Whew. -- MS 950903) **/ -#include #include #include -#include #include #include @@ -438,25 +429,34 @@ unsigned long seq_offset; struct tcp_mib tcp_statistics; -static void tcp_close(struct sock *sk, unsigned long timeout); + /* * Find someone to 'accept'. 
Must be called with * the socket locked or with interrupts disabled */ -static struct sk_buff *tcp_find_established(struct sock *s) +static struct open_request *tcp_find_established(struct tcp_opt *tp) { - struct sk_buff *p=skb_peek(&s->receive_queue); - if(p==NULL) + struct open_request *req; + + req = tp->syn_wait_queue; + + if (!req) return NULL; - do - { - if(p->sk->state == TCP_ESTABLISHED || p->sk->state >= TCP_FIN_WAIT1) - return p; - p=p->next; - } - while(p!=(struct sk_buff *)&s->receive_queue); + + do { + if (req->sk && + (req->sk->state == TCP_ESTABLISHED || + req->sk->state >= TCP_FIN_WAIT1)) + { + return req; + } + + req = req->dl_next; + + } while (req != tp->syn_wait_queue); + return NULL; } @@ -493,108 +493,9 @@ /* - * This routine is called by the ICMP module when it gets some - * sort of error condition. If err < 0 then the socket should - * be closed and the error returned to the user. If err > 0 - * it's just the icmp type << 8 | icmp code. After adjustment - * header points to the first 8 bytes of the tcp header. We need - * to find the appropriate port. - */ - -void tcp_err(int type, int code, unsigned char *header, __u32 daddr, - __u32 saddr, struct inet_protocol *protocol) -{ - struct tcphdr *th = (struct tcphdr *)header; - struct sock *sk; - - /* - * This one is _WRONG_. FIXME urgently. - */ -#ifndef CONFIG_NO_PATH_MTU_DISCOVERY - struct iphdr *iph=(struct iphdr *)(header-sizeof(struct iphdr)); -#endif - th =(struct tcphdr *)header; - sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr, 0, 0); - - if (sk == NULL) - return; - - if (type == ICMP_SOURCE_QUENCH) - { - /* - * FIXME: - * Follow BSD for now and just reduce cong_window to 1 again. - * It is possible that we just want to reduce the - * window by 1/2, or that we want to reduce ssthresh by 1/2 - * here as well. 
- */ - sk->cong_window = 1; - sk->high_seq = sk->sent_seq; - return; - } - - if (type == ICMP_PARAMETERPROB) - { - sk->err=EPROTO; - sk->error_report(sk); - } - -#ifndef CONFIG_NO_PATH_MTU_DISCOVERY - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) - { - struct rtable * rt; - /* - * Ugly trick to pass MTU to protocol layer. - * Really we should add argument "info" to error handler. - */ - unsigned short new_mtu = ntohs(iph->id); - - if ((rt = sk->ip_route_cache) != NULL) - if (rt->rt_mtu > new_mtu) - rt->rt_mtu = new_mtu; - - /* - * FIXME:: - * Not the nicest of fixes: Lose a MTU update if the socket is - * locked this instant. Not the right answer but will be best - * for the production fix. Make 2.1 work right! - */ - - if (sk->mtu > new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr) - && new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr) && !sk->users) - sk->mtu = new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr); - - return; - } -#endif - - /* - * If we've already connected we will keep trying - * until we time out, or the user gives up. - */ - - if(code<=NR_ICMP_UNREACH) - { - if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) - { - sk->err = icmp_err_convert[code].errno; - if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) - { - tcp_statistics.TcpAttemptFails++; - tcp_set_state(sk,TCP_CLOSE); - sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ - } - } - else /* Only an error on timeout */ - sk->err_soft = icmp_err_convert[code].errno; - } -} - - -/* - * Walk down the receive queue counting readable data until we hit the end or we find a gap - * in the received data queue (ie a frame missing that needs sending to us). Not - * sorting using two queues as data arrives makes life so much harder. 
+ * Walk down the receive queue counting readable data until we hit the + * end or we find a gap in the received data queue (ie a frame missing + * that needs sending to us). */ static int tcp_readable(struct sock *sk) @@ -627,13 +528,19 @@ do { - if (before(counted, skb->seq)) /* Found a hole so stops here */ + /* Found a hole so stops here */ + if (before(counted, skb->seq)) break; - sum = skb->len - (counted - skb->seq); /* Length - header but start from where we are up to (avoid overlaps) */ + /* + * Length - header but start from where we are up to + * avoid overlaps + */ + sum = skb->len - (counted - skb->seq); if (skb->h.th->syn) sum++; if (sum > 0) - { /* Add it up, move on */ + { + /* Add it up, move on */ amount += sum; if (skb->h.th->syn) amount--; @@ -655,9 +562,13 @@ * and a blocking read(). And the queue scan in tcp_read() * was correct. Mike */ + + /* don't count urg data */ if (skb->h.th->urg) - amount--; /* don't count urg data */ -/* if (amount && skb->h.th->psh) break;*/ + amount--; +#if 0 + if (amount && skb->h.th->psh) break; +#endif skb = skb->next; } while(skb != (struct sk_buff *)&sk->receive_queue); @@ -674,12 +585,12 @@ static int tcp_listen_select(struct sock *sk, int sel_type, select_table *wait) { if (sel_type == SEL_IN) { - struct sk_buff * skb; + struct open_request *req; lock_sock(sk); - skb = tcp_find_established(sk); + req = tcp_find_established(&sk->tp_pinfo.af_tcp); release_sock(sk); - if (skb) + if (req) return 1; select_wait(sk->sleep,wait); return 0; @@ -687,7 +598,6 @@ return 0; } - /* * Wait for a TCP event. * @@ -695,8 +605,10 @@ * take care of normal races (between the test and the event) and we don't * go look at any of the socket buffers directly. 
*/ -static int tcp_select(struct sock *sk, int sel_type, select_table *wait) +int tcp_select(struct sock *sk, int sel_type, select_table *wait) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + if (sk->state == TCP_LISTEN) return tcp_listen_select(sk, sel_type, wait); @@ -709,12 +621,12 @@ if (sk->shutdown & RCV_SHUTDOWN) return 1; - - if (sk->acked_seq == sk->copied_seq) + + if (tp->rcv_nxt == sk->copied_seq) break; if (sk->urg_seq != sk->copied_seq || - sk->acked_seq != sk->copied_seq+1 || + tp->rcv_nxt != sk->copied_seq+1 || sk->urginline || !sk->urg_data) return 1; break; @@ -726,7 +638,12 @@ return 0; if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) break; - if (sk->wmem_alloc*2 > sk->sndbuf) + /* + * This is now right thanks to a small fix + * by Matt Dillon. + */ + + if (sock_wspace(sk) < sk->mtu+128+sk->prot->max_header) break; return 1; @@ -792,50 +709,21 @@ } -/* - * This routine computes a TCP checksum. - * - * Modified January 1995 from a go-faster DOS routine by - * Jorge Cwik +/* + * This routine builds a generic TCP header. */ -#undef DEBUG_TCP_CHECK -void tcp_send_check(struct tcphdr *th, unsigned long saddr, - unsigned long daddr, int len, struct sk_buff *skb) -{ -#ifdef DEBUG_TCP_CHECK - u16 check; -#endif - th->check = 0; - th->check = tcp_check(th, len, saddr, daddr, - csum_partial((char *)th,sizeof(*th),skb->csum)); - -#ifdef DEBUG_TCP_CHECK - check = th->check; - th->check = 0; - th->check = tcp_check(th, len, saddr, daddr, - csum_partial((char *)th,len,0)); - if (check != th->check) { - static int count = 0; - if (++count < 10) { - printk("Checksum %x (%x) from %p\n", th->check, check, - (&th)[-1]); - printk("TCP=\n", th->doff*4, th->ack, th->syn, th->fin); - } - } -#endif -} - - -/* - * This routine builds a generic TCP header. 
- */ - -static inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push) + +extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push) { + struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); memcpy(th,(void *) &(sk->dummy_th), sizeof(*th)); - th->psh = (push == 0) ? 1 : 0; th->seq = htonl(sk->write_seq); - th->ack_seq = htonl(sk->acked_seq); +#if 0 + th->psh =(push == 0) ? 1 : 0; +#endif + sk->bytes_rcv = 0; + sk->ack_timed = 0; + th->ack_seq = htonl(tp->rcv_nxt); th->window = htons(tcp_select_window(sk)); return(sizeof(*th)); @@ -890,187 +778,173 @@ lock_sock(sk); } -/* - * Add more stuff to the end of skb->len - */ -static int fill_in_partial_skb(struct sock *sk, struct sk_buff *skb, - unsigned char * from, int seglen) -{ - void (*send)(struct sock *sk, struct sk_buff *skb); - int copy, tcp_size; - tcp_size = skb->tail - (unsigned char *)(skb->h.th + 1); +static int tcp_append_tail(struct sock *sk, struct sk_buff *skb, u8 *from, + int tcp_size, int seglen) +{ + int fault; + int copy; - /* - * Now we may find the frame is as big, or too - * big for our MSS. Thats all fine. It means the - * MSS shrank (from an ICMP) after we allocated - * this frame. + /* + * Add more stuff to the end + * of the skb */ - copy = sk->mss - tcp_size; - if (copy <= 0) { - tcp_send_skb(sk, skb); - return 0; + copy = min(sk->mss - tcp_size, skb->end - skb->tail); + copy = min(copy, seglen); + + tcp_size += copy; + + fault = copy_from_user(skb->tail, from, copy); + + if (fault) + { + return -1; } - /* - * Otherwise continue to fill the buffer. 
- */ - send = tcp_send_skb; - if (copy > seglen) { - send = tcp_enqueue_partial; - copy = seglen; - } - copy_from_user(skb->tail, from, copy); - tcp_size += copy; - skb->tail += copy; - skb->len += copy; + skb_put(skb, copy); skb->csum = csum_partial(skb->tail - tcp_size, tcp_size, 0); + sk->write_seq += copy; - if (!sk->packets_out) - send = tcp_send_skb; - send(sk, skb); + skb->end_seq += copy; + return copy; } - /* * This routine copies from a user buffer into a socket, * and starts the transmit system. */ -static int do_tcp_sendmsg(struct sock *sk, - int iovlen, struct iovec *iov, - int len, int nonblock, int flags) +int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov, + int len, int nonblock, int flags) { - int copied = 0; - struct device *dev = NULL; + int copied = 0; + struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); - /* + /* * Wait for a connection to finish. */ while (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) { - if (sk->err) + + if (copied) + return copied; + + if (sk->err) return sock_error(sk); - + if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) { if (sk->keepopen) send_sig(SIGPIPE, current, 0); return -EPIPE; } - + if (nonblock) return -EAGAIN; - + if (current->signal & ~current->blocked) return -ERESTARTSYS; - + wait_for_tcp_connect(sk); } - + + /* * Ok commence sending */ - - while (--iovlen >= 0) + + while(--iovlen >= 0) { int seglen=iov->iov_len; unsigned char * from=iov->iov_base; + u32 actual_win; + iov++; - while(seglen > 0) + while(seglen > 0) { int copy; int tmp; struct sk_buff *skb; - void (*send)(struct sock *, struct sk_buff *); /* * Stop on errors */ - if (sk->err) + if (sk->err) { - if (copied) + if (copied) return copied; return sock_error(sk); } /* - * Make sure that we are established. + * Make sure that we are established. 
*/ - if (sk->shutdown & SEND_SHUTDOWN) + if (sk->shutdown & SEND_SHUTDOWN) { if (copied) return copied; send_sig(SIGPIPE,current,0); return -EPIPE; } - - /* - * The following code can result in copy <= if sk->mss is ever - * decreased. It shouldn't be. sk->mss is min(sk->mtu, sk->max_window). - * sk->mtu is constant once SYN processing is finished. I.e. we - * had better not get here until we've seen his SYN and at least one - * valid ack. (The SYN sets sk->mtu and the ack sets sk->max_window.) - * But ESTABLISHED should guarantee that. sk->max_window is by definition - * non-decreasing. Note that any ioctl to set user_mss must be done - * before the exchange of SYN's. If the initial ack from the other - * end has a window of 0, max_window and thus mss will both be 0. + + /* + *Now we need to check if we have a half built packet. */ - /* - * Now we need to check if we have a half built packet. - */ -#ifndef CONFIG_NO_PATH_MTU_DISCOVERY - /* - * Really, we should rebuild all the queues... - * It's difficult. Temporary hack is to send all - * queued segments with allowed fragmentation. - */ + /* if we have queued packets */ + if (tp->send_head && !(flags & MSG_OOB) ) { + int tcp_size; + + /* Tail */ + + skb = sk->write_queue.prev; + tcp_size = skb->tail - + (unsigned char *)(skb->h.th + 1); + /* - * new_mss may be zero. That indicates - * we don't have a window estimate for - * the remote box yet. - * -- AC + * This window_seq test is somewhat dangerous + * If the remote does SWS avoidance we should + * queue the best we can + * if not we should in fact send multiple + * packets... + * a method for detecting this would be most + * welcome */ - - int new_mss = min(sk->mtu, sk->max_window); - if (new_mss && new_mss < sk->mss) - { - tcp_send_partial(sk); - sk->mss = new_mss; - } - } -#endif - /* - * If there is a partly filled frame we can fill - * out. 
- */ - skb = tcp_dequeue_partial(sk); - if (skb) { - if (!(flags & MSG_OOB)) { - int retval; - retval = fill_in_partial_skb(sk, skb, from, seglen); - if (retval < 0) - return retval; - seglen -= retval; - from += retval; - copied += retval; - len -= retval; + if (skb->end > skb->tail && + sk->mss - tcp_size > 0 && + skb->end_seq < tp->snd_una + tp->snd_wnd) + { + + copy = tcp_append_tail(sk, skb, from, + tcp_size, + seglen); + if (copy == -1) + { + return -EFAULT; + } + + from += copy; + copied += copy; + len -= copy; + seglen -= copy; + + /* + * FIXME: if we're nagling we + * should send here. + */ continue; } - tcp_send_skb(sk, skb); - continue; } + /* - * We also need to worry about the window. - * If window < 1/2 the maximum window we've seen from this + * We also need to worry about the window. + * If window < 1/2 the maximum window we've seen from this * host, don't use it. This is sender side * silly window prevention, as specified in RFC1122. * (Note that this is different than earlier versions of @@ -1080,40 +954,51 @@ * be queued for later rather than sent. */ - copy = sk->window_seq - sk->write_seq; - if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss) - copy = sk->mss; - if (copy > seglen) - copy = seglen; + copy = min(seglen, sk->mss); + + actual_win = tp->snd_wnd - (tp->snd_nxt - tp->snd_una); + + if (copy > actual_win && + actual_win >= (sk->max_window >> 1)) + { + copy = actual_win; + } + if (copy <= 0) { - printk(KERN_CRIT "TCP: **bug**: copy=%d, sk->mss=%d\n", copy, sk->mss); - return -EFAULT; + printk(KERN_DEBUG "sendmsg: copy < 0\n"); + return -EIO; } /* - * We should really check the window here also. 
+ * If sk->packets_out > 0 segment will be nagled + * else we kick it right away */ - send = tcp_send_skb; - tmp = copy + sk->prot->max_header + 15; - if (copy < sk->mss && !(flags & MSG_OOB) && sk->packets_out) + tmp = MAX_HEADER + sk->prot->max_header + + sizeof(struct sk_buff) + 15; + if (copy < min(sk->mss, sk->max_window >> 1) && + !(flags & MSG_OOB) && sk->packets_out) + { + tmp += min(sk->mss, sk->max_window); + } + else { - tmp = tmp - copy + sk->mtu + 128; - send = tcp_enqueue_partial; + tmp += copy; } + skb = sock_wmalloc(sk, tmp, 0, GFP_KERNEL); - + /* - * If we didn't get any memory, we need to sleep. + * If we didn't get any memory, we need to sleep. */ - - if (skb == NULL) + + if (skb == NULL) { sk->socket->flags |= SO_NOSPACE; - if (nonblock) + if (nonblock) { - if (copied) + if (copied) return copied; return -EAGAIN; } @@ -1132,27 +1017,14 @@ skb->sk = sk; skb->free = 0; skb->localroute = sk->localroute|(flags&MSG_DONTROUTE); - + /* * FIXME: we need to optimize this. * Perhaps some hints here would be good. 
*/ - tmp = sk->prot->build_header(skb, sk->saddr, sk->daddr, &dev, - IPPROTO_TCP, sk->opt, skb->truesize,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache); - if (tmp < 0 ) - { - sock_wfree(sk, skb); - if (copied) - return(copied); - return(tmp); - } -#ifndef CONFIG_NO_PATH_MTU_DISCOVERY - skb->ip_hdr->frag_off |= htons(IP_DF); -#endif - skb->dev = dev; - skb->h.th =(struct tcphdr *)skb_put(skb,sizeof(struct tcphdr)); - tmp = tcp_build_header(skb->h.th, sk, seglen-copy); + tmp = tp->af_specific->build_net_header(sk, skb); + if (tmp < 0) { sock_wfree(sk, skb); @@ -1161,6 +1033,20 @@ return(tmp); } + skb->h.th =(struct tcphdr *) + skb_put(skb,sizeof(struct tcphdr)); + + seglen -= copy; + tmp = tcp_build_header(skb->h.th, sk, seglen || iovlen); + + if (tmp < 0) + { + sock_wfree(sk, skb); + if (copied) + return(copied); + return(tmp); + } + if (flags & MSG_OOB) { skb->h.th->urg = 1; @@ -1168,92 +1054,42 @@ } skb->csum = csum_partial_copy_fromuser(from, - skb->tail, copy, 0); - skb->tail += copy; - skb->len += copy; + skb_put(skb, copy), copy, 0); + from += copy; copied += copy; len -= copy; - seglen -= copy; - sk->write_seq += copy; skb->free = 0; - - send(sk, skb); + sk->write_seq += copy; + + tcp_send_skb(sk, skb); + + release_sock(sk); + lock_sock(sk); } } + sk->err = 0; return copied; } -static int tcp_sendmsg(struct sock *sk, struct msghdr *msg, - int len, int nonblock, int flags) -{ - int retval = -EINVAL; - - /* - * Do sanity checking for sendmsg/sendto/send - */ - - if (flags & ~(MSG_OOB|MSG_DONTROUTE)) - goto out; - if (msg->msg_name) { - struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name; - - if (msg->msg_namelen < sizeof(*addr)) - goto out; - if (addr->sin_family && addr->sin_family != AF_INET) - goto out; - retval = -ENOTCONN; - if(sk->state == TCP_CLOSE) - goto out; - retval = -EISCONN; - if (addr->sin_port != sk->dummy_th.dest) - goto out; - if (addr->sin_addr.s_addr != sk->daddr) - goto out; - } - - lock_sock(sk); - retval = do_tcp_sendmsg(sk, 
msg->msg_iovlen, msg->msg_iov, len, nonblock, flags); + /* - * Nagle's rule. Turn Nagle off with TCP_NODELAY for highly - * interactive fast network servers. It's meant to be on and - * it really improves the throughput though not the echo time - * on my slow slip link - Alan - * - * If not nagling we can send on the before case too.. + * Send an ack if one is backlogged at this point. Ought to merge + * this with tcp_send_ack(). + * This is called for delayed acks also. */ - - if (sk->partial) { - if (!sk->packets_out || - (sk->nonagle && before(sk->write_seq , sk->window_seq))) { - tcp_send_partial(sk); - } - } - - release_sock(sk); - -out: - return retval; -} - - -/* - * Send an ack if one is backlogged at this point. - */ - + void tcp_read_wakeup(struct sock *sk) { - if (!sk->ack_backlog) - return; - /* * If we're closed, don't send an ack, or we'll get a RST * from the closed destination. */ + if ((sk->state == TCP_CLOSE) || (sk->state == TCP_TIME_WAIT)) return; @@ -1267,8 +1103,11 @@ */ static int tcp_recv_urg(struct sock * sk, int nonblock, - struct msghdr *msg, int len, int flags, int *addr_len) + struct msghdr *msg, int len, int flags, + int *addr_len) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + /* * No URG data to read */ @@ -1302,13 +1141,12 @@ memcpy_toiovec(msg->msg_iov, &c, 1); if(msg->msg_name) { - struct sockaddr_in *sin=(struct sockaddr_in *)msg->msg_name; - sin->sin_family=AF_INET; - sin->sin_addr.s_addr=sk->daddr; - sin->sin_port=sk->dummy_th.dest; + tp->af_specific->addr2sockaddr(sk, (struct sockaddr *) + msg->msg_name); } if(addr_len) - *addr_len=sizeof(struct sockaddr_in); + *addr_len= tp->af_specific->sockaddr_len; + release_sock(sk); return 1; } @@ -1332,46 +1170,57 @@ static inline void tcp_eat_skb(struct sock *sk, struct sk_buff * skb) { + sk->ack_backlog++; + skb->sk = sk; __skb_unlink(skb, &sk->receive_queue); kfree_skb(skb, FREE_READ); } -/* - * FIXME: - * This routine frees used buffers. 
- * It should consider sending an ACK to let the - * other end know we now have a bigger window. - */ static void cleanup_rbuf(struct sock *sk) { + struct sk_buff *skb; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + /* * NOTE! The socket must be locked, so that we don't get * a messed-up receive queue. */ - while (!skb_queue_empty(&sk->receive_queue)) { - struct sk_buff *skb = sk->receive_queue.next; + + while ((skb=skb_peek(&sk->receive_queue)) != NULL) { if (!skb->used || skb->users) break; tcp_eat_skb(sk, skb); } - + + if(sk->debug) + printk("sk->rspace = %lu\n", sock_rspace(sk)); + /* - * Tell the world if we raised the window. + * We send a ACK if the sender is blocked + * else let tcp_data deal with the acking policy. */ - if (tcp_raise_window(sk)) - tcp_send_ack(sk); -} + + if (sock_rspace(sk) > tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup) && + (tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup) < sk->mss)) + { + /* Send an ack right now. */ + sk->delayed_acks++; + tcp_read_wakeup(sk); + } + +} /* - * This routine copies from a sock struct into the user buffer. + * This routine copies from a sock struct into the user buffer. 
*/ - -static int tcp_recvmsg(struct sock *sk, struct msghdr *msg, - int len, int nonblock, int flags, int *addr_len) + +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, + int len, int nonblock, int flags, int *addr_len) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct wait_queue wait = { current, NULL }; int copied = 0; u32 peek_seq; @@ -1434,11 +1283,20 @@ current->state = TASK_INTERRUPTIBLE; - skb = sk->receive_queue.next; - while (skb != (struct sk_buff *)&sk->receive_queue) + skb = skb_peek(&sk->receive_queue); + do { - if (before(*seq, skb->seq)) + if (!skb) break; + /* + * now that we have two receive queues this + * shouldn't happen + */ + if (before(*seq, skb->seq)) { + printk("recvmsg bug: copied %X seq %X\n", + *seq, skb->seq); + break; + } offset = *seq - skb->seq; if (skb->h.th->syn) offset--; @@ -1450,6 +1308,7 @@ skb->used = 1; skb = skb->next; } + while (skb != (struct sk_buff *)&sk->receive_queue); if (copied) break; @@ -1539,7 +1398,7 @@ *seq += used; /* - * This copy_to_user can sleep. If it sleeps and we + * This memcpy_tofs can sleep. If it sleeps and we * do a second read it relies on the skb->users to avoid * a crash when cleanup_rbuf() gets called. */ @@ -1590,15 +1449,13 @@ } - if(copied>0 && msg->msg_name) + if(copied > 0 && msg->msg_name) { - struct sockaddr_in *sin=(struct sockaddr_in *)msg->msg_name; - sin->sin_family=AF_INET; - sin->sin_addr.s_addr=sk->daddr; - sin->sin_port=sk->dummy_th.dest; + tp->af_specific->addr2sockaddr(sk, (struct sockaddr *) + msg->msg_name); } if(addr_len) - *addr_len=sizeof(struct sockaddr_in); + *addr_len= tp->af_specific->sockaddr_len; remove_wait_queue(sk->sleep, &wait); current->state = TASK_RUNNING; @@ -1708,19 +1565,16 @@ sk->shutdown |= SEND_SHUTDOWN; /* - * Clear out any half completed packets. + * Clear out any half completed packets. 
*/ - if (sk->partial) - tcp_send_partial(sk); - /* * FIN if needed */ - + if (tcp_close_state(sk,0)) tcp_send_fin(sk); - + release_sock(sk); } @@ -1741,7 +1595,7 @@ } -static void tcp_close(struct sock *sk, unsigned long timeout) +void tcp_close(struct sock *sk, unsigned long timeout) { struct sk_buff *skb; @@ -1774,22 +1628,16 @@ * descriptor close, not protocol-sourced closes, because the * reader process may not have drained the data yet! */ - + while((skb=skb_dequeue(&sk->receive_queue))!=NULL) kfree_skb(skb, FREE_READ); - /* - * Get rid off any half-completed packets. - */ - - if (sk->partial) - tcp_send_partial(sk); - + /* * Timeout is not the same thing - however the code likes * to send both the same way (sigh). */ - + if (tcp_close_state(sk,1)==1) { tcp_send_fin(sk); @@ -1836,14 +1684,13 @@ /* - * Wait for an incoming connection, avoid race - * conditions. This must be called with the socket - * locked. + * Wait for an incoming connection, avoid race + * conditions. This must be called with the socket locked. */ -static struct sk_buff * wait_for_connect(struct sock * sk) +static struct open_request * wait_for_connect(struct sock * sk) { struct wait_queue wait = { current, NULL }; - struct sk_buff * skb = NULL; + struct open_request *req = NULL; add_wait_queue(sk->sleep, &wait); for (;;) { @@ -1851,27 +1698,29 @@ release_sock(sk); schedule(); lock_sock(sk); - skb = tcp_find_established(sk); - if (skb) + req = tcp_find_established(&(sk->tp_pinfo.af_tcp)); + if (req) break; if (current->signal & ~current->blocked) break; } remove_wait_queue(sk->sleep, &wait); - return skb; + return req; } + /* * This will accept the next outstanding connection. * * Be careful about race conditions here - this is subtle. 
*/ -static struct sock *tcp_accept(struct sock *sk, int flags) +struct sock *tcp_accept(struct sock *sk, int flags) { - int error; - struct sk_buff *skb; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct open_request *req; struct sock *newsk = NULL; + int error; /* * We need to make sure that this socket is listening, @@ -1884,12 +1733,12 @@ lock_sock(sk); - skb = tcp_find_established(sk); - if (skb) { + req = tcp_find_established(tp); + if (req) { got_new_connect: - __skb_unlink(skb, &sk->receive_queue); - newsk = skb->sk; - kfree_skb(skb, FREE_READ); + tcp_synq_unlink(tp, req); + newsk = req->sk; + kfree(req); sk->ack_backlog--; error = 0; out: @@ -1902,238 +1751,35 @@ error = EAGAIN; if (flags & O_NONBLOCK) goto out; - skb = wait_for_connect(sk); - if (skb) + req = wait_for_connect(sk); + if (req) goto got_new_connect; error = ERESTARTSYS; goto out; } -/* - * Check that a TCP address is unique, don't allow multiple - * connects to/from the same address - */ -static int tcp_unique_address(u32 saddr, u16 snum, u32 daddr, u16 dnum) -{ - int retval = 1; - struct sock * sk; - - /* Make sure we are allowed to connect here. */ - cli(); - for (sk = tcp_prot.sock_array[snum & (SOCK_ARRAY_SIZE -1)]; - sk != NULL; sk = sk->next) - { - /* hash collision? */ - if (sk->num != snum) - continue; - if (sk->saddr != saddr) - continue; - if (sk->daddr != daddr) - continue; - if (sk->dummy_th.dest != dnum) - continue; - retval = 0; - break; - } - sti(); - return retval; -} - /* - * This will initiate an outgoing connection. + * Socket option code for TCP. */ - -static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) + +int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, + int optlen) { - struct sk_buff *buff; - struct device *dev=NULL; - unsigned char *ptr; - int tmp; - int atype; - struct tcphdr *t1; - struct rtable *rt; - - if (sk->state != TCP_CLOSE) - return(-EISCONN); - - /* - * Don't allow a double connect. 
- */ - - if(sk->daddr) - return -EINVAL; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int val; - if (addr_len < 8) - return(-EINVAL); - - if (usin->sin_family && usin->sin_family != AF_INET) - return(-EAFNOSUPPORT); - - /* - * connect() to INADDR_ANY means loopback (BSD'ism). - */ - - if (usin->sin_addr.s_addr==INADDR_ANY) - usin->sin_addr.s_addr=ip_my_addr(); - - /* - * Don't want a TCP connection going to a broadcast address - */ - - if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) - return -ENETUNREACH; - - if (!tcp_unique_address(sk->saddr, sk->num, usin->sin_addr.s_addr, usin->sin_port)) - return -EADDRNOTAVAIL; - - lock_sock(sk); - sk->daddr = usin->sin_addr.s_addr; - - sk->rcv_ack_cnt = 1; - sk->err = 0; - sk->dummy_th.dest = usin->sin_port; - - buff = sock_wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL); - if (buff == NULL) + if (level != SOL_TCP) { - release_sock(sk); - return(-ENOMEM); + return tp->af_specific->setsockopt(sk, level, optname, + optval, optlen); } - buff->sk = sk; - buff->free = 0; - buff->localroute = sk->localroute; - - /* - * Put in the IP header and routing stuff. 
- */ - - tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, - IPPROTO_TCP, sk->opt, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache); - if (tmp < 0) - { - sock_wfree(sk, buff); - release_sock(sk); - return(-ENETUNREACH); - } - if ((rt = sk->ip_route_cache) != NULL && !sk->saddr) - sk->saddr = rt->rt_src; - sk->rcv_saddr = sk->saddr; - - /* - * Set up our outgoing TCP sequence number - */ - sk->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr, - sk->dummy_th.source, - usin->sin_port); - sk->window_seq = sk->write_seq; - sk->rcv_ack_seq = sk->write_seq -1; - - t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr)); - - memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1)); - buff->seq = sk->write_seq++; - t1->seq = htonl(buff->seq); - sk->sent_seq = sk->write_seq; - buff->end_seq = sk->write_seq; - t1->ack = 0; - t1->window = 2; - t1->syn = 1; - t1->doff = 6; - /* use 512 or whatever user asked for */ - - if(rt!=NULL && (rt->rt_flags&RTF_WINDOW)) - sk->window_clamp=rt->rt_window; - else - sk->window_clamp=0; - - if (sk->user_mss) - sk->mtu = sk->user_mss; - else if (rt) - sk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr); - else - sk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr); - - /* - * but not bigger than device MTU - */ - - if(sk->mtu <32) - sk->mtu = 32; /* Sanity limit */ - - sk->mtu = min(sk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr)); - -#ifdef CONFIG_SKIP - - /* - * SKIP devices set their MTU to 65535. This is so they can take packets - * unfragmented to security process then fragment. They could lie to the - * TCP layer about a suitable MTU, but it's easier to let skip sort it out - * simply because the final package we want unfragmented is going to be - * - * [IPHDR][IPSP][Security data][Modified TCP data][Security data] - */ - - if(skip_pick_mtu!=NULL) /* If SKIP is loaded.. */ - sk->mtu=skip_pick_mtu(sk->mtu,dev); -#endif - - /* - * Put in the TCP options to say MTU. 
- */ - - ptr = skb_put(buff,4); - ptr[0] = 2; - ptr[1] = 4; - ptr[2] = (sk->mtu) >> 8; - ptr[3] = (sk->mtu) & 0xff; - buff->csum = csum_partial(ptr, 4, 0); - tcp_send_check(t1, sk->saddr, sk->daddr, - sizeof(struct tcphdr) + 4, buff); - - /* - * This must go first otherwise a really quick response will get reset. - */ - - tcp_cache_zap(); - tcp_set_state(sk,TCP_SYN_SENT); - if(rt&&rt->rt_flags&RTF_IRTT) - sk->rto = rt->rt_irtt; - else - sk->rto = TCP_TIMEOUT_INIT; - sk->delack_timer.function = tcp_delack_timer; - sk->delack_timer.data = (unsigned long) sk; - sk->retransmit_timer.function = tcp_retransmit_timer; - sk->retransmit_timer.data = (unsigned long)sk; - sk->retransmits = 0; - sk->prot->queue_xmit(sk, dev, buff, 0); - tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto); - tcp_statistics.TcpActiveOpens++; - tcp_statistics.TcpOutSegs++; - - release_sock(sk); - return(0); -} - -/* - * Socket option code for TCP. - */ - -int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) -{ - int val,err; - - if(level!=SOL_TCP) - return ip_setsockopt(sk,level,optname,optval,optlen); if (optval == NULL) return(-EINVAL); - err=verify_area(VERIFY_READ, optval, sizeof(int)); - if(err) - return err; - - get_user(val, (int *)optval); + if (get_user(val, (int *)optval)) + return -EFAULT; switch(optname) { @@ -2155,12 +1801,17 @@ } } -int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen) +int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, + int *optlen) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); int val,err; - if(level!=SOL_TCP) - return ip_getsockopt(sk,level,optname,optval,optlen); + if(level != SOL_TCP) + { + return tp->af_specific->getsockopt(sk, level, optname, + optval, optlen); + } switch(optname) { @@ -2186,29 +1837,21 @@ return(0); } +void tcp_set_keepalive(struct sock *sk, int val) +{ + if (!sk->keepopen && val) + { + tcp_inc_slow_timer(TCP_SLT_KEEPALIVE); + } + else if (sk->keepopen && 
!val) + { + tcp_dec_slow_timer(TCP_SLT_KEEPALIVE); + } +} -struct proto tcp_prot = { - tcp_close, - ip_build_header, - tcp_connect, - tcp_accept, - ip_queue_xmit, - tcp_retransmit, - tcp_write_wakeup, - tcp_read_wakeup, - tcp_rcv, - tcp_select, - tcp_ioctl, - NULL, - tcp_shutdown, - tcp_setsockopt, - tcp_getsockopt, - tcp_sendmsg, - tcp_recvmsg, - NULL, /* No special bind() */ - 128, - 0, - "TCP", - 0, 0, - {NULL,} -}; +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -c -o tcp.o tcp.c" + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv4/tcp_input.c linux/net/ipv4/tcp_input.c --- v2.1.7/linux/net/ipv4/tcp_input.c Thu Oct 10 19:10:58 1996 +++ linux/net/ipv4/tcp_input.c Sun Nov 3 11:04:45 1996 @@ -18,81 +18,85 @@ * Matthew Dillon, * Arnt Gulbrandsen, * Jorge Cwik, + */ + +/* + * TODO + * - A better sock cache + * + */ + +/* + * Changes: + * Pedro Roque : Fast Retransmit/Recovery. + * Two receive queues. + * Retransmit queue handled by TCP. + * Better retransmit timer handling. + * New congestion avoidance. + * Header prediction. + * Variable renaming. * - * FIXES - * Pedro Roque : Double ACK bug + * Eric : Fast Retransmit. + * Randy Scott : MSS option defines. * Eric Schenk : Fixes to slow start algorithm. * Eric Schenk : Yet another double ACK bug. * Eric Schenk : Delayed ACK bug fixes. * Eric Schenk : Floyd style fast retrans war avoidance. - * Eric Schenk : Skip fast retransmit on small windows. - * Eric schenk : Fixes to retransmission code to - * : avoid extra retransmission. - * Theodore Ts'o : Do secure TCP sequence numbers. */ #include -#include -#include #include + /* - * Policy code extracted so it's now separate + * Policy code extracted so it's now seperate */ /* * Called each time to estimate the delayed ack timeout. 
This is - * how it should be done so a fast link isn't impacted by ack delay. + * how it should be done so a fast link isnt impacted by ack delay. + * + * I think we need a medium deviation here also... + * The estimated value is changing to fast */ -extern __inline__ void tcp_delack_estimator(struct sock *sk) +extern __inline__ void tcp_delack_estimator(struct tcp_opt *tp) { + int m; + /* * Delayed ACK time estimator. */ - if (sk->lrcvtime == 0) - { - sk->lrcvtime = jiffies; - sk->ato = HZ/3; - } - else + m = jiffies - tp->lrcvtime; + + tp->lrcvtime = jiffies; + + if (m < 0) + return; + + /* + * if the mesured value is bigger than + * twice the round trip time ignore it. + */ + if ((m << 2) <= tp->srtt) { - int m; - - m = jiffies - sk->lrcvtime; + m -= (tp->iat >> 3); + tp->iat += m; - sk->lrcvtime = jiffies; + if (m <0) + m = -m; - if (m <= 0) - m = 1; + m -= (tp->iat_mdev >> 2); + tp->iat_mdev += m; - /* This used to test against sk->rtt. - * On a purely receiving link, there is no rtt measure. - * The result is that we lose delayed ACKs on one-way links. - * Therefore we test against sk->rto, which will always - * at least have a default value. - */ - if (m > sk->rto) - { - sk->ato = sk->rto; - /* - * printk(KERN_DEBUG "ato: rtt %lu\n", sk->ato); - */ - } - else - { - /* - * Very fast acting estimator. - * May fluctuate too much. Probably we should be - * doing something like the rtt estimator here. - */ - sk->ato = (sk->ato >> 1) + m; - /* - * printk(KERN_DEBUG "ato: m %lu\n", sk->ato); - */ - } + tp->ato = (tp->iat >> 3) + (tp->iat_mdev >> 2); + + if (tp->ato < HZ/50) + tp->ato = HZ/50; } + else + tp->ato = 0; } /* @@ -100,8 +104,8 @@ * retransmitted [see Karn/Partridge Proceedings SIGCOMM 87]. * The algorithm is from the SIGCOMM 88 piece by Van Jacobson. 
*/ - -extern __inline__ void tcp_rtt_estimator(struct sock *sk, struct sk_buff *oskb) + +extern __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt) { long m; /* @@ -111,130 +115,72 @@ * This is designed to be as fast as possible * m stands for "measurement". */ - - m = jiffies - oskb->when; /* RTT */ + /* + * On a 1990 paper the rto value is changed to: + * RTO = rtt + 4 * mdev + */ - if (sk->rtt != 0) { + m = mrtt; /* RTT */ + + if (tp->srtt != 0) { if(m<=0) m=1; /* IS THIS RIGHT FOR <0 ??? */ - m -= (sk->rtt >> 3); /* m is now error in rtt est */ - sk->rtt += m; /* rtt = 7/8 rtt + 1/8 new */ + m -= (tp->srtt >> 3); /* m is now error in rtt est */ + tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) m = -m; /* m is now abs(error) */ - m -= (sk->mdev >> 2); /* similar update on mdev */ - sk->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ + m -= (tp->mdev >> 2); /* similar update on mdev */ + tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ } else { - /* no previous measure. */ - sk->rtt = m<<3; /* take the measured time to be rtt */ - sk->mdev = m<<1; /* make sure rto = 3*rtt */ + /* no previous measure. */ + tp->srtt = m<<3; /* take the measured time to be rtt */ + tp->mdev = m<<2; /* make sure rto = 3*rtt */ } + /* * Now update timeout. Note that this removes any backoff. */ - /* Jacobson's algorithm calls for rto = R + 4V. - * We diverge from Jacobson's algorithm here. See the commentary - * in tcp_ack to understand why. 
- */ - sk->rto = (sk->rtt >> 3) + sk->mdev; - sk->rto += (sk->rto>>2) + (sk->rto >> (sk->cong_window-1)); - if (sk->rto > 120*HZ) - sk->rto = 120*HZ; - if (sk->rto < HZ/5) /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */ - sk->rto = HZ/5; - sk->backoff = 0; -} - -/* - * Cached last hit socket - */ - -static volatile unsigned long th_cache_saddr, th_cache_daddr; -static volatile unsigned short th_cache_dport, th_cache_sport; -static volatile struct sock *th_cache_sk; - -void tcp_cache_zap(void) -{ - th_cache_sk=NULL; -} + tp->rto = (tp->srtt >> 3) + tp->mdev; -/* - * Find the socket, using the last hit cache if applicable. The cache is not quite - * right... - */ + if (tp->rto > 120*HZ) + tp->rto = 120*HZ; -static inline struct sock * get_tcp_sock(u32 saddr, u16 sport, u32 daddr, u16 dport, u32 paddr, u16 pport) -{ - struct sock * sk; + /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */ + if (tp->rto < HZ/5) + tp->rto = HZ/5; - sk = (struct sock *) th_cache_sk; - if (!sk || saddr != th_cache_saddr || daddr != th_cache_daddr || - sport != th_cache_sport || dport != th_cache_dport) { - sk = get_sock(&tcp_prot, dport, saddr, sport, daddr, paddr, pport); - if (sk) { - th_cache_saddr=saddr; - th_cache_daddr=daddr; - th_cache_dport=dport; - th_cache_sport=sport; - th_cache_sk=sk; - } - } - return sk; + tp->backoff = 0; } + /* - * React to an out-of-window TCP sequence number in an incoming packet + * This functions checks to see if the tcp header is actually acceptable. */ -static void bad_tcp_sequence(struct sock *sk, struct tcphdr *th, u32 end_seq, - struct device *dev) +extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 seg_nxt) { - if (th->rst) - return; + u32 end_window = tp->rcv_wup + tp->rcv_wnd; + u32 end_seq = seg_nxt; /* - * Send a reset if we get something not ours and we are - * unsynchronized. Note: We don't do anything to our end. 
We - * are just killing the bogus remote connection then we will - * connect again and it will work (with luck). + * When the window is open (most common case) + * we want to accept segments if they have yet unseen data + * or in the case of a dataless segment if seg.seq == rcv.nxt + * this means: + * + * if (seq == end_seq) + * end_seq >= rcv.nxt + * else + * end_seq > rcv.nxt */ - - if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) - { - tcp_send_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl); - return; - } - /* - * This packet is old news. Usually this is just a resend - * from the far end, but sometimes it means the far end lost - * an ACK we sent, so we better send an ACK. - */ - /* - * BEWARE! Unconditional answering by ack to out-of-window ack - * can result in infinite exchange of empty acks. - * This check cures bug, found by Michiel Boland, but - * not another possible cases. - * If we are in TCP_TIME_WAIT, we have already received - * FIN, so that our peer need not window update. If our - * ACK were lost, peer would retransmit his FIN anyway. --ANK - */ - if (sk->state != TCP_TIME_WAIT || ntohl(th->seq) != end_seq) - tcp_send_ack(sk); -} + if (seq == end_seq) + end_seq++; -/* - * This functions checks to see if the tcp header is actually acceptable. - */ - -extern __inline__ int tcp_sequence(struct sock *sk, u32 seq, u32 end_seq) -{ - u32 end_window = sk->lastwin_seq + sk->window; - return /* if start is at end of window, end must be too (zero window) */ - (seq == end_window && seq == end_seq) || - /* if start is before end of window, check for interest */ - (before(seq, end_window) && !before(end_seq, sk->acked_seq)); + return ((before(seq, end_window) && after(end_seq, tp->rcv_nxt)) || + (seq == end_window && seq == end_seq)); } /* @@ -273,7 +219,7 @@ #endif if (!sk->dead) sk->state_change(sk); - kfree_skb(skb, FREE_READ); + return(0); } @@ -289,11 +235,11 @@ * as Linux gets deployed on 100Mb/sec networks. 
*/ -static void tcp_options(struct sock *sk, struct tcphdr *th) +int tcp_parse_options(struct tcphdr *th) { unsigned char *ptr; int length=(th->doff*4)-sizeof(struct tcphdr); - int mss_seen = 0; + int mss = 0; ptr = (unsigned char *)(th + 1); @@ -304,7 +250,7 @@ switch(opcode) { case TCPOPT_EOL: - return; + return 0; case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ length--; ptr--; /* the opsize=*ptr++ above was a mistake */ @@ -312,14 +258,13 @@ default: if(opsize<=2) /* Avoid silly options looping forever */ - return; + return 0; switch(opcode) { case TCPOPT_MSS: - if(opsize==4 && th->syn) + if(opsize==TCPOLEN_MSS && th->syn) { - sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr)); - mss_seen = 1; + mss = ntohs(*(unsigned short *)ptr); } break; /* Add other options here as people feel the urge to implement stuff like large windows */ @@ -328,612 +273,420 @@ length-=opsize; } } - if (th->syn) - { - if (! mss_seen) - sk->mtu=min(sk->mtu, 536); /* default MSS if none sent */ - } -#ifdef CONFIG_INET_PCTCP - sk->mss = min(sk->max_window >> 1, sk->mtu); -#else - sk->mss = min(sk->max_window, sk->mtu); - sk->max_unacked = 2 * sk->mss; -#endif + + return mss; } -/* - * This routine handles a connection request. - * It should make sure we haven't already responded. - * Because of the way BSD works, we have to send a syn/ack now. - * This also means it will be harder to close a socket which is - * listening. +/* + * See draft-stevens-tcpca-spec-01 for documentation. */ - -static void tcp_conn_request(struct sock *sk, struct sk_buff *skb, - u32 daddr, u32 saddr, struct options *opt, struct device *dev, u32 seq) -{ - struct sock *newsk; - struct tcphdr *th; - struct rtable *rt; - - th = skb->h.th; - /* If the socket is dead, don't accept the connection. 
*/ - if (!sk->dead) - { - sk->data_ready(sk,0); - } - else - { - if(sk->debug) - printk("Reset on %p: Connect on dead socket.\n",sk); - tcp_send_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl); - tcp_statistics.TcpAttemptFails++; - kfree_skb(skb, FREE_READ); - return; - } +static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup) +{ + struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); /* - * Make sure we can accept more. This will prevent a - * flurry of syns from eating up all our memory. - * - * BSD does some funnies here and allows 3/2 times the - * set backlog as a fudge factor. That's just too gross. + * An ACK is a duplicate if: + * (1) it has the same sequence number as the largest number we've + * seen, + * (2) it has the same window as the last ACK, + * (3) we have outstanding data that has not been ACKed + * (4) The packet was not carrying any data. + * (5) [From Floyds paper on fast retransmit wars] + * The packet acked data after high_seq; */ - if (sk->ack_backlog >= sk->max_ack_backlog) + if (ack == tp->snd_una && sk->packets_out && (not_dup == 0) && + after(ack, tp->high_seq)) { - tcp_statistics.TcpAttemptFails++; - kfree_skb(skb, FREE_READ); - return; - } - - /* - * We need to build a new sock struct. - * It is sort of bad to have a socket without an inode attached - * to it, but the wake_up's will just wake up the listening socket, - * and if the listening socket is destroyed before this is taken - * off of the queue, this will take care of it. - */ + + sk->dup_acks++; + - newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC); - if (newsk == NULL) - { - /* just ignore the syn. It will get retransmitted. */ - tcp_statistics.TcpAttemptFails++; - kfree_skb(skb, FREE_READ); - return; - } + /* + * 1. When the third duplicate ack is received, set ssthresh + * to one half the current congestion window, but no less + * than two segments. Retransmit the missing segment. 
+ */ + + if (sk->dup_acks == 3) + { + sk->ssthresh = max(sk->cong_window >> 1, 2); + sk->cong_window = sk->ssthresh + 3; + tcp_do_retransmit(sk, 0); + } - memcpy(newsk, sk, sizeof(*newsk)); - newsk->opt = NULL; - newsk->ip_route_cache = NULL; - if (opt && opt->optlen) - { - sk->opt = (struct options*)kmalloc(sizeof(struct options)+opt->optlen, GFP_ATOMIC); - if (!sk->opt) + /* + * 2. Each time another duplicate ACK arrives, increment + * cwnd by the segment size. [...] Transmit a packet... + * + * Packet transmission will be done on normal flow processing + * since we're not in "retransmit mode" + */ + + if (sk->dup_acks > 3) { - kfree_s(newsk, sizeof(struct sock)); - tcp_statistics.TcpAttemptFails++; - kfree_skb(skb, FREE_READ); - return; + sk->cong_window++; } - if (ip_options_echo(sk->opt, opt, daddr, saddr, skb)) + } + else + { + /* + * 3. When the next ACK arrives that acknowledges new data, + * set cwnd to ssthresh + */ + + if (sk->dup_acks >= 3) { - kfree_s(sk->opt, sizeof(struct options)+opt->optlen); - kfree_s(newsk, sizeof(struct sock)); - tcp_statistics.TcpAttemptFails++; - kfree_skb(skb, FREE_READ); - return; + sk->tp_pinfo.af_tcp.retrans_head = NULL; + sk->cong_window = sk->ssthresh; + sk->retransmits = 0; } + sk->dup_acks = 0; } - skb_queue_head_init(&newsk->write_queue); - skb_queue_head_init(&newsk->receive_queue); - newsk->send_head = NULL; - newsk->send_tail = NULL; - newsk->send_next = NULL; - skb_queue_head_init(&newsk->back_log); - newsk->rtt = 0; - newsk->rto = TCP_TIMEOUT_INIT; - newsk->mdev = TCP_TIMEOUT_INIT; - newsk->max_window = 0; - /* - * See draft-stevens-tcpca-spec-01 for discussion of the - * initialization of these values. 
- */ - newsk->cong_window = 1; - newsk->cong_count = 0; - newsk->ssthresh = 0x7fffffff; - - newsk->lrcvtime = 0; - newsk->idletime = 0; - newsk->high_seq = 0; - newsk->backoff = 0; - newsk->blog = 0; - newsk->intr = 0; - newsk->proc = 0; - newsk->done = 0; - newsk->partial = NULL; - newsk->pair = NULL; - newsk->wmem_alloc = 0; - newsk->rmem_alloc = 0; - newsk->localroute = sk->localroute; - - newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF; - - newsk->err = 0; - newsk->shutdown = 0; - newsk->ack_backlog = 0; - newsk->acked_seq = skb->seq+1; - newsk->lastwin_seq = skb->seq+1; - newsk->delay_acks = 1; - newsk->copied_seq = skb->seq+1; - newsk->fin_seq = skb->seq; - newsk->syn_seq = skb->seq; - newsk->state = TCP_SYN_RECV; - newsk->timeout = 0; - newsk->ip_xmit_timeout = 0; - newsk->write_seq = seq; - newsk->window_seq = newsk->write_seq; - newsk->rcv_ack_seq = newsk->write_seq; - newsk->urg_data = 0; - newsk->retransmits = 0; - newsk->linger=0; - newsk->destroy = 0; - init_timer(&newsk->timer); - newsk->timer.data = (unsigned long)newsk; - newsk->timer.function = &net_timer; - init_timer(&newsk->delack_timer); - newsk->delack_timer.data = (unsigned long)newsk; - newsk->delack_timer.function = tcp_delack_timer; - init_timer(&newsk->retransmit_timer); - newsk->retransmit_timer.data = (unsigned long)newsk; - newsk->retransmit_timer.function = tcp_retransmit_timer; - newsk->dummy_th.source = skb->h.th->dest; - newsk->dummy_th.dest = skb->h.th->source; -#ifdef CONFIG_IP_TRANSPARENT_PROXY - /* - * Deal with possibly redirected traffic by setting num to - * the intended destination port of the received packet. - */ - newsk->num = ntohs(skb->h.th->dest); - -#endif - /* - * Swap these two, they are from our point of view. 
- */ - - newsk->daddr = saddr; - newsk->saddr = daddr; - newsk->rcv_saddr = daddr; +} - put_sock(newsk->num,newsk); - newsk->acked_seq = skb->seq + 1; - newsk->copied_seq = skb->seq + 1; - newsk->socket = NULL; +int sysctl_tcp_vegas_cong_avoidance = 1; - /* - * Grab the ttl and tos values and use them - */ +/* + * TCP slow start and congestion avoidance in two flavors: + * RFC 1122 and TCP Vegas. + * + * This is a /proc/sys configurable option. + */ - newsk->ip_ttl=sk->ip_ttl; - newsk->ip_tos=skb->ip_hdr->tos; +#define SHIFT_FACTOR 12 +static void tcp_cong_avoid_vegas(struct sock *sk, u32 seq, u32 ack, + u32 seq_rtt) +{ /* - * Use 512 or whatever user asked for + * From: + * TCP Vegas: New Techniques for Congestion + * Detection and Avoidance. + * + * + * Warning: This code is a scratch implementation taken + * from the paper only. The code they distribute seams + * to have improved several things over the initial spec. */ - /* - * Note use of sk->user_mss, since user has no direct access to newsk - */ + u32 Actual, Expected; + u32 snt_bytes; + struct tcp_opt * tp; - rt = ip_rt_route(newsk->opt && newsk->opt->srr ? newsk->opt->faddr : saddr, 0); - newsk->ip_route_cache = rt; + tp = &(sk->tp_pinfo.af_tcp); + + if (!seq_rtt) + seq_rtt = 1; - if(rt!=NULL && (rt->rt_flags&RTF_WINDOW)) - newsk->window_clamp = rt->rt_window; + if (tp->basertt) + tp->basertt = min(seq_rtt, tp->basertt); else - newsk->window_clamp = 0; + tp->basertt = seq_rtt; + - if (sk->user_mss) - newsk->mtu = sk->user_mss; - else if (rt) - newsk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr); - else - newsk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr); - - /* - * But not bigger than device MTU - */ - - newsk->mtu = min(newsk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr)); - -#ifdef CONFIG_SKIP - - /* - * SKIP devices set their MTU to 65535. This is so they can take packets - * unfragmented to security process then fragment. 
They could lie to the - * TCP layer about a suitable MTU, but it's easier to let skip sort it out - * simply because the final package we want unfragmented is going to be - * - * [IPHDR][IPSP][Security data][Modified TCP data][Security data] - */ - - if(skip_pick_mtu!=NULL) /* If SKIP is loaded.. */ - sk->mtu=skip_pick_mtu(sk->mtu,dev); -#endif /* - * This will min with what arrived in the packet + * + * Actual = throughput for this segment. + * Expected = number_of_bytes in transit / BaseRTT + * */ - tcp_options(newsk,skb->h.th); - - tcp_cache_zap(); - tcp_send_synack(newsk, sk, skb); -} - + snt_bytes = (ack - seq) << SHIFT_FACTOR; + + Actual = snt_bytes / seq_rtt; + Expected = ((tp->snd_nxt - tp->snd_una) << SHIFT_FACTOR) / tp->basertt; -/* - * Handle a TCP window that shrunk on us. It shouldn't happen, - * but.. - * - * We may need to move packets from the send queue - * to the write queue, if the window has been shrunk on us. - * The RFC says you are not allowed to shrink your window - * like this, but if the other end does, you must be able - * to deal with it. - */ -void tcp_window_shrunk(struct sock * sk, u32 window_seq) -{ - struct sk_buff *skb; - struct sk_buff *skb2; - struct sk_buff *wskb = NULL; - - skb2 = sk->send_head; - sk->send_head = NULL; - sk->send_tail = NULL; - sk->send_next = NULL; - - /* - * This is an artifact of a flawed concept. We want one - * queue and a smarter send routine when we send all. - */ - cli(); - while (skb2 != NULL) - { - skb = skb2; - skb2 = skb->link3; - skb->link3 = NULL; - if (after(skb->end_seq, window_seq)) - { - if (sk->packets_out > 0) - sk->packets_out--; - /* We may need to remove this from the dev send list. */ - if (skb->next != NULL) - { - skb_unlink(skb); - } - /* Now add it to the write_queue. 
*/ - if (wskb == NULL) - skb_queue_head(&sk->write_queue,skb); - else - skb_append(wskb,skb); - wskb = skb; - } - else - { - if (sk->send_head == NULL) - { - sk->send_head = skb; - sk->send_tail = skb; - sk->send_next = skb; +/* + printk(KERN_DEBUG "A:%x E:%x rtt:%x srtt:%x win: %d\n", + Actual, Expected, seq_rtt, tp->srtt, sk->cong_window); + */ + /* + * Slow Start + */ + + if (sk->cong_window < sk->ssthresh && + (seq == tp->snd_nxt || + (((Expected - Actual) <= + ((TCP_VEGAS_GAMMA << SHIFT_FACTOR) * sk->mss / tp->basertt)) + ) + )) + { + + /* + * "Vegas allows exponential growth only every other + * RTT" + */ + + if (sk->cong_count || sk->cong_window <= 2) + { + sk->cong_window++; + sk->cong_count = 0; + } + else + sk->cong_count++; + } + else + { + /* + * Congestion Avoidance + */ + + if (Expected - Actual <= + ((TCP_VEGAS_ALPHA << SHIFT_FACTOR) * sk->mss / tp->basertt)) + { + /* Increase Linearly */ + + if (sk->cong_count >= sk->cong_window) + { + sk->cong_window++; + sk->cong_count = 0; } else + sk->cong_count++; + } + + if (Expected - Actual >= + ((TCP_VEGAS_BETA << SHIFT_FACTOR) * sk->mss / tp->basertt)) + { + /* Decrease Linearly */ + + if (sk->cong_count >= sk->cong_window) { - sk->send_tail->link3 = skb; - sk->send_tail = skb; + sk->cong_window--; + sk->cong_count = 0; } - skb->link3 = NULL; + else + sk->cong_count++; + + + /* Never less than 2 segments */ + if (sk->cong_window < 2) + sk->cong_window = 2; } + + + } +} + +static void tcp_cong_avoid_vanj(struct sock *sk, u32 seq, u32 ack, u32 seq_rtt) +{ + + /* + * This is Jacobson's slow start and congestion avoidance. + * SIGCOMM '88, p. 328. Because we keep cong_window in + * integral mss's, we can't do cwnd += 1 / cwnd. + * Instead, maintain a counter and increment it once every + * cwnd times. + */ + + if (sk->cong_window <= sk->ssthresh) + { + /* + * In "safe" area, increase + */ + + sk->cong_window++; } - sti(); + else + { + /* + * In dangerous area, increase slowly. 
+ * In theory this is + * sk->cong_window += 1 / sk->cong_window + */ + + if (sk->cong_count >= sk->cong_window) { + + sk->cong_window++; + sk->cong_count = 0; + } + else + sk->cong_count++; + } } +#define FLAG_DATA 0x01 +#define FLAG_WIN_UPDATE 0x02 +#define FLAG_DATA_ACKED 0x04 + /* * This routine deals with incoming acks, but not outgoing ones. - * - * This routine is totally _WRONG_. The list structuring is wrong, - * the algorithm is wrong, the code is wrong. */ -static int tcp_ack(struct sock *sk, struct tcphdr *th, u32 ack, int len) +static int tcp_ack(struct sock *sk, struct tcphdr *th, + u32 ack_seq, u32 ack, int len) { int flag = 0; - u32 window_seq; + u32 seq = 0; + u32 seq_rtt = 0; + struct sk_buff *skb; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - /* - * 1 - there was data in packet as well as ack or new data is sent or - * in shutdown state - * 2 - data from retransmit queue was acked and removed - * 4 - window shrunk or data from retransmit queue was acked and removed - */ if(sk->zapped) return(1); /* Dead, can't ack any more so why bother */ - /* - * We have dropped back to keepalive timeouts. Thus we have - * no retransmits pending. - */ - if (sk->ip_xmit_timeout == TIME_KEEPOPEN) - sk->retransmits = 0; + if (tp->pending == TIME_KEEPOPEN) + { + tp->probes_out = 0; + } + tp->rcv_tstamp = jiffies; + /* * If the ack is newer than sent or older than previous acks * then we can probably ignore it. 
*/ - if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) + if (after(ack, tp->snd_nxt) || before(ack, tp->snd_una)) goto uninteresting_ack; /* - * Have we discovered a larger window + * If there is data set flag 1 */ - window_seq = ntohs(th->window); - if (window_seq > sk->max_window) + + if (len != th->doff*4) { - sk->max_window = window_seq; -#ifdef CONFIG_INET_PCTCP - /* Hack because we don't send partial packets to non SWS - handling hosts */ - sk->mss = min(window_seq>>1, sk->mtu); -#else - sk->mss = min(window_seq, sk->mtu); -#endif + flag |= FLAG_DATA; + tcp_delack_estimator(tp); } - window_seq += ack; /* - * See if our window has been shrunk. + * Update our send window */ - if (after(sk->window_seq, window_seq)) - tcp_window_shrunk(sk, window_seq); /* - * Pipe has emptied - */ - if (sk->send_tail == NULL || sk->send_head == NULL) + * This is the window update code as per RFC 793 + * snd_wl{1,2} are used to prevent unordered + * segments from shrinking the window + */ + + if ((tp->snd_wl1 == 0) || before(tp->snd_wl1, ack_seq) || + (tp->snd_wl1 == ack_seq && !after(tp->snd_wl2, ack))) { - sk->send_head = NULL; - sk->send_tail = NULL; - sk->send_next = NULL; - sk->packets_out= 0; + tp->snd_wnd = ntohs(th->window); + tp->snd_wl1 = ack_seq; + tp->snd_wl2 = ack; + + flag |= FLAG_WIN_UPDATE; + + if (tp->snd_wnd > sk->max_window) + { + sk->max_window = tp->snd_wnd; + } } + /* - * We don't want too many packets out there. + * We passed data and got it acked, remove any soft error + * log. Something worked... */ - if (sk->ip_xmit_timeout == TIME_WRITE && - sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) + sk->err_soft = 0; + + /* + * If this ack opens up a zero window, clear backoff. It was + * being used to time the probes, and is probably far higher than + * it needs to be for normal retransmission. 
+ */ + + if (tp->pending == TIME_PROBE0) { + tp->probes_out = 0; /* Our probe was answered */ - /* - * This is Jacobson's slow start and congestion avoidance. - * SIGCOMM '88, p. 328. Because we keep cong_window in integral - * mss's, we can't do cwnd += 1 / cwnd. Instead, maintain a - * counter and increment it once every cwnd times. It's possible - * that this should be done only if sk->retransmits == 0. I'm - * interpreting "new data is acked" as including data that has - * been retransmitted but is just now being acked. + /* + * Was it a usable window open ? */ - if (sk->cong_window <= sk->ssthresh) - /* - * In "safe" area, increase - */ - sk->cong_window++; - else + + /* should always be non-null */ + if (tp->send_head != NULL && + !before (ack + tp->snd_wnd, tp->send_head->end_seq)) { - /* - * In dangerous area, increase slowly. In theory this is - * sk->cong_window += 1 / sk->cong_window - */ - if (sk->cong_count >= sk->cong_window) - { - sk->cong_window++; - sk->cong_count = 0; - } - else - sk->cong_count++; - } - } + tp->backoff = 0; + tp->pending = 0; - /* - * Remember the highest ack received and update the - * right hand window edge of the host. - * We do a bit of work here to track number of times we've - * seen this ack without a change in the right edge of the - * window and no data in the packet. - * This will allow us to do fast retransmits. - */ - - /* We are looking for duplicate ACKs here. - * An ACK is a duplicate if: - * (1) it has the same sequence number as the largest number we've seen, - * (2) it has the same window as the last ACK, - * (3) we have outstanding data that has not been ACKed - * (4) The packet was not carrying any data. - * (5) [From Floyd's paper on fast retransmit wars] - * The packet acked data after high_seq; - * I've tried to order these in occurrence of most likely to fail - * to least likely to fail. - * [These are an extension of the rules BSD stacks use to - * determine if an ACK is a duplicate.] 
- */ - - if (sk->rcv_ack_seq == ack - && sk->window_seq == window_seq - && len != th->doff*4 - && before(ack, sk->sent_seq) - && after(ack, sk->high_seq)) - { - /* Prevent counting of duplicate ACKs if the congestion - * window is smaller than 3. Note that since we reduce - * the congestion window when we do a fast retransmit, - * we must be careful to keep counting if we were already - * counting. The idea behind this is to avoid doing - * fast retransmits if the congestion window is so small - * that we cannot get 3 ACKs due to the loss of a packet - * unless we are getting ACKs for retransmitted packets. - */ - if (sk->cong_window >= 3 || sk->rcv_ack_cnt > MAX_DUP_ACKS+1) - sk->rcv_ack_cnt++; - /* See draft-stevens-tcpca-spec-01 for explanation - * of what we are doing here. - */ - if (sk->rcv_ack_cnt == MAX_DUP_ACKS+1) { - int tmp; + tcp_clear_xmit_timer(sk, TIME_PROBE0); - /* We need to be a bit careful to preserve the - * count of packets that are out in the system here. - */ - sk->ssthresh = max(sk->cong_window >> 1, 2); - sk->cong_window = sk->ssthresh+MAX_DUP_ACKS+1; - tmp = sk->packets_out; - tcp_do_retransmit(sk,0); - sk->packets_out = tmp; - } else if (sk->rcv_ack_cnt > MAX_DUP_ACKS+1) { - sk->cong_window++; - /* - * At this point we are suppose to transmit a NEW - * packet (not retransmit the missing packet, - * this would only get us into a retransmit war.) - * I think that having just adjusted cong_window - * we will transmit the new packet below. - */ - } - } - else - { - if (sk->rcv_ack_cnt > MAX_DUP_ACKS) { - sk->cong_window = sk->ssthresh; } - sk->window_seq = window_seq; - sk->rcv_ack_seq = ack; - sk->rcv_ack_cnt = 1; - } - - /* - * We passed data and got it acked, remove any soft error - * log. Something worked... - */ - - sk->err_soft = 0; - - /* - * If this ack opens up a zero window, clear backoff. It was - * being used to time the probes, and is probably far higher than - * it needs to be for normal retransmission. 
- */ - - if (sk->ip_xmit_timeout == TIME_PROBE0) - { - sk->retransmits = 0; /* Our probe was answered */ - - /* - * Was it a usable window open ? - */ - - if (!skb_queue_empty(&sk->write_queue) && /* should always be true */ - ! before (sk->window_seq, sk->write_queue.next->end_seq)) + else { - sk->backoff = 0; - - /* - * Recompute rto from rtt. this eliminates any backoff. - */ - - /* - * Appendix C of Van Jacobson's final version of - * the SIGCOMM 88 paper states that although - * the original paper suggested that - * RTO = R*2V - * was the correct calculation experience showed - * better results using - * RTO = R*4V - * In particular this gives better performance over - * slow links, and should not effect fast links. - * - * Note: Jacobson's algorithm is fine on BSD which - * has a 1/2 second granularity clock, but with our - * 1/100 second granularity clock we become too - * sensitive to minor changes in the round trip time. - * We add in two compensating factors. - * First we multiply by 5/4. For large congestion - * windows this allows us to tolerate burst traffic - * delaying up to 1/4 of our packets. - * We also add in a rtt / cong_window term. - * For small congestion windows this allows - * a single packet delay, but has negligible effect - * on the compensation for large windows. - */ - sk->rto = (sk->rtt >> 3) + sk->mdev; - sk->rto += (sk->rto>>2) + (sk->rto >> (sk->cong_window-1)); - if (sk->rto > 120*HZ) - sk->rto = 120*HZ; - if (sk->rto < HZ/5) /* Was 1*HZ, then 1 - turns out we must allow about - .2 of a second because of BSD delayed acks - on a 100Mb/sec link - .2 of a second is going to need huge windows (SIGH) */ - sk->rto = HZ/5; + tcp_reset_xmit_timer(sk, TIME_PROBE0, + min(tp->rto << tp->backoff, + 120*HZ)); } } /* * See if we can take anything off of the retransmit queue. 
*/ + + start_bh_atomic(); - for (;;) { - struct sk_buff * skb = sk->send_head; - if (!skb) - break; - + while(((skb=skb_peek(&sk->write_queue)) != NULL) && + (skb != tp->send_head)) + { /* Check for a bug. */ - if (skb->link3 && after(skb->end_seq, skb->link3->end_seq)) - printk("INET: tcp.c: *** bug send_list out of order.\n"); - + + if (skb->next != (struct sk_buff*) &sk->write_queue && + after(skb->end_seq, skb->next->seq)) + printk("INET: tcp_input.c: *** " + "bug send_list out of order.\n"); + /* * If our packet is before the ack sequence we can - * discard it as it's confirmed to have arrived the other end. + * discard it as it's confirmed to have arrived the + * other end. */ - if (after(skb->end_seq, ack)) - break; - - if (sk->retransmits) + if (!after(skb->end_seq, ack)) { - /* - * We were retransmitting. don't count this in RTT est - */ - flag |= 2; - } + if (sk->debug) + { + printk(KERN_DEBUG "removing seg %x-%x from " + "retransmit queue\n", + skb->seq, skb->end_seq); + } + + tp->retrans_head = NULL; + + flag |= FLAG_DATA_ACKED; + seq = skb->seq; + seq_rtt = jiffies - skb->when; + + skb_unlink(skb); + atomic_dec(&sk->packets_out); + skb->free = 1; - if ((sk->send_head = skb->link3) == NULL) + kfree_skb(skb, FREE_WRITE); + + if (!sk->dead) + sk->write_space(sk); + } + else { - sk->send_tail = NULL; - sk->send_next = NULL; - sk->retransmits = 0; + break; } + } - /* - * advance the send_next pointer if needed. - */ - if (sk->send_next == skb) - sk->send_next = sk->send_head; + end_bh_atomic(); + + /* + * if we where retransmiting don't count rtt estimate + */ + if (sk->retransmits) + { + if (sk->packets_out == 0) + sk->retransmits = 0; + } + else + { /* * Note that we only reset backoff and rto in the * rtt recomputation code. And that doesn't happen @@ -946,274 +699,89 @@ * suddenly. I.e. Karn's algorithm. (SIGCOMM '87, p5.) */ - /* - * We have one less packet out there. 
- */ - - if (sk->packets_out > 0) - sk->packets_out --; - - /* This is really only supposed to be called when we - * are actually ACKing new data, which should exclude - * the ACK handshake on an initial SYN packet as well. - * Rather than introducing a new test here for this - * special case, we just reset the initial values for - * rtt immediately after we move to the established state. - */ - if (!(flag&2)) /* Not retransmitting */ - tcp_rtt_estimator(sk,skb); - IS_SKB(skb); - - /* - * We may need to remove this from the dev send list. - */ - cli(); - if (skb->next) - skb_unlink(skb); - sti(); - kfree_skb(skb, FREE_WRITE); /* write. */ - if (!sk->dead) - sk->write_space(sk); - } - - /* - * Maybe we can take some stuff off of the write queue, - * and put it onto the xmit queue. - * There is bizarre case being tested here, to check if - * the data at the head of the queue ends before the start of - * the sequence we already ACKed. This is not an error, - * it can occur when we send a packet directly off of the write_queue - * in a zero window probe. - */ - - if (!skb_queue_empty(&sk->write_queue) && - !before(sk->window_seq, sk->write_queue.next->end_seq) && - (sk->retransmits == 0 || - sk->ip_xmit_timeout != TIME_WRITE || - !after(sk->write_queue.next->end_seq, sk->rcv_ack_seq)) && - sk->packets_out < sk->cong_window) - { - /* - * Add more data to the send queue. - */ - tcp_write_xmit(sk); - } - - /* - * Reset timers to reflect the new state. - * - * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets - * from TCP_CLOSE we don't do anything - * - * from anything else, if there is queued data (or fin) pending, - * we use a TIME_WRITE timeout, if there is data to write but - * no room in the window we use TIME_PROBE0, else if keepalive - * we reset to a KEEPALIVE timeout, else we delete the timer. - * - * We do not set flag for nominal write data, otherwise we may - * force a state where we start to write itsy bitsy tidbits - * of data. 
- */ - - switch(sk->state) { - case TCP_TIME_WAIT: - /* - * keep us in TIME_WAIT until we stop getting packets, - * reset the timeout. - */ - tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - break; - case TCP_CLOSE: - /* - * don't touch the timer. - */ - break; - default: - /* - * Must check send_head and write_queue - * to determine which timeout to use. - */ - if (sk->send_head) { - tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto); - } else if (!skb_queue_empty(&sk->write_queue) - && sk->ack_backlog == 0) + if (flag & FLAG_DATA_ACKED) { - /* - * if the write queue is not empty when we get here - * then we failed to move any data to the retransmit - * queue above. (If we had send_head would be non-NULL). - * Furthermore, since the send_head is NULL here - * we must not be in retransmit mode at this point. - * This implies we have no packets in flight, - * hence sk->packets_out < sk->cong_window. - * Examining the conditions for the test to move - * data to the retransmission queue we find that - * we must therefore have a zero window. - * Hence, if the ack_backlog is 0 we should initiate - * a zero probe. - * We don't do a zero probe if we have a delayed - * ACK in hand since the other side may have a - * window opening, but they are waiting to hear - * from us before they tell us about it. - * (They are applying Nagle's rule). - * So, we don't set up the zero window probe - * just yet. We do have to clear the timer - * though in this case... - */ - tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto); - } else if (sk->keepopen) { - tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); - } else { - del_timer(&sk->retransmit_timer); - sk->ip_xmit_timeout = 0; + tcp_rtt_estimator(tp, seq_rtt); + if (sysctl_tcp_vegas_cong_avoidance) + { + tcp_cong_avoid_vegas(sk, seq, ack, seq_rtt); + } + else + { + tcp_cong_avoid_vanj(sk, seq, ack, seq_rtt); + } } - break; - } - - /* - * We have nothing queued but space to send. 
Send any partial - * packets immediately (end of Nagle rule application). - */ - - if (sk->packets_out == 0 - && sk->partial != NULL - && skb_queue_empty(&sk->write_queue) - && sk->send_head == NULL) - { - tcp_send_partial(sk); } - /* - * In the LAST_ACK case, the other end FIN'd us. We then FIN'd them, and - * we are now waiting for an acknowledge to our FIN. The other end is - * already in TIME_WAIT. - * - * Move to TCP_CLOSE on success. - */ + - if (sk->state == TCP_LAST_ACK) + /* Sanity check out packets_out counter */ + if (skb_queue_len(&sk->write_queue) == 0 || + ack == tp->snd_nxt ) { - if (!sk->dead) - sk->state_change(sk); - if(sk->debug) - printk("rcv_ack_seq: %X==%X, acked_seq: %X==%X\n", - sk->rcv_ack_seq,sk->write_seq,sk->acked_seq,sk->fin_seq); - if (sk->rcv_ack_seq == sk->write_seq /*&& sk->acked_seq == sk->fin_seq*/) + if (sk->packets_out) { - sk->shutdown = SHUTDOWN_MASK; - tcp_set_state(sk,TCP_CLOSE); - return 1; - } + printk(KERN_DEBUG "tcp_ack: packets_out %d\n", + sk->packets_out); + sk->packets_out = 0; + } } - /* - * Incoming ACK to a FIN we sent in the case of our initiating the close. - * - * Move to FIN_WAIT2 to await a FIN from the other end. Set - * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in. - */ - if (sk->state == TCP_FIN_WAIT1) + if (sk->packets_out) { - - if (!sk->dead) - sk->state_change(sk); - if (sk->rcv_ack_seq == sk->write_seq) + if (flag & FLAG_DATA_ACKED) { - sk->shutdown |= SEND_SHUTDOWN; - tcp_set_state(sk, TCP_FIN_WAIT2); - /* If the socket is dead, then there is no - * user process hanging around using it. - * We want to set up a FIN_WAIT2 timeout ala BSD. - */ - if (sk->dead) - tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT); + long when; + + skb = skb_peek(&sk->write_queue); + + when = tp->rto - (jiffies - skb->when); + + if (when <= 0) + { + tp->retrans_head = NULL; + /* + * This is tricky. We are retransmiting a + * segment of a window when congestion occured. 
+ */ + tcp_do_retransmit(sk, 0); + tcp_reset_xmit_timer(sk, TIME_RETRANS, + tp->rto); + } + else + tcp_reset_xmit_timer(sk, TIME_RETRANS, when); } } + else + tcp_clear_xmit_timer(sk, TIME_RETRANS); + /* - * Incoming ACK to a FIN we sent in the case of a simultaneous close. - * - * Move to TIME_WAIT + * Remember the highest ack received. */ + + tp->snd_una = ack; + + tcp_fast_retrans(sk, ack, (flag & (FLAG_DATA|FLAG_WIN_UPDATE))); - if (sk->state == TCP_CLOSING) - { - if (!sk->dead) - sk->state_change(sk); - if (sk->rcv_ack_seq == sk->write_seq) - { - tcp_time_wait(sk); - } - } - - /* - * Final ack of a three way shake - */ - - if (sk->state==TCP_SYN_RECV) - { - tcp_set_state(sk, TCP_ESTABLISHED); - tcp_options(sk,th); - sk->dummy_th.dest=th->source; - sk->copied_seq = sk->acked_seq; - if(!sk->dead) - sk->state_change(sk); - if(sk->max_window==0) - { - sk->max_window=32; /* Sanity check */ - sk->mss=min(sk->max_window,sk->mtu); - } - /* Reset the RTT estimator to the initial - * state rather than testing to avoid - * updating it on the ACK to the SYN packet. - */ - sk->rtt = 0; - sk->rto = TCP_TIMEOUT_INIT; - sk->mdev = TCP_TIMEOUT_INIT; - } - /* - * The following code has been greatly simplified from the - * old hacked up stuff. The wonders of properly setting the - * retransmission timeouts. - * - * If we are retransmitting, and we acked a packet on the retransmit - * queue, and there is still something in the retransmit queue, - * then we can output some retransmission packets. + * Maybe we can take some stuff off of the write queue, + * and put it onto the xmit queue. */ - if (sk->send_head != NULL && (flag&2) && sk->retransmits) - { - tcp_do_retransmit(sk, 1); - } return 1; uninteresting_ack: + + tcp_fast_retrans(sk, ack, 0); + if(sk->debug) - printk("Ack ignored %u %u\n",ack,sk->sent_seq); + printk("Ack ignored %u %u\n",ack,tp->snd_nxt); - /* - * Keepalive processing. 
- */ - - if (after(ack, sk->sent_seq)) - { - return 0; - } - - /* - * Restart the keepalive timer. - */ - - if (sk->keepopen) - { - if(sk->ip_xmit_timeout==TIME_KEEPOPEN) - tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); - } - return 1; + return 0; } @@ -1237,6 +805,8 @@ { sk->fin_seq = skb->end_seq; + tcp_send_ack(sk); + if (!sk->dead) { sk->state_change(sk); @@ -1249,10 +819,11 @@ case TCP_SYN_SENT: case TCP_ESTABLISHED: /* - * move to CLOSE_WAIT, tcp_data() already handled - * sending the ack. + * move to CLOSE_WAIT */ - tcp_set_state(sk,TCP_CLOSE_WAIT); + + tcp_set_state(sk, TCP_CLOSE_WAIT); + if (th->rst) sk->shutdown = SHUTDOWN_MASK; break; @@ -1280,27 +851,11 @@ * This causes a WRITE timeout, which will either * move on to TIME_WAIT when we timeout, or resend * the FIN properly (maybe we get rid of that annoying - * FIN lost hang). The TIME_WRITE code is already correct - * for handling this timeout. + * FIN lost hang). The TIME_WRITE code is already + * correct for handling this timeout. */ - if (sk->ip_xmit_timeout != TIME_WRITE) { - if (sk->send_head) - tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto); - else if (sk->ip_xmit_timeout != TIME_PROBE0 - || skb_queue_empty(&sk->write_queue)) { - /* BUG check case. - * We have a problem here if there - * is no timer running [leads to - * frozen socket] or no data in the - * write queue [means we sent a fin - * and lost it from the queue before - * changing the ack properly]. - */ - printk(KERN_ERR "Lost timer or fin packet in tcp_fin.\n"); - } - } - tcp_set_state(sk,TCP_CLOSING); + tcp_set_state(sk, TCP_CLOSING); break; case TCP_FIN_WAIT2: /* @@ -1326,156 +881,176 @@ return(0); } -/* - * Add a sk_buff to the TCP receive queue, calculating - * the ACK sequence as we go.. - */ -static inline void tcp_insert_skb(struct sk_buff * skb, struct sk_buff_head * list) -{ - struct sk_buff * prev, * next; - u32 seq; + /* - * Find where the new skb goes.. 
(This goes backwards, - * on the assumption that we get the packets in order) + * This one checks to see if we can put data from the + * out_of_order queue into the receive_queue */ - seq = skb->seq; - prev = list->prev; - next = (struct sk_buff *) list; - for (;;) { - if (prev == (struct sk_buff *) list || !after(prev->seq, seq)) + +static __inline__ void tcp_ofo_queue(struct sock *sk) +{ + struct sk_buff * skb; + struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); + + while ((skb = skb_peek(&sk->out_of_order_queue))) { + + if (after(skb->seq, tp->rcv_nxt)) break; - next = prev; - prev = prev->prev; + + if (!after(skb->end_seq, tp->rcv_nxt)) { + + if (sk->debug) + printk("ofo packet was allready received \n"); + + skb_unlink(skb); + kfree_skb(skb, FREE_READ); + + continue; + } + + if (sk->debug) + printk("ofo requeuing : rcv_next %X seq %X - %X\n", + tp->rcv_nxt, skb->seq, skb->end_seq); + + skb_unlink(skb); + + + skb_queue_tail(&sk->receive_queue, skb); + + + tp->rcv_nxt = skb->end_seq; } - __skb_insert(skb, prev, next, list); } -/* - * Called for each packet when we find a new ACK endpoint sequence in it - */ -static inline u32 tcp_queue_ack(struct sk_buff * skb, struct sock * sk) +static __inline__ void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { + struct sk_buff * skb1; + struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); + /* - * When we ack the fin, we do the FIN - * processing. + * Queue data for delivery to the user + * Packets in sequence go to the receive queue + * Out of sequence packets to out_of_order_queue */ - skb->acked = 1; - if (skb->h.th->fin) - tcp_fin(skb,sk,skb->h.th); - return skb->end_seq; -} -static void tcp_queue(struct sk_buff * skb, struct sock * sk, struct tcphdr *th) -{ - u32 ack_seq; - tcp_insert_skb(skb, &sk->receive_queue); + if (skb->seq == tp->rcv_nxt) { + + /* + * Ok. In sequence. 
+ */ + + + skb_queue_tail(&sk->receive_queue, skb); + + + tp->rcv_nxt = skb->end_seq; + + tcp_ofo_queue(sk); + + if (skb_queue_len(&sk->out_of_order_queue) == 0) + tp->pred_flags = htonl((0x5010 << 16) | tp->snd_wnd); + return; + } + /* - * Did we get anything new to ack? + * Not in sequence + * either a retransmit or some packet got lost */ - ack_seq = sk->acked_seq; + if (!after(skb->end_seq, tp->rcv_nxt)) { + + /* + * A retransmit. + * 2nd most common case. + * force an imediate ack + */ - if (!after(skb->seq, ack_seq)) { - if (after(skb->end_seq, ack_seq)) { - /* the packet straddles our window end */ - struct sk_buff_head * list = &sk->receive_queue; - struct sk_buff * next; - ack_seq = tcp_queue_ack(skb, sk); + if (sk->debug) + printk("retransmit received: seq %X\n", skb->seq); - /* - * Do we have any old packets to ack that the above - * made visible? (Go forward from skb) - */ - next = skb->next; - while (next != (struct sk_buff *) list) { - if (after(next->seq, ack_seq)) - break; - if (after(next->end_seq, ack_seq)) - ack_seq = tcp_queue_ack(next, sk); - next = next->next; - } + sk->delayed_acks = MAX_DELAY_ACK; + kfree_skb(skb, FREE_READ); - /* - * Ok, we found new data, update acked_seq as - * necessary (and possibly send the actual - * ACK packet). - */ - sk->acked_seq = ack_seq; + return; + } - } else { - if (sk->debug) - printk("Ack duplicate packet.\n"); - tcp_send_ack(sk); - return; - } + if (before(skb->seq, tp->rcv_nxt)) { /* - * Delay the ack if possible. Send ack's to - * fin frames immediately as there shouldn't be - * anything more to come. + * Partial packet + * seq < rcv_next < end_seq */ - if (!sk->delay_acks || th->fin) { - tcp_send_ack(sk); - } else { - /* - * If psh is set we assume it's an - * interactive session that wants quick - * acks to avoid nagling too much. - */ - int delay = HZ/2; - if (th->psh) - delay = HZ/50; - tcp_send_delayed_ack(sk, delay, sk->ato); - } - /* - * Tell the user we have some more data. 
- */ + if (sk->debug) + printk("partial packet: rcv_next %X seq %X - %X\n", + tp->rcv_nxt, skb->seq, skb->end_seq); + + skb_queue_tail(&sk->receive_queue, skb); - if (!sk->dead) - sk->data_ready(sk,0); - } - else - { - /* - * If we've missed a packet, send an ack. - * Also start a timer to send another. - * - * 4.3reno machines look for these kind of acks so - * they can do fast recovery. Three identical 'old' - * acks lets it know that one frame has been lost - * and should be resent. Because this is before the - * whole window of data has timed out it can take - * one lost frame per window without stalling. - * [See Jacobson RFC1323, Stevens TCP/IP illus vol2] - * - * We also should be spotting triple bad sequences. - * [We now do this.] - * - */ - - if (!skb->acked) - { - if(sk->debug) - printk("Ack past end of seq packet.\n"); - tcp_send_ack(sk); - /* - * We need to be very careful here. We must - * not violate Jacobsons packet conservation condition. - * This means we should only send an ACK when a packet - * leaves the network. We can say a packet left the - * network when we see a packet leave the network, or - * when an rto measure expires. - */ - tcp_send_delayed_ack(sk,sk->rto,sk->rto); - } - } -} + tp->rcv_nxt = skb->end_seq; + + tcp_ofo_queue(sk); + + if (skb_queue_len(&sk->out_of_order_queue) == 0) + tp->pred_flags = htonl((0x5010 << 16) | tp->snd_wnd); + + return; + } + + /* + * Ok. 
This is an out_of_order segment + */ + + /* Force an ack */ + + sk->delayed_acks = MAX_DELAY_ACK; + + /* + * disable header predition + */ + + tp->pred_flags = 0; + + if (sk->debug) + printk("out of order segment: rcv_next %X seq %X - %X\n", + tp->rcv_nxt, skb->seq, skb->end_seq); + + if (skb_peek(&sk->out_of_order_queue) == NULL) { + skb_queue_head(&sk->out_of_order_queue,skb); + } + else + for(skb1=sk->out_of_order_queue.prev; ; skb1 = skb1->prev) { + + /* allready there */ + if (skb->seq==skb1->seq && skb->len>=skb1->len) + { + skb_append(skb1,skb); + skb_unlink(skb1); + kfree_skb(skb1,FREE_READ); + break; + } + + if (after(skb->seq, skb1->seq)) + { + skb_append(skb1,skb); + break; + } + + /* + * See if we've hit the start. If so insert. + */ + if (skb1 == skb_peek(&sk->out_of_order_queue)) { + skb_queue_head(&sk->out_of_order_queue,skb); + break; + } + } + +} /* @@ -1484,117 +1059,124 @@ * room, then we will just have to discard the packet. */ -static int tcp_data(struct sk_buff *skb, struct sock *sk, - unsigned long saddr, unsigned int len) +static int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned int len) { struct tcphdr *th; - u32 new_seq, shut_seq; + struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); th = skb->h.th; skb_pull(skb,th->doff*4); skb_trim(skb,len-(th->doff*4)); + if (skb->len == 0 && !th->fin) + { + return(0); + } + /* - * The bytes in the receive read/assembly queue has increased. Needed for the - * low memory discard algorithm + * FIXME: don't accept data after the receved fin + */ + + /* + * The bytes in the receive read/assembly queue has increased. + * Needed for the low memory discard algorithm */ sk->bytes_rcv += skb->len; - - if (skb->len == 0 && !th->fin) + + /* + * We no longer have anyone receiving data on this connection. + */ + + tcp_data_queue(sk, skb); + + if (before(tp->rcv_nxt, sk->copied_seq)) { - /* - * Don't want to keep passing ack's back and forth. 
- * (someone sent us dataless, boring frame) - */ - if (!th->ack) - tcp_send_ack(sk); - kfree_skb(skb, FREE_READ); - return(0); + printk("*** tcp.c:tcp_data bug acked < copied\n"); + tp->rcv_nxt = sk->copied_seq; } + sk->delayed_acks++; + /* - * We no longer have anyone receiving data on this connection. + * Now tell the user we may have some data. */ + + if (!sk->dead) + { + if(sk->debug) + printk("Data wakeup.\n"); + sk->data_ready(sk,0); + } + return(1); +} -#ifndef TCP_DONT_RST_SHUTDOWN +static void tcp_data_snd_check(struct sock *sk) +{ + struct sk_buff *skb; + struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); - if(sk->shutdown & RCV_SHUTDOWN) + if ((skb = tp->send_head)) { - /* - * FIXME: BSD has some magic to avoid sending resets to - * broken 4.2 BSD keepalives. Much to my surprise a few non - * BSD stacks still have broken keepalives so we want to - * cope with it. - */ - - if(skb->len) /* We don't care if it's just an ack or - a keepalive/window probe */ + if (!after(skb->end_seq, tp->snd_una + tp->snd_wnd) && + sk->packets_out < sk->cong_window ) { - new_seq = skb->seq + skb->len + th->syn; /* Right edge of _data_ part of frame */ - - /* Do this the way 4.4BSD treats it. Not what I'd - regard as the meaning of the spec but it's what BSD - does and clearly they know everything 8) */ - /* - * This is valid because of two things - * - * a) The way tcp_data behaves at the bottom. - * b) A fin takes effect when read not when received. + * Add more data to the send queue. 
*/ - - shut_seq = sk->acked_seq+1; /* Last byte */ - - if(after(new_seq,shut_seq)) - { - if(sk->debug) - printk("Data arrived on %p after close [Data right edge %X, Socket shut on %X] %d\n", - sk, new_seq, shut_seq, sk->blog); - if(sk->dead) - { - sk->acked_seq = new_seq + th->fin; - tcp_send_reset(sk->saddr, sk->daddr, skb->h.th, - sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl); - tcp_statistics.TcpEstabResets++; - sk->err = EPIPE; - sk->error_report(sk); - sk->shutdown = SHUTDOWN_MASK; - tcp_set_state(sk,TCP_CLOSE); - kfree_skb(skb, FREE_READ); - return 0; - } - } + + tcp_write_xmit(sk); + wake_up_interruptible(sk->sleep); } + else if (sk->packets_out == 0 && !tp->pending) + { + /* + * Data to queue but no room. + */ + tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto); + } } +} -#endif - +static __inline__ void tcp_ack_snd_check(struct sock *sk) +{ /* - * We should only call this if there is data in the frame. - */ - tcp_delack_estimator(sk); + * This also takes care of updating the window. + * This if statement needs to be simplified. + * + * rules for delaying an ack: + * - delay time <= 0.5 HZ + * - we don't have a window update to send + * - must send at least every 2 full sized packets + */ - tcp_queue(skb, sk, th); + if (sk->delayed_acks == 0) + return; - return(0); + if (sk->delayed_acks >= MAX_DELAY_ACK || tcp_raise_window(sk)) + { + tcp_send_ack(sk); + } + else + { + tcp_send_delayed_ack(sk, HZ/2); + } } - /* * This routine is only called when we have urgent data * signalled. Its the 'slow' part of tcp_urg. It could be * moved inline now as tcp_urg is only called from one * place. We handle URGent data wrong. We have to - as * BSD still doesn't use the correction from RFC961. - * * For 1003.1g we should support a new option TCP_STDURG to permit * either form. 
*/ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); u32 ptr = ntohs(th->urg_ptr); if (ptr) @@ -1628,6 +1210,9 @@ sk->copied_seq++; /* Move the copied sequence on correctly */ sk->urg_data = URG_NOTYET; sk->urg_seq = ptr; + + /* disable header prediction */ + tp->pred_flags = 0; } /* @@ -1662,429 +1247,430 @@ } } -/* - * This should be a bit smarter and remove partially - * overlapping stuff too, but this should be good - * enough for any even remotely normal case (and the - * worst that can happen is that we have a few - * unnecessary packets in the receive queue). - * - * This function is never called with an empty list.. - */ -static inline void tcp_remove_dups(struct sk_buff_head * list) -{ - struct sk_buff * next = list->next; - - for (;;) { - struct sk_buff * skb = next; - next = next->next; - if (next == (struct sk_buff *) list) - break; - if (before(next->end_seq, skb->end_seq)) { - __skb_unlink(next, list); - kfree_skb(next, FREE_READ); - next = skb; - continue; - } - if (next->seq != skb->seq) - continue; - __skb_unlink(skb, list); - kfree_skb(skb, FREE_READ); - } -} -/* - * Throw out all unnecessary packets: we've gone over the - * receive queue limit. This shouldn't happen in a normal - * TCP connection, but we might have gotten duplicates etc. - */ -static void prune_queue(struct sk_buff_head * list) +static __inline__ void prune_queue(struct sock *sk) { - for (;;) { - struct sk_buff * skb = list->prev; + struct sk_buff * skb; - /* gone through it all? */ - if (skb == (struct sk_buff *) list) - break; - if (!skb->acked) { - __skb_unlink(skb, list); - kfree_skb(skb, FREE_READ); - continue; - } - tcp_remove_dups(list); - break; + /* + * clean the out_of_order queue + */ + + while ((skb = skb_dequeue(&sk->out_of_order_queue))) + { + kfree_skb(skb, FREE_READ); } } -#ifdef CONFIG_IP_TRANSPARENT_PROXY -/* - * Check whether a received TCP packet might be for one of our - * connections. 
- */ -int tcp_chkaddr(struct sk_buff *skb) +void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, __u16 len) { - struct iphdr *iph = skb->h.iph; - struct tcphdr *th = (struct tcphdr *)(skb->h.raw + iph->ihl*4); - struct sock *sk; - - sk = get_sock(&tcp_prot, th->dest, iph->saddr, th->source, iph->daddr, 0, 0); - - if (!sk) return 0; - /* 0 means accept all LOCAL addresses here, not all the world... */ - if (sk->rcv_saddr == 0) return 0; - return 1; -} -#endif - -/* - * A TCP packet has arrived. - * skb->h.raw is the TCP header. - */ - -int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, - __u32 daddr, unsigned short len, - __u32 saddr, int redo, struct inet_protocol * protocol) -{ - struct tcphdr *th; - struct sock *sk; - __u32 seq; -#ifdef CONFIG_IP_TRANSPARENT_PROXY - int r; -#endif + struct tcp_opt *tp; + int queued = 0; + u32 flg; + + /* + * Header prediction. + * The code follows the one in the famous + * "30 instruction TCP receive" Van Jacobson mail. + * + * Van's trick is to deposit buffers into socket queue + * on a device interrupt, to call tcp_recv function + * on the receive process context and checksum and copy + * the buffer to user space. smart... + * + * Our current scheme is not silly either but we take the + * extra cost of the net_bh soft interrupt processing... + * We do checksum and copy also but from device to kernel. + */ + tp = &(sk->tp_pinfo.af_tcp); + flg = *(((u32 *)th) + 3); + /* - * "redo" is 1 if we have already seen this skb but couldn't - * use it at that time (the socket was locked). In that case - * we have already done a lot of the work (looked up the socket - * etc). + * pred_flags is 0x5?10 << 16 + snd_wnd + * if header_predition is to be made + * ? 
will be 0 else it will be !0 + * (when there are holes in the receive + * space for instance) */ - th = skb->h.th; - sk = skb->sk; - if (!redo) { - tcp_statistics.TcpInSegs++; - if (skb->pkt_type!=PACKET_HOST) - goto discard_it; - /* - * Pull up the IP header. - */ - - skb_pull(skb, skb->h.raw-skb->data); + if (flg == tp->pred_flags && skb->seq == tp->rcv_nxt) + { + if (len <= sizeof(struct tcphdr)) + { + if (len == sizeof(struct tcphdr)) + { + tcp_ack(sk, th, skb->seq, skb->ack_seq, len); + } - /* - * Try to use the device checksum if provided. - */ - switch (skb->ip_summed) + tcp_data_snd_check(sk); + + kfree_skb(skb, FREE_READ); + return; + + } + else if (skb->ack_seq == tp->snd_una) { - case CHECKSUM_NONE: - skb->csum = csum_partial((char *)th, len, 0); - case CHECKSUM_HW: - if (tcp_check(th, len, saddr, daddr, skb->csum)) - goto discard_it; - default: - /* CHECKSUM_UNNECESSARY */ - } - sk = get_tcp_sock(saddr, th->source, daddr, th->dest, dev->pa_addr, skb->redirport); - if (!sk) - goto no_tcp_socket; - skb->sk = sk; - skb->seq = ntohl(th->seq); - skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4; - skb->ack_seq = ntohl(th->ack_seq); - - skb->acked = 0; - skb->used = 0; - skb->free = 1; - skb->saddr = daddr; - skb->daddr = saddr; + /* + * Bulk data transfer: receiver + */ + + skb_pull(skb,sizeof(struct tcphdr)); + + skb_queue_tail(&sk->receive_queue, skb); + tp->rcv_nxt = skb->end_seq; + sk->bytes_rcv += len - sizeof(struct tcphdr); + + sk->data_ready(sk, 0); + tcp_delack_estimator(tp); - /* - * We may need to add it to the backlog here. 
- */ - if (sk->users) + if (sk->delayed_acks++) + { + tcp_send_delayed_ack(sk, HZ/2); + } + else + tcp_send_ack(sk); + + return; + } + } + + if (!tcp_sequence(tp, skb->seq, skb->end_seq)) + { + if (!th->rst) { - __skb_queue_tail(&sk->back_log, skb); - return(0); + if (after(skb->seq, tp->rcv_nxt)) + { + printk(KERN_DEBUG "->seq:%d end:%d " + "wup:%d wnd:%d\n", + skb->seq, skb->end_seq, + tp->rcv_wup, tp->rcv_wnd); + } + tcp_send_ack(sk); + kfree_skb(skb, FREE_READ); + return; } } + if(th->syn && skb->seq != sk->syn_seq) + { + printk(KERN_DEBUG "syn in established state\n"); + tcp_reset(sk, skb); + kfree_skb(skb, FREE_READ); + return; + } + + if(th->rst) + { + tcp_reset(sk,skb); + kfree_skb(skb, FREE_READ); + return; + } + + if(th->ack) + { + tcp_ack(sk, th, skb->seq, skb->ack_seq, len); + } + + /* - * If this socket has got a reset it's to all intents and purposes - * really dead. Count closed sockets as dead. - * - * Note: BSD appears to have a bug here. A 'closed' TCP in BSD - * simply drops data. This seems incorrect as a 'closed' TCP doesn't - * exist so should cause resets as if the port was unreachable. + * Process urgent data */ - if (sk->zapped || sk->state==TCP_CLOSE) - goto no_tcp_socket; + tcp_urg(sk, th, len); - if (!sk->prot) + /* + * step 7: process the segment text + */ + + + queued = tcp_data(skb, sk, len); + + /* + * step 8: check the FIN bit + */ + + if (th->fin) { - printk(KERN_CRIT "IMPOSSIBLE 3\n"); - return(0); + tcp_fin(skb, sk, th); } + tcp_data_snd_check(sk); + tcp_ack_snd_check(sk); /* - * Charge the memory to the socket. + * If our receive queue has grown past its limits, + * try to prune away duplicates etc.. */ - - skb->sk=sk; - atomic_add(skb->truesize, &sk->rmem_alloc); + if (sk->rmem_alloc > sk->rcvbuf) + prune_queue(sk); /* - * Mark the time of the last received packet. 
- */ - sk->idletime = jiffies; + * And done + */ + if (queued) + return; + + kfree_skb(skb, FREE_READ); +} + + +/* + * This function implements the receiving procedure of RFC 793. + * It's called from both tcp_v4_rcv and tcp_v6_rcv and should be + * address independent. + */ + +int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, void *opt, __u16 len) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int queued = 0; + int rcv_mss; + /* - * We should now do header prediction. - */ - - /* - * This basically follows the flow suggested by RFC793, with the corrections in RFC1122. We - * don't implement precedence and we process URG incorrectly (deliberately so) for BSD bug - * compatibility. We also set up variables more thoroughly [Karn notes in the - * KA9Q code the RFC793 incoming segment rules don't initialise the variables for all paths]. + * state == CLOSED + * tested in tcp_v{4,6}_rcv */ - if(sk->state!=TCP_ESTABLISHED) /* Skip this lot for normal flow */ - { - - /* - * Now deal with unusual cases. + switch (sk->state) { + + + case TCP_LISTEN: + + if (th->rst) + goto discard; + + /* + * These use the socket TOS.. + * might want to be the received TOS */ - - if(sk->state==TCP_LISTEN) - { - if(th->ack) /* These use the socket TOS.. might want to be the received TOS */ - tcp_send_reset(daddr,saddr,th,sk->prot,opt,dev,sk->ip_tos, sk->ip_ttl); + if(th->ack) + { /* - * We don't care for RST, and non SYN are absorbed (old segments) - * Broadcast/multicast SYN isn't allowed. Note - bug if you change the - * netmask on a running connection it can go broadcast. Even Sun's have - * this problem so I'm ignoring it + * send reset */ - -#ifdef CONFIG_IP_TRANSPARENT_PROXY - /* - * We may get non-local addresses and still want to - * handle them locally, due to transparent proxying. - * Thus, narrow down the test to what is really meant. 
- */ - if(th->rst || !th->syn || th->ack || (r = ip_chk_addr(daddr)) == IS_BROADCAST || r == IS_MULTICAST) -#else - if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR) -#endif - { - kfree_skb(skb, FREE_READ); - return 0; - } + + return 1; + } - /* - * Guess we need to make a new socket up - */ - seq = secure_tcp_sequence_number(saddr, daddr, - skb->h.th->dest, - skb->h.th->source); - tcp_conn_request(sk, skb, daddr, saddr, opt, dev, seq); - /* - * Now we have several options: In theory there is nothing else - * in the frame. KA9Q has an option to send data with the syn, - * BSD accepts data with the syn up to the [to be] advertised window - * and Solaris 2.1 gives you a protocol error. For now we just ignore - * it, that fits the spec precisely and avoids incompatibilities. It - * would be nice in future to drop through and process the data. + if(th->syn) + { + int err; + __u32 isn; + + isn = tp->af_specific->init_sequence(sk, skb); + err = tp->af_specific->conn_request(sk, skb, opt, isn); + + if (err < 0) + return 1; + + /* + * Now we have several options: In theory there is + * nothing else in the frame. KA9Q has an option to + * send data with the syn, BSD accepts data with the + * syn up to the [to be] advertised window and + * Solaris 2.1 gives you a protocol error. For now + * we just ignore it, that fits the spec precisely + * and avoids incompatibilities. It would be nice in + * future to drop through and process the data. * - * Now TTCP is starting to use we ought to queue this data. + * Now that TTCP is starting to be used we ought to + * queue this data. */ - - return 0; - } - - /* - * Retransmitted SYN for our socket. This is uninteresting. 
If sk->state==TCP_LISTEN - * then it's a new connection - */ - - if (sk->state == TCP_SYN_RECV && th->syn && skb->seq+1 == sk->acked_seq) - { - kfree_skb(skb, FREE_READ); + return 0; } + goto discard; + break; + + case TCP_SYN_SENT: + /* - * SYN sent means we have to look for a suitable ack and either reset - * for bad matches or go to connected. The SYN_SENT case is unusual and should + * SYN sent means we have to look for a suitable ack and + * either reset for bad matches or go to connected. + * The SYN_SENT case is unusual and should * not be in line code. [AC] */ - if(sk->state==TCP_SYN_SENT) + if(th->ack) { - /* Crossed SYN or previous junk segment */ - if(th->ack) + /* We got an ack, but it's not a good ack */ + if(!tcp_ack(sk,th, skb->seq, skb->ack_seq, len)) { - /* We got an ack, but it's not a good ack. - * We used to test this with a call to tcp_ack, - * but this loses, because it takes the SYN - * packet out of the send queue, even if - * the ACK doesn't have the SYN bit sent, and - * therefore isn't the one we are waiting for. - */ - if (after(skb->ack_seq, sk->sent_seq) || before(skb->ack_seq, sk->rcv_ack_seq)) - { - /* Reset the ack - it's an ack from a - different connection [ th->rst is checked in tcp_send_reset()] */ - tcp_statistics.TcpAttemptFails++; - tcp_send_reset(daddr, saddr, th, - sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); - kfree_skb(skb, FREE_READ); - return(0); - } - if(th->rst) - return tcp_reset(sk,skb); - if(!th->syn) - { - /* A valid ack from a different connection - start. Shouldn't happen but cover it */ - tcp_statistics.TcpAttemptFails++; - tcp_send_reset(daddr, saddr, th, - sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl); - kfree_skb(skb, FREE_READ); - return 0; - } - - /* process the ACK, get the SYN packet out - * of the send queue, do other initial - * processing stuff. [We know it's good, and - * we know it's the SYN,ACK we want.] 
- */ - tcp_ack(sk,th,skb->ack_seq,len); - + tcp_statistics.TcpAttemptFails++; + return 1; + } - /* - * Ok.. it's good. Set up sequence numbers and - * move to established. - */ - sk->acked_seq = skb->seq+1; - sk->lastwin_seq = skb->seq+1; - sk->fin_seq = skb->seq; - tcp_send_ack(sk); - tcp_set_state(sk, TCP_ESTABLISHED); - tcp_options(sk,th); - sk->dummy_th.dest=th->source; - sk->copied_seq = sk->acked_seq; - if(!sk->dead) - { - sk->state_change(sk); - sock_wake_async(sk->socket, 0); - } - if(sk->max_window==0) - { - sk->max_window = 32; - sk->mss = min(sk->max_window, sk->mtu); - } - /* Reset the RTT estimator to the initial - * state rather than testing to avoid - * updating it on the ACK to the SYN packet. - */ - sk->rtt = 0; - sk->rto = TCP_TIMEOUT_INIT; - sk->mdev = TCP_TIMEOUT_INIT; + if(th->rst) + { + tcp_reset(sk,skb); + goto discard; } - else + + if(!th->syn) { - /* See if SYN's cross. Drop if boring */ - if(th->syn && !th->rst) - { - /* Crossed SYN's are fine - but talking to - yourself is right out... */ - if(sk->saddr==saddr && sk->daddr==daddr && - sk->dummy_th.source==th->source && - sk->dummy_th.dest==th->dest) - { - tcp_statistics.TcpAttemptFails++; - return tcp_reset(sk,skb); - } - tcp_set_state(sk,TCP_SYN_RECV); - - /* - * FIXME: - * Must send SYN|ACK here - */ - } - /* Discard junk segment */ - kfree_skb(skb, FREE_READ); - return 0; + /* + * A valid ack from a different connection + * start. Shouldn't happen but cover it + */ + tcp_statistics.TcpAttemptFails++; + return 1; } + /* - * SYN_RECV with data maybe.. drop through + * Ok.. it's good. Set up sequence + * numbers and + * move to established. */ - goto rfc_step6; - } - /* - * BSD has a funny hack with TIME_WAIT and fast reuse of a port. There is - * a more complex suggestion for fixing these reuse issues in RFC1644 - * but not yet ready for general use. Also see RFC1379. - * - * Note the funny way we go back to the top of this function for - * this case ("goto try_next_socket"). 
That also takes care of - * checking "sk->users" for the new socket as well as doing all - * the normal tests on the packet. - */ - -#define BSD_TIME_WAIT -#ifdef BSD_TIME_WAIT - if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && - after(skb->seq, sk->acked_seq) && !th->rst) - { - u32 seq = sk->write_seq; - if(sk->debug) - printk("Doing a BSD time wait\n"); - tcp_statistics.TcpEstabResets++; - atomic_sub(skb->truesize, &sk->rmem_alloc); - skb->sk = NULL; - sk->err=ECONNRESET; - tcp_set_state(sk, TCP_CLOSE); - sk->shutdown = SHUTDOWN_MASK; - sk=get_sock(&tcp_prot, th->dest, saddr, th->source, daddr, dev->pa_addr, skb->redirport); - /* this is not really correct: we should check sk->users */ - if (sk && sk->state==TCP_LISTEN) - { - skb->sk = sk; - atomic_add(skb->truesize, &sk->rmem_alloc); - tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000); - return 0; + tp->rcv_nxt = skb->seq+1; + tp->rcv_wnd = 0; + tp->rcv_wup = skb->seq+1; + + tp->snd_wnd = htons(th->window); + tp->snd_wl1 = skb->seq; + tp->snd_wl2 = skb->ack_seq; + + sk->fin_seq = skb->seq; + tcp_send_ack(sk); + + tcp_set_state(sk, TCP_ESTABLISHED); + rcv_mss = tcp_parse_options(th); + + if (rcv_mss == 0) + { + rcv_mss = 536; } - kfree_skb(skb, FREE_READ); + + sk->mss = min(sk->mss, rcv_mss); + + sk->dummy_th.dest = th->source; + sk->copied_seq = tp->rcv_nxt; + + if(!sk->dead) + { + sk->state_change(sk); + sock_wake_async(sk->socket, 0); + } + + /* Drop through step 6 */ + goto step6; + } + else + { + if(th->syn && !th->rst) + { + /* + * the previous version of the code + * checked for "connecting to self" + * here. that check is done now in + * tcp_connect + */ + + tcp_set_state(sk, TCP_SYN_RECV); + + tp->rcv_nxt = skb->seq + 1; + tp->rcv_wup = skb->seq + 1; + + tp->snd_wnd = htons(th->window); + tp->snd_wl1 = skb->seq; + + tcp_send_synack(sk); + goto discard; + } + + } + break; + + case TCP_TIME_WAIT: + /* + * RFC 1122: + * "When a connection is [...] on TIME-WAIT state [...] 
+ * [a TCP] MAY accept a new SYN from the remote TCP to + * reopen the connection directly, if it: + * + * (1) assigns its initial sequence number for the new + * connection to be larger than the largest sequence + * number it used on the previous connection incarnation, + * and + * + * (2) returns to TIME-WAIT state if the SYN turns out + * to be an old duplicate". + */ + + if (th->syn && !th->rst && after(skb->seq, tp->rcv_nxt)) + { + __u32 isn; + int err; + + atomic_sub(skb->truesize, &sk->rmem_alloc); + skb->sk = NULL; + sk->err = ECONNRESET; + tcp_set_state(sk, TCP_CLOSE); + sk->shutdown = SHUTDOWN_MASK; + + isn = tp->rcv_nxt + 128000; + + sk = tp->af_specific->get_sock(skb, th); + + if (sk == NULL) + goto discard; + + skb->sk = sk; + tp = &sk->tp_pinfo.af_tcp; + atomic_add(skb->truesize, &sk->rmem_alloc); + + err = tp->af_specific->conn_request(sk, skb, opt, isn); + + if (err < 0) + return 1; + return 0; } -#endif + + break; + } /* - * We are now in normal data flow (see the step list in the RFC) - * Note most of these are inline now. I'll inline the lot when - * I have time to test it hard and look at what gcc outputs + * step 1: check sequence number */ - if (!tcp_sequence(sk, skb->seq, skb->end_seq-th->syn)) + if (!tcp_sequence(tp, skb->seq, skb->end_seq)) { - bad_tcp_sequence(sk, th, skb->end_seq-th->syn, dev); - kfree_skb(skb, FREE_READ); - return 0; + if (!th->rst) + { + tcp_send_ack(sk); + goto discard; + } } + + /* + * step 2: check RST bit + */ + if(th->rst) - return tcp_reset(sk,skb); - + { + tcp_reset(sk,skb); + goto discard; + } + /* + * step 3: check security and precedence + * [ignored] + */ + + /* + * step 4: + * * Check for a SYN, and ensure it matches the SYN we were * first sent. We have to handle the rather unusual (but valid) * sequence that KA9Q derived products may generate of @@ -2098,77 +1684,152 @@ * We keep syn_seq as the sequence space occupied by the * original syn. 
*/ - - if(th->syn && skb->seq!=sk->syn_seq) + + if (th->syn && skb->seq!=sk->syn_seq) { - tcp_send_reset(daddr,saddr,th, &tcp_prot, opt, dev, skb->ip_hdr->tos, 255); - return tcp_reset(sk,skb); + tcp_reset(sk, skb); + return 1; } /* - * Process the ACK + * step 5: check the ACK field */ - - if(th->ack && !tcp_ack(sk,th,skb->ack_seq,len)) + if (th->ack) { - /* - * Our three way handshake failed. - */ - - if(sk->state==TCP_SYN_RECV) - { - tcp_send_reset(daddr, saddr, th,sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl); + int acceptable = tcp_ack(sk,th,skb->seq, skb->ack_seq,len); + + switch(sk->state) { + case TCP_SYN_RECV: + if (acceptable) + { + tcp_set_state(sk, TCP_ESTABLISHED); + sk->dummy_th.dest=th->source; + sk->copied_seq = tp->rcv_nxt; + + if(!sk->dead) + sk->state_change(sk); + + tp->snd_una = skb->ack_seq; + tp->snd_wnd = htons(th->window); + tp->snd_wl1 = skb->seq; + tp->snd_wl2 = skb->ack_seq; + + } + else + return 1; + break; + + case TCP_FIN_WAIT1: + + if (tp->snd_una == sk->write_seq) + { + sk->shutdown |= SEND_SHUTDOWN; + tcp_set_state(sk, TCP_FIN_WAIT2); + if (!sk->dead) + sk->state_change(sk); + } + break; + + case TCP_CLOSING: + + if (tp->snd_una == sk->write_seq) + { + tcp_time_wait(sk); + if (!sk->dead) + sk->state_change(sk); + } + break; + + case TCP_LAST_ACK: + + if (tp->snd_una == sk->write_seq) + { + sk->shutdown = SHUTDOWN_MASK; + tcp_set_state(sk,TCP_CLOSE); + if (!sk->dead) + sk->state_change(sk); + goto discard; + } + break; + + case TCP_TIME_WAIT: + /* + * keep us in TIME_WAIT until we stop getting + * packets, reset the timeout. + */ + tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + break; + } - kfree_skb(skb, FREE_READ); - return 0; } - -rfc_step6: /* I'll clean this up later */ + else + goto discard; - /* - * If the accepted buffer put us over our queue size we - * now drop it (we must process the ack first to avoid - * deadlock cases). 
- */ + step6: /* - * Process urgent data + * step 6: check the URG bit */ - + tcp_urg(sk, th, len); - - /* - * Process the encapsulated data - */ - - if(tcp_data(skb,sk, saddr, len)) - kfree_skb(skb, FREE_READ); /* - * If our receive queue has grown past its limits, - * try to prune away duplicates etc.. + * step 7: process the segment text */ - if (sk->rmem_alloc > sk->rcvbuf) - prune_queue(&sk->receive_queue); - /* - * And done - */ + switch (sk->state) { + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + if (!before(skb->seq, sk->fin_seq)) + break; - return 0; + case TCP_FIN_WAIT1: + case TCP_FIN_WAIT2: -no_tcp_socket: - /* - * No such TCB. If th->rst is 0 send a reset (checked in tcp_send_reset) - */ - tcp_send_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255); + /* + * RFC 793 says to queue data in this states, + * RFC 1122 says we MUST send a reset. + * BSD 4.4 also does reset. + */ + + if ((sk->shutdown & RCV_SHUTDOWN) && sk->dead) + { + if (after(skb->end_seq - th->fin, tp->rcv_nxt)) + { + tcp_reset(sk, skb); + return 1; + } + } + + case TCP_ESTABLISHED: + queued = tcp_data(skb, sk, len); + break; + } -discard_it: /* - * Discard frame + * step 8: check the FIN bit */ - skb->sk = NULL; + + if (th->fin) + { + tcp_fin(skb, sk, th); + } + + tcp_data_snd_check(sk); + tcp_ack_snd_check(sk); + + if (queued) + return 0; + discard: + kfree_skb(skb, FREE_READ); return 0; } + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -c -o tcp_input.o tcp_input.c" + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv4/tcp_ipv4.c linux/net/ipv4/tcp_ipv4.c --- v2.1.7/linux/net/ipv4/tcp_ipv4.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv4/tcp_ipv4.c Sun Nov 3 11:04:45 1996 @@ -0,0 +1,1350 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. 
INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * + * IPv4 specific functions + * + * + * code split from: + * linux/ipv4/tcp.c + * linux/ipv4/tcp_input.c + * linux/ipv4/tcp_output.c + * + * See tcp.c for author information + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +static void tcp_v4_send_reset(unsigned long saddr, unsigned long daddr, + struct tcphdr *th, struct proto *prot, + struct options *opt, + struct device *dev, int tos, int ttl); + +void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, + struct sk_buff *skb); + +/* + * Cached last hit socket + */ + +static volatile unsigned long th_cache_saddr, th_cache_daddr; +static volatile unsigned short th_cache_dport, th_cache_sport; +static volatile struct sock *th_cache_sk; + +void tcp_cache_zap(void) +{ + th_cache_sk=NULL; +} + +/* + * Find the socket, using the last hit cache if applicable. + * The cache is not quite right... 
+ */ + +static inline struct sock * get_tcp_sock(u32 saddr, u16 sport, + u32 daddr, u16 dport, + u32 paddr, u16 pport) +{ + struct sock * sk; + + sk = (struct sock *) th_cache_sk; + if (!sk || saddr != th_cache_saddr || daddr != th_cache_daddr || + sport != th_cache_sport || dport != th_cache_dport) { + sk = get_sock(&tcp_prot, dport, saddr, sport, daddr, + paddr, pport); + if (sk) { + th_cache_saddr=saddr; + th_cache_daddr=daddr; + th_cache_dport=dport; + th_cache_sport=sport; + th_cache_sk=sk; + } + } + return sk; +} + +static __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) +{ + return secure_tcp_sequence_number(sk->saddr, sk->daddr, + skb->h.th->dest, + skb->h.th->source); +} + +/* + * From tcp.c + */ + +/* + * Check that a TCP address is unique, don't allow multiple + * connects to/from the same address + */ + +static int tcp_unique_address(u32 saddr, u16 snum, u32 daddr, u16 dnum) +{ + int retval = 1; + struct sock * sk; + + /* Make sure we are allowed to connect here. */ + cli(); + for (sk = tcp_prot.sock_array[snum & (SOCK_ARRAY_SIZE -1)]; + sk != NULL; sk = sk->next) + { + /* hash collision? */ + if (sk->num != snum) + continue; + if (sk->saddr != saddr) + continue; + if (sk->daddr != daddr) + continue; + if (sk->dummy_th.dest != dnum) + continue; + retval = 0; + break; + } + sti(); + return retval; +} + +/* + * This will initiate an outgoing connection. + */ + +int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + struct sk_buff *buff; + struct sk_buff *skb1; + struct device *dev=NULL; + unsigned char *ptr; + int tmp; + int atype; + struct tcphdr *t1; + struct rtable *rt; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; + + if (sk->state != TCP_CLOSE) + return(-EISCONN); + + /* + * Don't allow a double connect. 
+ */ + + if(sk->daddr) + return -EINVAL; + + if (addr_len < sizeof(struct sockaddr_in)) + return(-EINVAL); + + if (usin->sin_family && usin->sin_family != AF_INET) + return(-EAFNOSUPPORT); + + /* + * connect() to INADDR_ANY means loopback (BSD'ism). + */ + + if (usin->sin_addr.s_addr==INADDR_ANY) + usin->sin_addr.s_addr=ip_my_addr(); + + /* + * Don't want a TCP connection going to a broadcast address + */ + + if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST + || atype==IS_MULTICAST) + { + return -ENETUNREACH; + } + + if (!tcp_unique_address(sk->saddr, sk->num, usin->sin_addr.s_addr, + usin->sin_port)) + { + return -EADDRNOTAVAIL; + } + + lock_sock(sk); + sk->daddr = usin->sin_addr.s_addr; + sk->dummy_th.dest = usin->sin_port; + sk->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr, + sk->dummy_th.source, + usin->sin_port); + + tp->snd_wnd = 0; + tp->snd_wl1 = 0; + tp->snd_wl2 = sk->write_seq; + tp->snd_una = sk->write_seq; + + tp->rcv_nxt = 0; + + sk->err = 0; + + buff = sock_wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL); + if (buff == NULL) + { + release_sock(sk); + return(-ENOMEM); + } + + buff->sk = sk; + buff->free = 0; + buff->localroute = sk->localroute; + + /* + * Put in the IP header and routing stuff. 
+ */ + + tmp = ip_build_header(buff, sk->saddr, sk->daddr, &dev, + IPPROTO_TCP, NULL, MAX_SYN_SIZE, sk->ip_tos, + sk->ip_ttl,&sk->ip_route_cache); + + if (tmp < 0) + { + sock_wfree(sk, buff); + release_sock(sk); + return(-ENETUNREACH); + } + if ((rt = sk->ip_route_cache) != NULL && !sk->saddr) + sk->saddr = rt->rt_src; + sk->rcv_saddr = sk->saddr; + + t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr)); + buff->h.th = t1; + + memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1)); + buff->seq = sk->write_seq++; + t1->seq = htonl(buff->seq); + tp->snd_nxt = sk->write_seq; + buff->end_seq = sk->write_seq; + t1->ack = 0; + t1->window = htons(512); + t1->syn = 1; + t1->doff = 6; + + /* use 512 or whatever user asked for */ + + if(rt!=NULL && (rt->rt_flags&RTF_WINDOW)) + sk->window_clamp=rt->rt_window; + else + sk->window_clamp=0; + + + if (rt) + sk->mtu = rt->rt_mtu; + else + sk->mtu = dev->mtu; + +#ifdef CONFIG_SKIP + + /* + * SKIP devices set their MTU to 65535. This is so they can take packets + * unfragmented to security process then fragment. They could lie to the + * TCP layer about a suitable MTU, but its easier to let skip sort it out + * simply because the final package we want unfragmented is going to be + * + * [IPHDR][IPSP][Security data][Modified TCP data][Security data] + */ + + if(skip_pick_mtu!=NULL) /* If SKIP is loaded.. */ + sk->mtu=skip_pick_mtu(sk->mtu,dev); +#endif + + if(sk->mtu < 64) + sk->mtu = 64; /* Sanity limit */ + + if (sk->user_mss) + sk->mss = sk->user_mss; + else + sk->mss = (sk->mtu - sizeof(struct iphdr) - + sizeof(struct tcphdr)); + + /* + * Put in the TCP options to say MSS. + */ + + ptr = skb_put(buff,4); + ptr[0] = TCPOPT_MSS; + ptr[1] = TCPOLEN_MSS; + ptr[2] = (sk->mss) >> 8; + ptr[3] = (sk->mss) & 0xff; + buff->csum = csum_partial(ptr, 4, 0); + tcp_v4_send_check(sk, t1, sizeof(struct tcphdr) + 4, buff); + + /* + * This must go first otherwise a really quick response + * will get reset. 
+ */ + + tcp_cache_zap(); + tcp_set_state(sk,TCP_SYN_SENT); + + if(rt && (rt->rt_flags&RTF_IRTT)) + tp->rto = rt->rt_irtt; + else + tp->rto = TCP_TIMEOUT_INIT; + + tcp_init_xmit_timers(sk); + + /* Now works the right way instead of a hacked initial setting */ + sk->retransmits = 0; + + skb_queue_tail(&sk->write_queue, buff); + + sk->packets_out++; + buff->when = jiffies; + + skb1 = skb_clone(buff, GFP_KERNEL); + sk->wmem_alloc += skb1->truesize; + ip_queue_xmit(sk, dev, skb1, 1); + + /* Timer for repeating the SYN until an answer */ + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + tcp_statistics.TcpActiveOpens++; + tcp_statistics.TcpOutSegs++; + + release_sock(sk); + return(0); +} + +static int tcp_v4_sendmsg(struct sock *sk, struct msghdr *msg, + int len, int nonblock, int flags) +{ + int retval = -EINVAL; + + /* + * Do sanity checking for sendmsg/sendto/send + */ + + if (flags & ~(MSG_OOB|MSG_DONTROUTE)) + goto out; + if (msg->msg_name) { + struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name; + + if (msg->msg_namelen < sizeof(*addr)) + goto out; + if (addr->sin_family && addr->sin_family != AF_INET) + goto out; + retval = -ENOTCONN; + if(sk->state == TCP_CLOSE) + goto out; + retval = -EISCONN; + if (addr->sin_port != sk->dummy_th.dest) + goto out; + if (addr->sin_addr.s_addr != sk->daddr) + goto out; + } + + lock_sock(sk); + retval = tcp_do_sendmsg(sk, msg->msg_iovlen, msg->msg_iov, + len, nonblock, flags); + + release_sock(sk); + +out: + return retval; +} + +/* + * This routine is called by the ICMP module when it gets some + * sort of error condition. If err < 0 then the socket should + * be closed and the error returned to the user. If err > 0 + * it's just the icmp type << 8 | icmp code. After adjustment + * header points to the first 8 bytes of the tcp header. We need + * to find the appropriate port. 
+ */ + +void tcp_v4_err(int type, int code, unsigned char *header, __u32 info, + __u32 daddr, __u32 saddr, struct inet_protocol *protocol) +{ + struct tcphdr *th = (struct tcphdr *)header; + struct tcp_opt *tp; + struct sock *sk; + + th =(struct tcphdr *)header; + sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr, 0, 0); + + if (sk == NULL) + return; + + if (type == ICMP_SOURCE_QUENCH) + { + /* + * FIXME: + * Follow BSD for now and just reduce cong_window to 1 again. + * It is possible that we just want to reduce the + * window by 1/2, or that we want to reduce ssthresh by 1/2 + * here as well. + */ + + tp = &sk->tp_pinfo.af_tcp; + + sk->cong_window = 1; + tp->high_seq = tp->snd_nxt; + + return; + } + + if (type == ICMP_PARAMETERPROB) + { + sk->err=EPROTO; + sk->error_report(sk); + } + +#ifndef CONFIG_NO_PATH_MTU_DISCOVERY + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + { + struct rtable * rt; + /* + * Ugly trick to pass MTU to protocol layer. + * Really we should add argument "info" to error handler. + */ + unsigned short new_mtu = info; + + if ((rt = sk->ip_route_cache) != NULL) + if (rt->rt_mtu > new_mtu) + rt->rt_mtu = new_mtu; + + if ((sk->mtu > new_mtu) && + (new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr))) + { + sk->mss = (new_mtu - sizeof(struct iphdr) + - sizeof(struct tcphdr)); + } + + return; + } +#endif + + /* + * If we've already connected we will keep trying + * until we time out, or the user gives up. 
+ */ + + if (code <= NR_ICMP_UNREACH) + { + if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) + { + sk->err = icmp_err_convert[code].errno; + if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) + { + tcp_statistics.TcpAttemptFails++; + tcp_set_state(sk,TCP_CLOSE); + sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ + } + } + else /* Only an error on timeout */ + sk->err_soft = icmp_err_convert[code].errno; + } +} + +/* + * This routine computes a TCP checksum. + * + * Modified January 1995 from a go-faster DOS routine by + * Jorge Cwik + */ +void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, + struct sk_buff *skb) +{ + __u32 saddr = sk->saddr; + __u32 daddr = sk->daddr; +#ifdef DEBUG_TCP_CHECK + u16 check; +#endif + th->check = 0; + th->check = tcp_v4_check(th, len, saddr, daddr, + csum_partial((char *)th, sizeof(*th), + skb->csum)); + +#ifdef DEBUG_TCP_CHECK + check = th->check; + th->check = 0; + th->check = tcp_v4_check(th, len, saddr, daddr, + csum_partial((char *)th,len,0)); + if (check != th->check) { + static int count = 0; + if (++count < 10) { + printk("Checksum %x (%x) from %p\n", th->check, check, + __builtin_return_address(0)); + printk("TCP= len=%d\n", th->doff*4, th->ack, th->syn, th->fin, len); + } + } +#endif +} + +/* + * This routine will send an RST to the other tcp. + */ + +static void tcp_v4_send_reset(unsigned long saddr, unsigned long daddr, + struct tcphdr *th, struct proto *prot, + struct options *opt, + struct device *dev, int tos, int ttl) +{ + struct sk_buff *buff; + struct tcphdr *t1; + int tmp; + struct device *ndev=NULL; + + /* + * Cannot reset a reset (Think about it). + */ + + if(th->rst) + return; + + /* + * We need to grab some memory, and put together an RST, + * and then put it into the queue to be sent. 
+ */ + + buff = alloc_skb(MAX_RESET_SIZE, GFP_ATOMIC); + if (buff == NULL) + return; + + buff->sk = NULL; + buff->dev = dev; + buff->localroute = 0; + + + /* + * Put in the IP header and routing stuff. + */ + + tmp = ip_build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt, + sizeof(struct tcphdr),tos,ttl,NULL); + if (tmp < 0) + { + buff->free = 1; + sock_wfree(NULL, buff); + return; + } + + t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr)); + memset(t1, 0, sizeof(*t1)); + + /* + * Swap the send and the receive. + */ + + t1->dest = th->source; + t1->source = th->dest; + t1->doff = sizeof(*t1)/4; + t1->rst = 1; + + if(th->ack) + { + t1->seq = th->ack_seq; + } + else + { + t1->ack = 1; + if(!th->syn) + t1->ack_seq = th->seq; + else + t1->ack_seq = htonl(ntohl(th->seq)+1); + } + + + buff->csum = csum_partial((u8 *) t1, sizeof(*t1), 0); + t1->check = tcp_v4_check(t1, sizeof(*t1), saddr, daddr, buff->csum); + + ip_queue_xmit(NULL, ndev, buff, 1); + tcp_statistics.TcpOutSegs++; +} + +#ifdef CONFIG_IP_TRANSPARENT_PROXY +/* + * Check whether a received TCP packet might be for one of our + * connections. + */ + +int tcp_chkaddr(struct sk_buff *skb) +{ + struct iphdr *iph = skb->h.iph; + struct tcphdr *th = (struct tcphdr *)(skb->h.raw + iph->ihl*4); + struct sock *sk; + + sk = get_sock(&tcp_prot, th->dest, iph->saddr, th->source, iph->daddr, + 0, 0); + + if (!sk) + return 0; + + /* 0 means accept all LOCAL addresses here, not all the world... 
*/ + + if (sk->rcv_saddr == 0) + return 0; + + return 1; +} +#endif + +static void tcp_v4_send_synack(struct sock *sk, struct open_request *req) +{ + struct tcp_v4_open_req *af_req = (struct tcp_v4_open_req *) req; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct sk_buff * skb; + struct device *dev = NULL; + struct rtable *rt = NULL; + struct tcphdr *th; + unsigned char *ptr; + int mss; + int tmp; + + skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC); + + if (skb == NULL) + { + return; + } + + tmp = ip_build_header(skb, af_req->loc_addr, af_req->rmt_addr, &dev, + IPPROTO_TCP, af_req->opt, skb->truesize, + sk->ip_tos, sk->ip_ttl, &rt); + + if (tmp < 0) + { + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + return; + } + + skb->dev = dev; + + if (rt) + mss = rt->rt_mtu; + else + mss = dev->mtu; + + mss -= sizeof(struct iphdr) + sizeof(struct tcphdr); + + if (sk->user_mss) + mss = min(mss, sk->user_mss); + + ip_rt_put(rt); + + th =(struct tcphdr *) skb_put(skb, sizeof(struct tcphdr)); + skb->h.th = th; + memset(th, 0, sizeof(struct tcphdr)); + + th->syn = 1; + th->ack = 1; + + th->source = sk->dummy_th.source; + th->dest = req->rmt_port; + + skb->seq = req->snt_isn; + skb->end_seq = skb->seq + 1; + + th->seq = ntohl(skb->seq); + th->ack_seq = htonl(req->rcv_isn + 1); + th->doff = sizeof(*th)/4 + 1; + + th->window = ntohs(tp->rcv_wnd); + + ptr = skb_put(skb, TCPOLEN_MSS); + ptr[0] = TCPOPT_MSS; + ptr[1] = TCPOLEN_MSS; + ptr[2] = (mss >> 8) & 0xff; + ptr[3] = mss & 0xff; + skb->csum = csum_partial(ptr, TCPOLEN_MSS, 0); + + th->check = tcp_v4_check(th, sizeof(*th) + TCPOLEN_MSS, af_req->loc_addr, + af_req->rmt_addr, + csum_partial((char *)th, sizeof(*th), skb->csum)); + + ip_queue_xmit(sk, dev, skb, 1); + tcp_statistics.TcpOutSegs++; + +} + +static void tcp_v4_or_free(struct open_request *req) +{ + struct tcp_v4_open_req *af_req = (struct tcp_v4_open_req *) req; + + if (af_req->req.sk) + return; + + if (af_req->opt) + { + kfree_s(af_req->opt, sizeof(struct options) 
+ af_req->opt->optlen); + } +} + +static struct or_calltable or_ipv4 = { + tcp_v4_send_synack, + tcp_v4_or_free +}; + +static int tcp_v4_syn_filter(struct sock *sk, struct sk_buff *skb, __u32 saddr) +{ + return 0; +} + +int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn) +{ + struct options *opt = (struct options *) ptr; + struct tcp_v4_open_req *af_req; + struct open_request *req; + struct tcphdr *th = skb->h.th; + __u32 saddr = skb->saddr; + __u32 daddr = skb->daddr; + + /* If the socket is dead, don't accept the connection. */ + if (sk->dead) + { + if(sk->debug) + { + printk("Reset on %p: Connect on dead socket.\n",sk); + } + tcp_statistics.TcpAttemptFails++; + return -ENOTCONN; + } + + if (sk->ack_backlog >= sk->max_ack_backlog || + tcp_v4_syn_filter(sk, skb, saddr)) + { + printk(KERN_DEBUG "droping syn ack:%d max:%d\n", + sk->ack_backlog, sk->max_ack_backlog); +#ifdef CONFIG_IP_TCPSF + tcp_v4_random_drop(sk); +#endif + tcp_statistics.TcpAttemptFails++; + goto exit; + } + + + af_req = kmalloc(sizeof(struct tcp_v4_open_req), GFP_ATOMIC); + + if (af_req == NULL) + { + tcp_statistics.TcpAttemptFails++; + goto exit; + } + + sk->ack_backlog++; + req = (struct open_request *) af_req; + + memset(af_req, 0, sizeof(struct tcp_v4_open_req)); + + req->rcv_isn = skb->seq; + req->snt_isn = isn; + + /* mss */ + req->mss = tcp_parse_options(th); + + if (!req->mss) + { + req->mss = 536; + } + + req->rmt_port = th->source; + + af_req->loc_addr = daddr; + af_req->rmt_addr = saddr; + + /* + * options + */ + + if (opt && opt->optlen) + { + af_req->opt = (struct options*) kmalloc(sizeof(struct options) + + opt->optlen, GFP_ATOMIC); + if (af_req->opt) + { + if (ip_options_echo(af_req->opt, opt, skb->daddr, + skb->saddr, skb)) + { + kfree_s(af_req->opt, sizeof(struct options) + + opt->optlen); + af_req->opt = NULL; + } + } + } + + req->class = &or_ipv4; + + tcp_v4_send_synack(sk, req); + + req->expires = jiffies + TCP_TIMEOUT_INIT; + 
tcp_inc_slow_timer(TCP_SLT_SYNACK); + tcp_synq_queue(&sk->tp_pinfo.af_tcp, req); + + sk->data_ready(sk, 0); + + exit: + kfree_skb(skb, FREE_READ); + return 0; +} + +struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct open_request *req) +{ + struct tcp_v4_open_req *af_req = (struct tcp_v4_open_req *) req; + struct tcp_opt *newtp; + struct sock *newsk; + struct rtable *rt; + int snd_mss; + + newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC); + if (newsk == NULL) + { + return NULL; + } + + memcpy(newsk, sk, sizeof(*newsk)); + newsk->opt = NULL; + newsk->ip_route_cache = NULL; + skb_queue_head_init(&newsk->write_queue); + skb_queue_head_init(&newsk->receive_queue); + skb_queue_head_init(&newsk->out_of_order_queue); + + /* + * Unused + */ + + newsk->send_head = NULL; + newsk->send_tail = NULL; + + newtp = &(newsk->tp_pinfo.af_tcp); + newtp->send_head = NULL; + newtp->retrans_head = NULL; + + newtp->pending = 0; + + skb_queue_head_init(&newsk->back_log); + + newsk->prot->init(newsk); + + newsk->cong_count = 0; + newsk->ssthresh = 0; + newtp->backoff = 0; + newsk->blog = 0; + newsk->intr = 0; + newsk->proc = 0; + newsk->done = 0; + newsk->partial = NULL; + newsk->pair = NULL; + newsk->wmem_alloc = 0; + newsk->rmem_alloc = 0; + newsk->localroute = sk->localroute; + + newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF; + + newsk->err = 0; + newsk->shutdown = 0; + newsk->ack_backlog = 0; + + newsk->fin_seq = req->rcv_isn; + newsk->syn_seq = req->rcv_isn; + newsk->state = TCP_SYN_RECV; + newsk->timeout = 0; + newsk->ip_xmit_timeout = 0; + + newsk->write_seq = req->snt_isn; + + newtp->snd_wnd = ntohs(skb->h.th->window); + newsk->max_window = newtp->snd_wnd; + newtp->snd_wl1 = req->rcv_isn; + newtp->snd_wl2 = newsk->write_seq; + newtp->snd_una = newsk->write_seq++; + newtp->snd_nxt = newsk->write_seq; + + newsk->urg_data = 0; + newsk->packets_out = 0; + newsk->retransmits = 0; + newsk->linger=0; + newsk->destroy = 0; + 
init_timer(&newsk->timer); + newsk->timer.data = (unsigned long) newsk; + newsk->timer.function = &net_timer; + + tcp_init_xmit_timers(newsk); + + newsk->dummy_th.source = sk->dummy_th.source; + newsk->dummy_th.dest = req->rmt_port; + + newtp->rcv_nxt = req->rcv_isn + 1; + newtp->rcv_wup = req->rcv_isn + 1; + newsk->copied_seq = req->rcv_isn + 1; + + newsk->socket = NULL; + + newsk->daddr = af_req->rmt_addr; + newsk->saddr = af_req->loc_addr; + newsk->rcv_saddr = af_req->loc_addr; + + /* + * options / mss / route_cache + */ + newsk->opt = af_req->opt; + rt = ip_rt_route(newsk->opt && newsk->opt->srr ? newsk->opt->faddr : + newsk->saddr, 0); + + newsk->ip_route_cache = rt; + + if(rt != NULL && (rt->rt_flags&RTF_WINDOW)) + newsk->window_clamp = rt->rt_window; + else + newsk->window_clamp = 0; + + if (rt) + snd_mss = rt->rt_mtu; + else + snd_mss = skb->dev->mtu; + + newsk->mtu = snd_mss; + /* sanity check */ + if (newsk->mtu < 64) + { + newsk->mtu = 64; + } + + snd_mss -= sizeof(struct iphdr) - sizeof(struct tcphdr); + + if (sk->user_mss) + { + snd_mss = min(snd_mss, sk->user_mss); + } + + newsk->mss = min(req->mss, snd_mss); + + inet_put_sock(newsk->num, newsk); + + tcp_cache_zap(); + + return newsk; +} + +/* + * From tcp_input.c + */ + +int tcp_v4_rcv(struct sk_buff *skb, struct device *dev, struct options *opt, + __u32 daddr, unsigned short len, + __u32 saddr, int redo, struct inet_protocol * protocol) +{ + struct tcphdr *th; + struct sock *sk; + + /* + * "redo" is 1 if we have already seen this skb but couldn't + * use it at that time (the socket was locked). In that case + * we have already done a lot of the work (looked up the socket + * etc). + */ + + th = skb->h.th; + + sk = skb->sk; + + if (!redo) + { + + if (skb->pkt_type!=PACKET_HOST) + goto discard_it; + + /* + * Pull up the IP header. + */ + + skb_pull(skb, skb->h.raw-skb->data); + + /* + * Try to use the device checksum if provided. 
+ */ + + switch (skb->ip_summed) + { + case CHECKSUM_NONE: + skb->csum = csum_partial((char *)th, len, 0); + case CHECKSUM_HW: + if (tcp_v4_check(th,len,saddr,daddr,skb->csum)) + goto discard_it; + default: + /* CHECKSUM_UNNECESSARY */ + } + + sk = get_tcp_sock(saddr, th->source, daddr, th->dest, + dev->pa_addr, skb->redirport); + + if (!sk) + goto no_tcp_socket; + + skb->sk = sk; + skb->seq = ntohl(th->seq); + skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4; + skb->ack_seq = ntohl(th->ack_seq); + + skb->acked = 0; + skb->used = 0; + skb->free = 1; + skb->saddr = saddr; + skb->daddr = daddr; + } + + /* + * We may need to add it to the backlog here. + */ + + if (sk->users) + { + __skb_queue_tail(&sk->back_log, skb); + return(0); + } + + if (!sk->prot) + { + printk(KERN_DEBUG "tcp_rcv: sk->prot == NULL\n"); + return(0); + } + + atomic_add(skb->truesize, &sk->rmem_alloc); + + if (sk->state == TCP_ESTABLISHED) + { + tcp_rcv_established(sk, skb, th, len); + return 0; + } + + if (sk->state == TCP_LISTEN) + { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct open_request *req; + struct tcp_v4_open_req *af_req; + + /* + * assumption: the socket is not in use. + * as we checked the user count above and we're + * running from a soft interrupt. 
+ */ + + req = tp->syn_wait_queue; + af_req = (struct tcp_v4_open_req *) req; + + if (req) + { + do { + if (af_req->rmt_addr == saddr && + af_req->loc_addr == daddr && + req->rmt_port == th->source) + { + if (req->sk) + { + printk(KERN_DEBUG "bug: syn_recv socket " + "exists\n"); + break; + } + + /* match */ + + atomic_sub(skb->truesize, &sk->rmem_alloc); + sk = tp->af_specific->syn_recv_sock(sk, skb, req); + + tcp_dec_slow_timer(TCP_SLT_SYNACK); + + if (sk == NULL) + { + goto no_tcp_socket; + } + + atomic_add(skb->truesize, &sk->rmem_alloc); + req->sk = sk; + skb->sk = sk; + break; + } + + req = req->dl_next; + } while (req != tp->syn_wait_queue); + } + } + + if (tcp_rcv_state_process(sk, skb, th, opt, len) == 0) + return 0; + +no_tcp_socket: + + /* + * No such TCB. If th->rst is 0 send a reset + * (checked in tcp_send_reset) + */ + + tcp_v4_send_reset(daddr, saddr, th, &tcp_prot, opt, dev, + skb->ip_hdr->tos, 255); + +discard_it: + + /* + * Discard frame + */ + + kfree_skb(skb, FREE_READ); + return 0; +} + +int tcp_v4_rebuild_header(struct sock *sk, struct sk_buff *skb) +{ + struct options * opt = (struct options*)skb->proto_priv; + struct device * dev; + struct rtable *rt; + struct iphdr *iph; + struct tcphdr *th; + int size; + + /* + * Discard the surplus MAC header + */ + + skb_pull(skb, ((unsigned char *)skb->ip_hdr)-skb->data); + + iph = skb->ip_hdr; + th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2)); + size = skb->tail - (unsigned char *) th; + + dev = skb->dev; + + rt = ip_check_route(&sk->ip_route_cache, + opt->srr?opt->faddr:iph->daddr, + skb->localroute); + + +#ifndef CONFIG_NO_PATH_MTU_DISCOVERY + if (rt && ntohs(iph->tot_len) > rt->rt_mtu) + iph->frag_off &= ~htons(IP_DF); +#endif + + if (rt==NULL) /* Deep poo */ + { + if(skb->sk) + { + skb->sk->err_soft=ENETUNREACH; + skb->sk->error_report(skb->sk); + } + return -1; + } + + + dev=rt->rt_dev; + skb->raddr=rt->rt_gateway; + skb->dev=dev; + skb->arp=1; + + if (rt->rt_hh) + { + 
memcpy(skb_push(skb, dev->hard_header_len), + rt->rt_hh->hh_data, dev->hard_header_len); + + if (!rt->rt_hh->hh_uptodate) + { + skb->arp = 0; +#if RT_CACHE_DEBUG >= 2 + printk("tcp_do_rebuild_header: " + "hh miss %08x via %08x\n", + iph->daddr, rt->rt_gateway); +#endif + } + } + else if (dev->hard_header) + { + if(dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, + skb->len)<0) + skb->arp=0; + } + + return 0; +} + +int tcp_v4_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + return tcp_v4_rcv(skb, skb->dev, (struct options *) skb->proto_priv, + skb->daddr, skb->len, skb->saddr, 1, + (struct inet_protocol *) sk->pair); +} + +static struct sock * tcp_v4_get_sock(struct sk_buff *skb, struct tcphdr *th) +{ + struct sock *sk; + + sk = get_tcp_sock(skb->saddr, th->source, skb->daddr, th->dest, 0, 0); + + return sk; +} + +int tcp_v4_build_header(struct sock *sk, struct sk_buff *skb) +{ + struct device *dev = NULL; + int tmp; + + tmp = ip_build_header(skb, sk->saddr, sk->daddr, &dev, + IPPROTO_TCP, sk->opt, skb->truesize, + sk->ip_tos, sk->ip_ttl, + &sk->ip_route_cache); + skb->dev = dev; + +#ifndef CONFIG_NO_PATH_MTU_DISCOVERY + if (tmp > 0) + { + skb->ip_hdr->frag_off |= htons(IP_DF); + } +#endif + + return tmp; +} + + +static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; + + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = sk->daddr; + sin->sin_port = sk->dummy_th.dest; + +} + +struct tcp_func ipv4_specific = { + tcp_v4_build_header, + ip_queue_xmit, + tcp_v4_send_check, + tcp_v4_rebuild_header, + tcp_v4_conn_request, + tcp_v4_syn_recv_sock, + tcp_v4_init_sequence, + tcp_v4_get_sock, + ip_setsockopt, + ip_getsockopt, + v4_addr2sockaddr, + sizeof(struct sockaddr_in) +}; + +static int tcp_v4_init_sock(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + skb_queue_head_init(&sk->out_of_order_queue); + tcp_init_xmit_timers(sk); + + tp->srtt = 0; + tp->rto = 
TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/ + tp->mdev = TCP_TIMEOUT_INIT; + + tp->ato = 0; + tp->iat = (HZ/5) << 3; + + tp->rcv_wnd = 8192; + + /* + * See draft-stevens-tcpca-spec-01 for discussion of the + * initialization of these values. + */ + sk->cong_window = 1; + sk->ssthresh = 0x7fffffff; + + sk->priority = 1; + sk->state = TCP_CLOSE; + + /* this is how many unacked bytes we will accept for this socket. */ + sk->max_unacked = 2048; /* needs to be at most 2 full packets. */ + sk->max_ack_backlog = SOMAXCONN; + + sk->mtu = 576; + sk->mss = 536; + + sk->dummy_th.doff = sizeof(sk->dummy_th)/4; + + + /* + * Speed up by setting some standard state for the dummy_th + * if TCP uses it (maybe move to tcp_init later) + */ + + sk->dummy_th.ack=1; + sk->dummy_th.doff=sizeof(struct tcphdr)>>2; + + sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific; + + return 0; +} + +static int tcp_v4_destroy_sock(struct sock *sk) +{ + struct sk_buff *skb; + + tcp_clear_xmit_timers(sk); + + if (sk->keepopen) + { + tcp_dec_slow_timer(TCP_SLT_KEEPALIVE); + } + + /* + * Cleanup up the write buffer. 
+ */ + + while((skb = skb_dequeue(&sk->write_queue)) != NULL) { + IS_SKB(skb); + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + } + + /* + * Cleans up our, hopefuly empty, out_of_order_queue + */ + + while((skb = skb_dequeue(&sk->out_of_order_queue)) != NULL) { + IS_SKB(skb); + kfree_skb(skb, FREE_READ); + } + + return 0; +} + +struct proto tcp_prot = { + tcp_close, + tcp_v4_connect, + tcp_accept, + NULL, + tcp_write_wakeup, + tcp_read_wakeup, + tcp_select, + tcp_ioctl, + tcp_v4_init_sock, + tcp_v4_destroy_sock, + tcp_shutdown, + tcp_setsockopt, + tcp_getsockopt, + tcp_v4_sendmsg, + tcp_recvmsg, + NULL, /* No special bind() */ + tcp_v4_backlog_rcv, + 128, + 0, + "TCP", + 0, 0, + NULL +}; + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -c -o tcp_ipv4.o tcp_ipv4.c" + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv4/tcp_output.c linux/net/ipv4/tcp_output.c --- v2.1.7/linux/net/ipv4/tcp_output.c Wed Oct 9 08:55:24 1996 +++ linux/net/ipv4/tcp_output.c Thu Nov 7 11:02:38 1996 @@ -18,89 +18,73 @@ * Matthew Dillon, * Arnt Gulbrandsen, * Jorge Cwik, - * - * Fixes: Eric Schenk : avoid multiple retransmissions in one - * : round trip timeout. */ -#include -#include -#include -#include -#include - - /* - * RFC 1122 says: + * Changes: Pedro Roque : Retransmit queue handled by TCP. + * : Fragmentation on mtu decrease + * : Segment collapse on retransmit + * : AF independence * - * "the suggested [SWS] avoidance algorithm for the receiver is to keep - * RECV.NEXT + RCV.WIN fixed until: - * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" + * Linus Torvalds : send_delayed_ack * - * Experiments against BSD and Solaris machines show that following - * these rules results in the BSD and Solaris machines making very - * bad guesses about how much data they can have in flight. 
- * - * Instead we follow the BSD lead and offer a window that gives - * the size of the current free space, truncated to a multiple - * of 1024 bytes. If the window is smaller than - * min(sk->mss, MAX_WINDOW/2) - * then we advertise the window as having size 0, unless this - * would shrink the window we offered last time. - * This results in as much as double the throughput as the original - * implementation. - * - * We do BSD style SWS avoidance -- note that RFC1122 only says we - * must do silly window avoidance, it does not require that we use - * the suggested algorithm. - * - * The "rcvbuf" and "rmem_alloc" values are shifted by 1, because - * they also contain buffer handling overhead etc, so the window - * we actually use is essentially based on only half those values. */ -int tcp_new_window(struct sock * sk) -{ - unsigned long window; - unsigned long minwin, maxwin; - /* Get minimum and maximum window values.. */ - minwin = sk->mss; - if (!minwin) - minwin = sk->mtu; - maxwin = sk->window_clamp; - if (!maxwin) - maxwin = MAX_WINDOW; - if (minwin > maxwin/2) - minwin = maxwin/2; - - /* Get current rcvbuf size.. */ - window = sk->rcvbuf/2; - if (window < minwin) { - sk->rcvbuf = minwin*2; - window = minwin; - } - - /* Check rcvbuf against used and minimum window */ - window -= sk->rmem_alloc/2; - if ((long)(window - minwin) < 0) /* SWS avoidance */ - window = 0; - - if (window > 1023) - window &= ~1023; - if (window > maxwin) - window = maxwin; - return window; -} +#include /* * Get rid of any delayed acks, we sent one already.. 
*/ static __inline__ void clear_delayed_acks(struct sock * sk) { - sk->ack_timed = 0; + sk->delayed_acks = 0; sk->ack_backlog = 0; sk->bytes_rcv = 0; - del_timer(&sk->delack_timer); + tcp_clear_xmit_timer(sk, TIME_DACK); +} + +static __inline__ void update_send_head(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + tp->send_head = tp->send_head->next; + + if (tp->send_head == (struct sk_buff *) &sk->write_queue) + { + tp->send_head = NULL; + } + +} + +static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int nagle_check = 1; + int len; + + /* + * RFC 1122 - section 4.2.3.4 + * + * We must queue if + * + * a) The right edge of this frame exceeds the window + * b) There are packets in flight and we have a small segment + * [SWS avoidance and Nagle algorithm] + * (part of SWS is done on packetization) + * c) We are retransmiting [Nagle] + * d) We have too many packets 'in flight' + */ + + len = skb->end_seq - skb->seq; + + if (!sk->nonagle && len < (sk->mss >> 1) && sk->packets_out) + { + nagle_check = 0; + } + + return (nagle_check && sk->packets_out < sk->cong_window && + !after(skb->end_seq, tp->snd_una + tp->snd_wnd) && + sk->retransmits == 0); } /* @@ -108,10 +92,11 @@ * having checked it is sane seeming. 
*/ -void tcp_send_skb(struct sock *sk, struct sk_buff *skb) +int tcp_send_skb(struct sock *sk, struct sk_buff *skb) { - int size; struct tcphdr * th = skb->h.th; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int size; /* * length of packet (not counting length of pre-tcp headers) @@ -125,10 +110,10 @@ if (size < sizeof(struct tcphdr) || size > skb->len) { - printk(KERN_ERR "tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n", + printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n", skb, skb->data, th, skb->len); kfree_skb(skb, FREE_WRITE); - return; + return 0; } /* @@ -138,165 +123,245 @@ if (size == sizeof(struct tcphdr)) { - /* If it's got a syn or fin it's notionally included in the size..*/ + /* + * If it's got a syn or fin discard + */ if(!th->syn && !th->fin) { - printk(KERN_ERR "tcp_send_skb: attempt to queue a bogon.\n"); + printk("tcp_send_skb: attempt to queue a bogon.\n"); kfree_skb(skb,FREE_WRITE); - return; + return 0; } } - /* - * Jacobson recommends this in the appendix of his SIGCOMM'88 paper. - * The idea is to do a slow start again if we haven't been doing - * anything for a long time, in which case we have no reason to - * believe that our congestion window is still correct. - */ - if (sk->send_head == 0 && (jiffies - sk->idletime) > sk->rto) - sk->cong_window = 1; /* * Actual processing. */ - + tcp_statistics.TcpOutSegs++; skb->seq = ntohl(th->seq); skb->end_seq = skb->seq + size - 4*th->doff; - /* - * We must queue if - * - * a) The right edge of this frame exceeds the window - * b) We are retransmitting (Nagle's rule) - * c) We have too many packets 'in flight' - */ - - if (after(skb->end_seq, sk->window_seq) || - (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) || - sk->packets_out >= sk->cong_window) - { - /* checksum will be supplied by tcp_write_xmit. So - * we shouldn't need to set it at all. 
I'm being paranoid */ - th->check = 0; - if (skb->next != NULL) + + if (tp->send_head || !tcp_snd_test(sk, skb)) + { + /* + * Remember where we must start sending + */ + + if (tp->send_head == NULL) + tp->send_head = skb; + + skb_queue_tail(&sk->write_queue, skb); + + if (sk->packets_out == 0 && !tp->pending) { - printk(KERN_ERR "tcp_send_partial: next != NULL\n"); - skb_unlink(skb); + tp->pending = TIME_PROBE0; + tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto); } - skb_queue_tail(&sk->write_queue, skb); - - if (before(sk->window_seq, sk->write_queue.next->end_seq) && - sk->send_head == NULL && sk->ack_backlog == 0) - tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto); + } - else + else { + struct sk_buff * buff; + /* * This is going straight out */ - clear_delayed_acks(sk); - th->ack_seq = htonl(sk->acked_seq); + + skb_queue_tail(&sk->write_queue, skb); + + clear_delayed_acks(sk); + + th->ack_seq = htonl(tp->rcv_nxt); th->window = htons(tcp_select_window(sk)); - tcp_send_check(th, sk->saddr, sk->daddr, size, skb); + tp->af_specific->send_check(sk, th, size, skb); - sk->sent_seq = sk->write_seq; + tp->snd_nxt = skb->end_seq; + + atomic_inc(&sk->packets_out); - /* - * This is mad. The tcp retransmit queue is put together - * by the ip layer. This causes half the problems with - * unroutable FIN's and other things. - */ - - sk->prot->queue_xmit(sk, skb->dev, skb, 0); + skb->when = jiffies; - /* - * Set for next retransmit based on expected ACK time - * of the first packet in the resend queue. - * This is no longer a window behind. - */ + buff = skb_clone(skb, GFP_ATOMIC); + atomic_add(buff->truesize, &sk->wmem_alloc); + + tp->af_specific->queue_xmit(sk, skb->dev, buff, 1); - tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto); + if (!tcp_timer_is_set(sk, TIME_RETRANS)) + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); } + + return 0; } /* - * Locking problems lead us to a messy situation where we can have - * multiple partially complete buffers queued up. 
This is really bad - * as we don't want to be sending partial buffers. Fix this with - * a semaphore or similar to lock tcp_write per socket. - * - * These routines are pretty self descriptive. + * Function to create two new tcp segments. + * Shrinks the given segment to the specified size and appends a new + * segment with the rest of the packet to the list. + * This won't be called frenquently, I hope... */ - -struct sk_buff * tcp_dequeue_partial(struct sock * sk) + +static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) { - struct sk_buff * skb; - unsigned long flags; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff *buff; + struct tcphdr *th, *nth; + int nsize; + int tmp; + + th = skb->h.th; + + /* size of new segment */ + nsize = skb->tail - ((unsigned char *) (th + 1)) - len; - save_flags(flags); - cli(); - skb = sk->partial; - if (skb) { - sk->partial = NULL; - del_timer(&sk->partial_timer); + if (nsize <= 0) + { + printk(KERN_DEBUG "tcp_fragment: bug size <= 0\n"); + return -1; + } + + /* + * Get a new skb... 
force flag on + */ + buff = sock_wmalloc(sk, nsize + 128 + sk->prot->max_header + 15, 1, + GFP_ATOMIC); + + if (buff == NULL) + return -1; + + buff->sk = sk; + buff->localroute = sk->localroute; + + /* + * Put headers on the new packet + */ + + tmp = tp->af_specific->build_net_header(sk, buff); + + if (tmp < 0) + { + sock_wfree(sk, buff); + return -1; + } + + /* + * Move the TCP header over + */ + + nth = (struct tcphdr *) skb_put(buff, sizeof(*th)); + + buff->h.th = nth; + + memcpy(nth, th, sizeof(*th)); + + /* + * Correct the new header + */ + + buff->seq = skb->seq + len; + buff->end_seq = skb->end_seq; + nth->seq = htonl(buff->seq); + nth->check = 0; + nth->doff = 5; + + /* urg data is always an headache */ + if (th->urg) + { + if (th->urg_ptr > len) + { + th->urg = 0; + nth->urg_ptr -= len; + } + else + { + nth->urg = 0; + } } - restore_flags(flags); - return skb; + + /* + * Copy TCP options and data start to our new buffer + */ + + buff->csum = csum_partial_copy(((u8 *)(th + 1)) + len, + skb_put(buff, nsize), + nsize, 0); + + + skb->end_seq -= nsize; + + skb_trim(skb, skb->len - nsize); + + /* remember to checksum this packet afterwards */ + th->check = 0; + skb->csum = csum_partial((u8*) (th + 1), skb->tail - ((u8 *) (th + 1)), + 0); + + skb_append(skb, buff); + + return 0; } -/* - * Empty the partial queue - */ - -void tcp_send_partial(struct sock *sk) +static void tcp_wrxmit_prob(struct sock *sk, struct sk_buff *skb) { - struct sk_buff *skb; + /* + * This is acked data. We can discard it. This + * cannot currently occur. 
+ */ - if (sk == NULL) - return; - while ((skb = tcp_dequeue_partial(sk)) != NULL) - tcp_send_skb(sk, skb); + sk->retransmits = 0; + + printk(KERN_DEBUG "tcp_write_xmit: bug skb in write queue\n"); + + update_send_head(sk); + + skb_unlink(skb); + skb->sk = NULL; + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + + if (!sk->dead) + sk->write_space(sk); } -/* - * Queue a partial frame - */ - -void tcp_enqueue_partial(struct sock * sk, struct sk_buff * skb) +static int tcp_wrxmit_frag(struct sock *sk, struct sk_buff *skb, int size) { - struct sk_buff * tmp; - unsigned long flags; - - save_flags(flags); - cli(); - tmp = sk->partial; - if (tmp) - del_timer(&sk->partial_timer); - sk->partial = skb; - init_timer(&sk->partial_timer); - /* - * Wait up to 1 second for the buffer to fill. - */ - sk->partial_timer.expires = jiffies+HZ/10; - sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial; - sk->partial_timer.data = (unsigned long) sk; - add_timer(&sk->partial_timer); - restore_flags(flags); - if (tmp) - tcp_send_skb(sk, tmp); + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + printk(KERN_DEBUG "tcp_write_xmit: frag needed size=%d mss=%d\n", + size, sk->mss); + + if (tcp_fragment(sk, skb, sk->mss)) + { + /* !tcp_frament Failed! */ + tp->send_head = skb; + atomic_dec(&sk->packets_out); + return -1; + } + else + { + /* + * If tcp_fragment succeded then + * the send head is the resulting + * fragment + */ + tp->send_head = skb->next; + } + return 0; } /* - * This routine takes stuff off of the write queue, - * and puts it in the xmit queue. This happens as incoming acks - * open up the remote window for us. + * This routine writes packets to the network. + * It advances the send_head. + * This happens as incoming acks open up the remote window for us. */ void tcp_write_xmit(struct sock *sk) { struct sk_buff *skb; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; /* * The bytes will have to remain here. 
In time closedown will @@ -308,64 +373,43 @@ /* * Anything on the transmit queue that fits the window can - * be added providing we are not + * be added providing we are: * - * a) retransmitting (Nagle's rule) - * b) exceeding our congestion window. + * a) following SWS avoidance [and Nagle algorithm] + * b) not exceeding our congestion window. + * c) not retransmiting [Nagle] */ - - while((skb = skb_peek(&sk->write_queue)) != NULL && - !after(skb->end_seq, sk->window_seq) && - (sk->retransmits == 0 || - sk->ip_xmit_timeout != TIME_WRITE || - !after(skb->end_seq, sk->rcv_ack_seq)) - && sk->packets_out < sk->cong_window) + + start_bh_atomic(); + + while((skb = tp->send_head) && tcp_snd_test(sk, skb)) { IS_SKB(skb); - skb_unlink(skb); - + /* - * See if we really need to send the whole packet. + * See if we really need to send the packet. */ - if (before(skb->end_seq, sk->rcv_ack_seq +1)) { - /* - * This is acked data. We can discard it. - * This implies the packet was sent out - * of the write queue by a zero window probe. - */ - - sk->retransmits = 0; - kfree_skb(skb, FREE_WRITE); - if (!sk->dead) - sk->write_space(sk); - } else { + if (!after(skb->end_seq, tp->snd_una)) + { + tcp_wrxmit_prob(sk, skb); + } + else + { struct tcphdr *th; - struct iphdr *iph; + struct sk_buff *buff; int size; - iph = skb->ip_hdr; - th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2)); + /* + * Advance the send_head + * This one is going out. + */ + + update_send_head(sk); + + atomic_inc(&sk->packets_out); - /* See if we need to shrink the leading packet on - * the retransmit queue. Strictly speaking, we - * should never need to do this, but some buggy TCP - * implementations get confused if you send them - * a packet that contains both old and new data. (Feh!) - * Soooo, we have this uglyness here. 
- */ - if (after(sk->rcv_ack_seq,skb->seq+th->syn+th->fin)) - tcp_shrink_skb(sk,skb,sk->rcv_ack_seq); - size = skb->len - (((unsigned char *) th) - skb->data); -#ifndef CONFIG_NO_PATH_MTU_DISCOVERY - if (size > sk->mtu - sizeof(struct iphdr)) - { - iph->frag_off &= ~htons(IP_DF); - ip_send_check(iph); - } -#endif - /* * put in the ack seq and window at this point rather than earlier, * in order to keep them monotonic. We really want to avoid taking @@ -373,79 +417,140 @@ * Ack and window will in general have changed since this packet was put * on the write queue. */ - th->ack_seq = htonl(sk->acked_seq); - th->window = htons(tcp_select_window(sk)); - tcp_send_check(th, sk->saddr, sk->daddr, size, skb); + th = skb->h.th; + size = skb->len - (((unsigned char *) th) - skb->data); - sk->sent_seq = skb->end_seq; + if (size - (th->doff << 2) > sk->mss) + { + if (tcp_wrxmit_frag(sk, skb, size)) + break; + } - /* - * IP manages our queue for some crazy reason - */ - - sk->prot->queue_xmit(sk, skb->dev, skb, skb->free); + th->ack_seq = htonl(tp->rcv_nxt); + th->window = htons(tcp_select_window(sk)); + + tp->af_specific->send_check(sk, th, size, skb); + if (before(skb->end_seq, tp->snd_nxt)) + printk(KERN_DEBUG "tcp_write_xmit:" + " sending already sent seq\n"); + else + tp->snd_nxt = skb->end_seq; + clear_delayed_acks(sk); + + skb->when = jiffies; + + buff = skb_clone(skb, GFP_ATOMIC); + atomic_add(buff->truesize, &sk->wmem_alloc); - tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto); + tp->af_specific->queue_xmit(sk, skb->dev, buff, 1); + + if (!tcp_timer_is_set(sk, TIME_RETRANS)) + { + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + } } } + + end_bh_atomic(); +} + +static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb) +{ + struct tcphdr *th1, *th2; + int size1, size2, avail; + struct sk_buff *buff = skb->next; + + th1 = skb->h.th; + + if (th1->urg) + return -1; + + avail = skb->end - skb->tail; + + /* + * size of tcp payload + */ + + size1 = skb->tail - 
(u8 *) (th1 + 1); + + th2 = buff->h.th; + + size2 = buff->tail - (u8 *) (th2 + 1); + + if (size2 > avail || size1 + size2 > sk->mss ) + return -1; + + /* + * ok. we will be able to collapse the packet + */ + + skb_unlink(buff); + + memcpy(skb_put(skb, size2), ((char *) th2) + (th2->doff << 2), size2); + + /* + * update sizes on original skb. both TCP and IP + */ + + skb->end_seq += size2; + + if (th2->urg) + { + th1->urg = 1; + th1->urg_ptr = th2->urg_ptr + size1; + } + + /* + * ... and off you go. + */ + + buff->free = 1; + kfree_skb(buff, FREE_WRITE); + atomic_dec(&sk->packets_out); + + /* + * Header checksum will be set by the retransmit procedure + * after calling rebuild header + */ + + th1->check = 0; + skb->csum = csum_partial((u8*) (th1+1), size1 + size2, 0); + + return 0; } /* * A socket has timed out on its send queue and wants to do a - * little retransmitting. Currently this means TCP. + * little retransmitting. + * retransmit_head can be different from the head of the write_queue + * if we are doing fast retransmit. */ void tcp_do_retransmit(struct sock *sk, int all) { struct sk_buff * skb; - struct proto *prot; - struct device *dev; - struct rtable *rt; + int ct=0; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - prot = sk->prot; - if (!all) { - /* - * If we are just retransmitting one packet reset - * to the start of the queue. - */ - sk->send_next = sk->send_head; - sk->packets_out = 0; - } - skb = sk->send_next; + start_bh_atomic(); - while (skb != NULL) + if (tp->retrans_head == NULL) + tp->retrans_head = skb_peek(&sk->write_queue); + + if (tp->retrans_head == tp->send_head) + tp->retrans_head = NULL; + + while ((skb = tp->retrans_head) != NULL) { struct tcphdr *th; - struct iphdr *iph; - int size; + u32 tcp_size; - dev = skb->dev; IS_SKB(skb); - skb->when = jiffies; - - /* dl1bke 960201 - @%$$! Hope this cures strange race conditions */ - /* with AX.25 mode VC. (esp. 
DAMA) */ - /* if the buffer is locked we should not retransmit */ - /* anyway, so we don't need all the fuss to prepare */ - /* the buffer in this case. */ - /* (the skb_pull() changes skb->data while we may */ - /* actually try to send the data. Ouch. A side */ - /* effect is that we'll send some unnecessary data, */ - /* but the alternative is disastrous... */ - if (skb_device_locked(skb)) - break; - - /* - * Discard the surplus MAC header - */ - - skb_pull(skb,((unsigned char *)skb->ip_hdr)-skb->data); - /* * In general it's OK just to use the old packet. However we * need to use the current ack and window fields. Urg and @@ -455,143 +560,76 @@ * changing the packet, we have to issue a new IP identifier. */ - iph = (struct iphdr *)skb->data; - th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2)); - size = ntohs(iph->tot_len) - (iph->ihl<<2); - - /* - * Note: We ought to check for window limits here but - * currently this is done (less efficiently) elsewhere. - */ + th = skb->h.th; - /* - * Put a MAC header back on (may cause ARPing) - */ - - { - /* ANK: UGLY, but the bug, that was here, should be fixed. - */ - struct options * opt = (struct options*)skb->proto_priv; - rt = ip_check_route(&sk->ip_route_cache, opt->srr?opt->faddr:iph->daddr, skb->localroute); - } - - iph->id = htons(ip_id_count++); -#ifndef CONFIG_NO_PATH_MTU_DISCOVERY - if (rt && ntohs(iph->tot_len) > rt->rt_mtu) - iph->frag_off &= ~htons(IP_DF); -#endif - ip_send_check(iph); - - if (rt==NULL) /* Deep poo */ + tcp_size = skb->tail - ((unsigned char *) (th + 1)); + + if (tcp_size > sk->mss) { - if(skb->sk) + if (tcp_fragment(sk, skb, sk->mss)) { - skb->sk->err_soft=ENETUNREACH; - skb->sk->error_report(skb->sk); + printk(KERN_DEBUG "tcp_fragment failed\n"); + return; } - /* Can't transmit this packet, no reason - * to transmit the later ones, even if - * the congestion window allows. 
- */ - break; + atomic_inc(&sk->packets_out); } - else + + if (!th->syn && + tcp_size < (sk->mss >> 1) && + skb->next != tp->send_head && + skb->next != (struct sk_buff *)&sk->write_queue) { - dev=rt->rt_dev; - skb->raddr=rt->rt_gateway; - skb->dev=dev; - skb->arp=1; -#ifdef CONFIG_FIREWALL - if (call_out_firewall(PF_INET, skb->dev, iph, NULL) < FW_ACCEPT) { - /* The firewall wants us to dump the packet. - * We have to check this here, because - * the drop in ip_queue_xmit only catches the - * first time we send it. We must drop on - * every resend as well. - */ - break; - } -#endif - if (rt->rt_hh) - { - memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len); - if (!rt->rt_hh->hh_uptodate) - { - skb->arp = 0; -#if RT_CACHE_DEBUG >= 2 - printk("tcp_do_retransmit: hh miss %08x via %08x\n", iph->daddr, rt->rt_gateway); -#endif - } - } - else if (dev->hard_header) - { - if(dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, skb->len)<0) - skb->arp=0; - } - + tcp_retrans_try_collapse(sk, skb); + } + + if (tp->af_specific->rebuild_header(sk, skb) == 0) + { + struct sk_buff *buff; + int size; + + if (sk->debug) + printk("retransmit sending\n"); + /* - * This is not the right way to handle this. We have to - * issue an up to date window and ack report with this - * retransmit to keep the odd buggy tcp that relies on - * the fact BSD does this happy. - * We don't however need to recalculate the entire - * checksum, so someone wanting a small problem to play - * with might like to implement RFC1141/RFC1624 and speed - * this up by avoiding a full checksum. + * update ack and window */ - - th->ack_seq = htonl(sk->acked_seq); - clear_delayed_acks(sk); + th->ack_seq = htonl(tp->rcv_nxt); th->window = ntohs(tcp_select_window(sk)); - tcp_send_check(th, sk->saddr, sk->daddr, size, skb); - - /* - * If the interface is (still) up and running, kick it. 
- */ - - if (dev->flags & IFF_UP) - { - /* - * If the packet is still being sent by the device/protocol - * below then don't retransmit. This is both needed, and good - - * especially with connected mode AX.25 where it stops resends - * occurring of an as yet unsent anyway frame! - * We still add up the counts as the round trip time wants - * adjusting. - */ - if (sk && !skb_device_locked(skb)) - { - /* Remove it from any existing driver queue first! */ - skb_unlink(skb); - /* Now queue it */ - ip_statistics.IpOutRequests++; - dev_queue_xmit(skb, dev, sk->priority); - sk->packets_out++; - } - } + + size = skb->tail - (unsigned char *) th; + tp->af_specific->send_check(sk, th, size, skb); + + skb->when = jiffies; + buff = skb_clone(skb, GFP_ATOMIC); + atomic_add(buff->truesize, &sk->wmem_alloc); + + clear_delayed_acks(sk); + + tp->af_specific->queue_xmit(sk, skb->dev, buff, 1); + } + else + { + printk(KERN_DEBUG "tcp_do_rebuild_header failed\n"); + break; } /* * Count retransmissions */ - sk->prot->retransmits++; + ct++; + sk->prot->retransmits ++; tcp_statistics.TcpRetransSegs++; /* * Record the high sequence number to help avoid doing * to much fast retransmission. */ + if (sk->retransmits) - sk->high_seq = sk->sent_seq; + tp->high_seq = tp->snd_nxt; /* - * Advance the send_next pointer so we don't keep - * retransmitting the same stuff every time we get an ACK. - */ - sk->send_next = skb->link3; - - /* * Only one retransmit requested. */ @@ -602,87 +640,22 @@ * This should cut it off before we send too many packets. */ - if (sk->packets_out >= sk->cong_window) + if (ct >= sk->cong_window) break; - skb = skb->link3; - } -} - -/* - * This routine will send an RST to the other tcp. 
- */ - -void tcp_send_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th, - struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl) -{ - struct sk_buff *buff; - struct tcphdr *t1; - int tmp; - struct device *ndev=NULL; - - /* - * Cannot reset a reset (Think about it). - */ - - if(th->rst) - return; - - /* - * We need to grab some memory, and put together an RST, - * and then put it into the queue to be sent. - */ - - buff = alloc_skb(MAX_RESET_SIZE, GFP_ATOMIC); - if (buff == NULL) - return; - - buff->sk = NULL; - buff->dev = dev; - buff->localroute = 0; - buff->csum = 0; - - /* - * Put in the IP header and routing stuff. - */ - - tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt, - sizeof(struct tcphdr),tos,ttl,NULL); - if (tmp < 0) - { - buff->free = 1; - sock_wfree(NULL, buff); - return; - } - - t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr)); - memset(t1, 0, sizeof(*t1)); - - /* - * Swap the send and the receive. - */ - - t1->dest = th->source; - t1->source = th->dest; - t1->doff = sizeof(*t1)/4; - t1->rst = 1; - - if(th->ack) - { - t1->seq = th->ack_seq; - } - else - { - t1->ack = 1; - if(!th->syn) - t1->ack_seq = th->seq; - else - t1->ack_seq = htonl(ntohl(th->seq)+1); + /* + * Advance the pointer + */ + + tp->retrans_head = skb->next; + if ((tp->retrans_head == tp->send_head) || + (tp->retrans_head == (struct sk_buff *) &sk->write_queue)) + { + tp->retrans_head = NULL; + } } - tcp_send_check(t1, saddr, daddr, sizeof(*t1), buff); - prot->queue_xmit(NULL, ndev, buff, 1); - tcp_statistics.TcpOutSegs++; + end_bh_atomic(); } /* @@ -691,19 +664,19 @@ void tcp_send_fin(struct sock *sk) { - struct proto *prot =(struct proto *)sk->prot; struct tcphdr *th =(struct tcphdr *)&sk->dummy_th; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct tcphdr *t1; struct sk_buff *buff; - struct device *dev=NULL; int tmp; + - buff = sock_wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL); + buff = sock_wmalloc(sk, 
MAX_RESET_SIZE, 1, GFP_KERNEL); if (buff == NULL) { /* This is a disaster if it occurs */ - printk(KERN_CRIT "tcp_send_fin: Impossible malloc failure"); + printk("tcp_send_fin: Impossible malloc failure"); return; } @@ -719,9 +692,8 @@ * Put in the IP header and routing stuff. */ - tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev, - IPPROTO_TCP, sk->opt, - sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache); + tmp = tp->af_specific->build_net_header(sk, buff); + if (tmp < 0) { int t; @@ -747,126 +719,115 @@ */ t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr)); - buff->dev = dev; + buff->h.th = t1; + memcpy(t1, th, sizeof(*t1)); buff->seq = sk->write_seq; sk->write_seq++; buff->end_seq = sk->write_seq; t1->seq = htonl(buff->seq); - t1->ack_seq = htonl(sk->acked_seq); + t1->ack_seq = htonl(tp->rcv_nxt); t1->window = htons(tcp_select_window(sk)); t1->fin = 1; - tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff); + + tp->af_specific->send_check(sk, t1, sizeof(*t1), buff); /* - * If there is data in the write queue, the fin must be appended to - * the write queue. + * The fin can only be transmitted after the data. 
*/ - if (skb_peek(&sk->write_queue) != NULL) - { - buff->free = 0; - if (buff->next != NULL) - { - printk(KERN_ERR "tcp_send_fin: next != NULL\n"); - skb_unlink(buff); - } - skb_queue_tail(&sk->write_queue, buff); - } - else - { - sk->sent_seq = sk->write_seq; - sk->prot->queue_xmit(sk, dev, buff, 0); - tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto); + skb_queue_tail(&sk->write_queue, buff); + + if (tp->send_head == NULL) + { + struct sk_buff *skb1; + + atomic_inc(&sk->packets_out); + tp->snd_nxt = sk->write_seq; + buff->when = jiffies; + + skb1 = skb_clone(buff, GFP_KERNEL); + atomic_add(skb1->truesize, &sk->wmem_alloc); + + tp->af_specific->queue_xmit(sk, skb1->dev, skb1, 1); + + if (!tcp_timer_is_set(sk, TIME_RETRANS)) + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); } } - -void tcp_send_synack(struct sock * newsk, struct sock * sk, struct sk_buff * skb) +int tcp_send_synack(struct sock *sk) { - struct tcphdr *t1; - unsigned char *ptr; + struct tcp_opt * tp = &(sk->tp_pinfo.af_tcp); + struct sk_buff * skb; struct sk_buff * buff; - struct device *ndev=NULL; + struct tcphdr *th; + unsigned char *ptr; int tmp; + + skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC); - buff = sock_wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC); - if (buff == NULL) + if (skb == NULL) { - sk->err = ENOMEM; - destroy_sock(newsk); - kfree_skb(skb, FREE_READ); - tcp_statistics.TcpAttemptFails++; - return; + return -ENOMEM; } - - buff->sk = newsk; - buff->localroute = newsk->localroute; - - /* - * Put in the IP header and routing stuff. - */ - - tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev, - IPPROTO_TCP, newsk->opt, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&newsk->ip_route_cache); - /* - * Something went wrong. 
- */ + skb->sk = sk; + skb->localroute = sk->localroute; - if (tmp < 0) + tmp = tp->af_specific->build_net_header(sk, skb); + + if (tmp < 0) { - sk->err = tmp; - buff->free = 1; - kfree_skb(buff,FREE_WRITE); - destroy_sock(newsk); - skb->sk = sk; - kfree_skb(skb, FREE_READ); - tcp_statistics.TcpAttemptFails++; - return; + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + return tmp; } - t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr)); - - memcpy(t1, skb->h.th, sizeof(*t1)); - buff->seq = newsk->write_seq++; - buff->end_seq = newsk->write_seq; - /* - * Swap the send and the receive. - */ - t1->dest = skb->h.th->source; - t1->source = newsk->dummy_th.source; - t1->seq = ntohl(buff->seq); - newsk->sent_seq = newsk->write_seq; - t1->window = ntohs(tcp_select_window(newsk)); - t1->syn = 1; - t1->ack = 1; - t1->urg = 0; - t1->rst = 0; - t1->psh = 0; - t1->ack_seq = htonl(newsk->acked_seq); - t1->doff = sizeof(*t1)/4+1; - ptr = skb_put(buff,4); - ptr[0] = 2; - ptr[1] = 4; - ptr[2] = ((newsk->mtu) >> 8) & 0xff; - ptr[3] =(newsk->mtu) & 0xff; - buff->csum = csum_partial(ptr, 4, 0); - tcp_send_check(t1, newsk->saddr, newsk->daddr, sizeof(*t1)+4, buff); - newsk->prot->queue_xmit(newsk, ndev, buff, 0); - tcp_reset_xmit_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT); - skb->sk = newsk; + th =(struct tcphdr *) skb_put(skb, sizeof(struct tcphdr)); + skb->h.th = th; + memset(th, 0, sizeof(struct tcphdr)); - /* - * Charge the sock_buff to newsk. 
- */ - - atomic_sub(skb->truesize, &sk->rmem_alloc); - atomic_add(skb->truesize, &newsk->rmem_alloc); + th->syn = 1; + th->ack = 1; + + th->source = sk->dummy_th.source; + th->dest = sk->dummy_th.dest; + + skb->seq = tp->snd_una; + skb->end_seq = skb->seq + 1 /* th->syn */ ; + th->seq = ntohl(skb->seq); + + th->window = ntohs(tp->rcv_wnd); + + th->ack_seq = htonl(tp->rcv_nxt); + th->doff = sizeof(*th)/4 + 1; + + ptr = skb_put(skb, TCPOLEN_MSS); + ptr[0] = TCPOPT_MSS; + ptr[1] = TCPOLEN_MSS; + ptr[2] = ((sk->mss) >> 8) & 0xff; + ptr[3] = (sk->mss) & 0xff; + skb->csum = csum_partial(ptr, TCPOLEN_MSS, 0); + + tp->af_specific->send_check(sk, th, sizeof(*th)+4, skb); + + skb_queue_tail(&sk->write_queue, skb); + + atomic_inc(&sk->packets_out); - skb_queue_tail(&sk->receive_queue,skb); - sk->ack_backlog++; + skb->when = jiffies; + buff = skb_clone(skb, GFP_ATOMIC); + + atomic_add(skb->truesize, &sk->wmem_alloc); + + tp->af_specific->queue_xmit(sk, skb->dev, buff, 1); + + tcp_reset_xmit_timer(sk, TIME_RETRANS, TCP_TIMEOUT_INIT); + tcp_statistics.TcpOutSegs++; + + return 0; } /* @@ -876,31 +837,31 @@ * - delay time <= 0.5 HZ * - must send at least every 2 full sized packets * - we don't have a window update to send - * - * additional thoughts: - * - we should not delay sending an ACK if we have ato > 0.5 HZ. - * My thinking about this is that in this case we will just be - * systematically skewing the RTT calculation. (The rule about - * sending every two full sized packets will never need to be - * invoked, the delayed ack will be sent before the ATO timeout - * every time. Of course, the relies on our having a good estimate - * for packet interarrival times.) 
*/ -void tcp_send_delayed_ack(struct sock * sk, int max_timeout, unsigned long timeout) + +void tcp_send_delayed_ack(struct sock * sk, int max_timeout) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + unsigned long timeout, now; + /* Calculate new timeout */ - if (timeout > max_timeout) - timeout = max_timeout; - if (sk->bytes_rcv >= sk->max_unacked) - timeout = 0; - timeout += jiffies; - - /* Use new timeout only if there wasn't an older one earlier */ - if (!del_timer(&sk->delack_timer) || timeout < sk->delack_timer.expires) - sk->delack_timer.expires = timeout; + now = jiffies; + timeout = tp->ato; - sk->ack_backlog++; - add_timer(&sk->delack_timer); + if (timeout > max_timeout || sk->bytes_rcv > (sk->mss << 2)) + { + timeout = now; + } + else + timeout += now; + + /* Use new timeout only if there wasn't an older one earlier */ + if (!del_timer(&tp->delack_timer) || timeout < tp->delack_timer.expires) + { + tp->delack_timer.expires = timeout; + } + + add_timer(&tp->delack_timer); } @@ -912,29 +873,15 @@ void tcp_send_ack(struct sock *sk) { struct sk_buff *buff; - struct tcphdr *t1; - struct device *dev = NULL; + struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); + struct tcphdr *th; int tmp; + if(sk->zapped) - return; /* We have been reset, we may not send again */ - - /* - * If we have nothing queued for transmit and the transmit timer - * is on we are just doing an ACK timeout and need to switch - * to a keepalive. - */ - - clear_delayed_acks(sk); - - if (sk->send_head == NULL - && skb_queue_empty(&sk->write_queue) - && sk->ip_xmit_timeout == TIME_WRITE) { - if (sk->keepopen) - tcp_reset_xmit_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN); - else - del_timer(&sk->retransmit_timer); + /* We have been reset, we may not send again */ + return; } /* @@ -951,11 +898,13 @@ * bandwidth on slow links to send a spare ack than * resend packets. 
*/ - - tcp_send_delayed_ack(sk, HZ/2, HZ/2); + + tcp_send_delayed_ack(sk, HZ/2); return; } + clear_delayed_acks(sk); + /* * Assemble a suitable TCP frame */ @@ -968,35 +917,39 @@ * Put in the IP header and routing stuff. */ - tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, - IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache); + tmp = tp->af_specific->build_net_header(sk, buff); + if (tmp < 0) { buff->free = 1; sock_wfree(sk, buff); return; } -#if 0 /* why does this result in problems? */ -#ifndef CONFIG_NO_PATH_MTU_DISCOVERY - buff->ip_hdr->frag_off |= htons(IP_DF); -#endif -#endif - t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr)); + th =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr)); + + memcpy(th, &sk->dummy_th, sizeof(struct tcphdr)); + + /* + * Swap the send and the receive. + */ + + th->window = ntohs(tcp_select_window(sk)); + th->seq = ntohl(tp->snd_nxt); + th->ack_seq = ntohl(tp->rcv_nxt); /* * Fill in the packet and send it */ - - memcpy(t1, &sk->dummy_th, sizeof(*t1)); - t1->seq = htonl(sk->sent_seq); - t1->ack_seq = htonl(sk->acked_seq); - t1->window = htons(tcp_select_window(sk)); - tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff); + tp->af_specific->send_check(sk, th, sizeof(struct tcphdr), buff); + if (sk->debug) - printk(KERN_ERR "\rtcp_ack: seq %x ack %x\n", sk->sent_seq, sk->acked_seq); - sk->prot->queue_xmit(sk, dev, buff, 1); + printk("\rtcp_send_ack: seq %x ack %x\n", + tp->snd_nxt, tp->rcv_nxt); + + tp->af_specific->queue_xmit(sk, buff->dev, buff, 1); + tcp_statistics.TcpOutSegs++; } @@ -1007,9 +960,9 @@ void tcp_write_wakeup(struct sock *sk) { - struct sk_buff *buff,*skb; + struct sk_buff *buff, *skb; struct tcphdr *t1; - struct device *dev=NULL; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); int tmp; if (sk->zapped) @@ -1030,115 +983,56 @@ { return; } - if ( before(sk->sent_seq, sk->window_seq) && - (skb=skb_peek(&sk->write_queue))) + + if (before(tp->snd_nxt, tp->snd_una + 
tp->snd_wnd) && + (skb=tp->send_head)) { /* * We are probing the opening of a window * but the window size is != 0 * must have been a result SWS avoidance ( sender ) */ - - struct iphdr *iph; - struct tcphdr *th; - struct tcphdr *nth; - unsigned long win_size; -#if 0 - unsigned long ow_size; -#endif - - /* - * How many bytes can we send ? - */ - - win_size = sk->window_seq - sk->sent_seq; - - /* - * Recover the buffer pointers - */ - - iph = (struct iphdr *)skb->ip_hdr; - th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2)); - /* - * Grab the data for a temporary frame - */ - - buff = sock_wmalloc(sk, win_size + th->doff * 4 + - (iph->ihl << 2) + - sk->prot->max_header + 15, - 1, GFP_ATOMIC); - if ( buff == NULL ) - return; - - /* - * If we strip the packet on the write queue we must - * be ready to retransmit this one - */ - - buff->free = /*0*/1; + struct tcphdr *th; + unsigned long win_size; - buff->sk = sk; - buff->localroute = sk->localroute; - - /* - * Put headers on the new packet - */ + win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una); - tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, - IPPROTO_TCP, sk->opt, buff->truesize, - sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache); - if (tmp < 0) - { - sock_wfree(sk, buff); - return; + if (win_size < skb->end_seq - skb->seq) + { + if (tcp_fragment(sk, skb, win_size)) + { + printk(KERN_DEBUG "tcp_write_wakeup: " + "fragment failed\n"); + return; + } } + + + th = skb->h.th; - /* - * Move the TCP header over - */ + tp->af_specific->send_check(sk, th, th->doff * 4 + win_size, + skb); - buff->dev = dev; + buff = skb_clone(skb, GFP_ATOMIC); - nth = (struct tcphdr *) skb_put(buff,sizeof(*th)); + atomic_add(buff->truesize, &sk->wmem_alloc); + atomic_inc(&sk->packets_out); - memcpy(nth, th, sizeof(*th)); - - /* - * Correct the new header - */ - - nth->ack = 1; - nth->ack_seq = htonl(sk->acked_seq); - nth->window = htons(tcp_select_window(sk)); - nth->check = 0; + clear_delayed_acks(sk); - /* - * Copy TCP 
options and data start to our new buffer - */ - - buff->csum = csum_partial_copy((void *)(th + 1), skb_put(buff,win_size), - win_size + th->doff*4 - sizeof(*th), 0); - - /* - * Remember our right edge sequence number. - */ - - buff->end_seq = sk->sent_seq + win_size; - sk->sent_seq = buff->end_seq; /* Hack */ - if(th->urg && ntohs(th->urg_ptr) < win_size) - nth->urg = 0; + if (!tcp_timer_is_set(sk, TIME_RETRANS)) + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); - /* - * Checksum the split buffer - */ - - tcp_send_check(nth, sk->saddr, sk->daddr, - nth->doff * 4 + win_size , buff); + skb->when = jiffies; + + update_send_head(sk); + + tp->snd_nxt = skb->end_seq; } else { - buff = sock_wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC); + buff = sock_wmalloc(sk,MAX_ACK_SIZE, 1, GFP_ATOMIC); if (buff == NULL) return; @@ -1151,15 +1045,15 @@ * Put in the IP header and routing stuff. */ - tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev, - IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache); + tmp = tp->af_specific->build_net_header(sk, buff); + if (tmp < 0) { sock_wfree(sk, buff); return; } - t1 = (struct tcphdr *)skb_put(buff,sizeof(struct tcphdr)); + t1 = (struct tcphdr *) skb_put(buff, sizeof(struct tcphdr)); memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1)); /* @@ -1167,89 +1061,43 @@ * This should cause the other end to send an ack. */ - t1->seq = htonl(sk->sent_seq-1); + t1->seq = htonl(tp->snd_nxt-1); /* t1->fin = 0; -- We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */ - t1->ack_seq = htonl(sk->acked_seq); + t1->ack_seq = htonl(tp->rcv_nxt); t1->window = htons(tcp_select_window(sk)); - tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff); - + + tp->af_specific->send_check(sk, t1, sizeof(*t1), buff); } /* * Send it. */ - - sk->prot->queue_xmit(sk, dev, buff, 1); + + tp->af_specific->queue_xmit(sk, buff->dev, buff, 1); tcp_statistics.TcpOutSegs++; } /* * A window probe timeout has occurred. 
+ * If window is not closed send a partial packet + * else a zero probe. */ void tcp_send_probe0(struct sock *sk) { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + if (sk->zapped) return; /* After a valid reset we can send no more */ - tcp_write_wakeup(sk); - - sk->backoff++; - sk->rto = min(sk->rto << 1, 120*HZ); - sk->retransmits++; - sk->prot->retransmits ++; - tcp_reset_xmit_timer (sk, TIME_PROBE0, sk->rto); -} - -/* - * Remove the portion of a packet that has already been sent. - * Needed to deal with buggy TCP implementations that can't deal - * with seeing a packet that contains some data that has already - * been received. - */ -void tcp_shrink_skb(struct sock *sk, struct sk_buff *skb, u32 ack) -{ - struct iphdr *iph; - struct tcphdr *th; - unsigned char *old, *new; - unsigned long len; - int diff; - /* - * Recover the buffer pointers - */ - - iph = (struct iphdr *)skb->ip_hdr; - th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2)); + tcp_write_wakeup(sk); - /* how much data are we droping from the tcp frame */ - diff = ack - skb->seq; - /* how much data are we keeping in the tcp frame */ - len = (skb->end_seq - (th->fin + th->syn)) - ack; - - /* pointers to new start of remaining data, and old start */ - new = (unsigned char *)th + th->doff*4; - old = new+diff; - - /* Update our starting seq number */ - skb->seq = ack; - th->seq = htonl(ack); - iph->tot_len = htons(ntohs(iph->tot_len)-diff); - - /* Get the partial checksum for the IP options */ - if (th->doff*4 - sizeof(*th) > 0) - skb->csum = csum_partial((void *)(th+1), - th->doff*4-sizeof(*th),0); - else - skb->csum = 0; + tp->pending = TIME_PROBE0; - /* Copy the good data down and get it's checksum */ - skb->csum = csum_partial_copy((void *)old,(void *)new,len,skb->csum); + tp->backoff++; + tp->probes_out++; - /* shorten the skb */ - skb_trim(skb,skb->len-diff); - - /* Checksum the shrunk buffer */ - tcp_send_check(th, sk->saddr, sk->daddr, - th->doff * 4 + len , skb); + tcp_reset_xmit_timer (sk, 
TIME_PROBE0, + min(tp->rto << tp->backoff, 120*HZ)); } diff -u --recursive --new-file v2.1.7/linux/net/ipv4/tcp_timer.c linux/net/ipv4/tcp_timer.c --- v2.1.7/linux/net/ipv4/tcp_timer.c Sun Jun 2 13:23:13 1996 +++ linux/net/ipv4/tcp_timer.c Thu Nov 7 14:09:18 1996 @@ -18,132 +18,142 @@ * Matthew Dillon, * Arnt Gulbrandsen, * Jorge Cwik, - * - * Fixes: - * - * Eric Schenk : Fix retransmission timeout counting. */ #include -void tcp_delack_timer(unsigned long data) +static void tcp_sltimer_handler(unsigned long); +static void tcp_syn_recv_timer(unsigned long); +static void tcp_keepalive(unsigned long data); + +struct timer_list tcp_slow_timer = { + NULL, NULL, + 0, 0, + tcp_sltimer_handler, +}; + + +struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = { + {0, TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer}, /* SYNACK */ + {0, TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive} /* KEEPALIVE */ +}; + +/* + * Using different timers for retransmit, delayed acks and probes + * We may wish to use just one timer maintaining a list of expire jiffies + * to optimize. 
+ */ + +void tcp_init_xmit_timers(struct sock *sk) { - tcp_send_ack((struct sock *) data); + init_timer(&sk->tp_pinfo.af_tcp.retransmit_timer); + sk->tp_pinfo.af_tcp.retransmit_timer.function=&tcp_retransmit_timer; + sk->tp_pinfo.af_tcp.retransmit_timer.data = (unsigned long) sk; + + init_timer(&sk->tp_pinfo.af_tcp.delack_timer); + sk->tp_pinfo.af_tcp.delack_timer.function=&tcp_delack_timer; + sk->tp_pinfo.af_tcp.delack_timer.data = (unsigned long) sk; + + init_timer(&sk->tp_pinfo.af_tcp.probe_timer); + sk->tp_pinfo.af_tcp.probe_timer.function=&tcp_probe_timer; + sk->tp_pinfo.af_tcp.probe_timer.data = (unsigned long) sk; } /* * Reset the retransmission timer */ -void tcp_reset_xmit_timer(struct sock *sk, int why, unsigned long when) +void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) { - del_timer(&sk->retransmit_timer); - sk->ip_xmit_timeout = why; - if (why == TIME_WRITE) { - /* In this case we want to timeout on the first packet - * in the resend queue. If the resend queue is empty, - * then the packet we are sending hasn't made it there yet, - * so we timeout from the current time. - */ - if (sk->send_head) { - sk->retransmit_timer.expires = - sk->send_head->when + when; - } else { - /* This should never happen! - */ - printk(KERN_ERR "Error: send_head NULL in xmit_timer\n"); - sk->ip_xmit_timeout = 0; - return; - } - } else { - sk->retransmit_timer.expires = jiffies+when; - } + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - if (sk->retransmit_timer.expires < jiffies) { - /* We can get here if we reset the timer on an event - * that could not fire because the interrupts were disabled. - * make sure it happens soon. - */ - sk->retransmit_timer.expires = jiffies+2; + if((long)when <= 0) + { + printk("xmit_timer <= 0 - timer:%d when:%lx\n", what, when); + when=HZ/50; } - add_timer(&sk->retransmit_timer); -} -/* - * POLICY: - * - * This is the normal code called for timeouts. It does the retransmission - * and then does backoff. 
tcp_do_retransmit is separated out because - * tcp_ack needs to send stuff from the retransmit queue without - * initiating a backoff. - */ + switch (what) { + case TIME_RETRANS: + /* + * When setting the transmit timer the probe timer + * should not be set. + * The delayed ack timer can be set if we are changing the + * retransmit timer when removing acked frames. + */ + del_timer(&tp->probe_timer); + del_timer(&tp->retransmit_timer); + tp->retransmit_timer.expires=jiffies+when; + add_timer(&tp->retransmit_timer); + break; + case TIME_DACK: + del_timer(&tp->delack_timer); + tp->delack_timer.expires=jiffies+when; + add_timer(&tp->delack_timer); + break; -static void tcp_retransmit_time(struct sock *sk, int all) -{ - /* - * record how many times we've timed out. - * This determines when we should quite trying. - * This needs to be counted here, because we should not be - * counting one per packet we send, but rather one per round - * trip timeout. - */ - sk->retransmits++; + case TIME_PROBE0: + del_timer(&tp->probe_timer); + tp->probe_timer.expires=jiffies+when; + add_timer(&tp->probe_timer); + break; - tcp_do_retransmit(sk, all); + case TIME_WRITE: + printk("bug: tcp_reset_xmit_timer TIME_WRITE\n"); + break; - /* - * Increase the timeout each time we retransmit. Note that - * we do not increase the rtt estimate. rto is initialized - * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests - * that doubling rto each time is the least we can get away with. - * In KA9Q, Karn uses this for the first few times, and then - * goes to quadratic. netBSD doubles, but only goes up to *64, - * and clamps at 1 to 64 sec afterwards. Note that 120 sec is - * defined in the protocol as the maximum possible RTT. I guess - * we'll have to use something other than TCP to talk to the - * University of Mars. - * - * PAWS allows us longer timeouts and large windows, so once - * implemented ftp to mars will work nicely. We will have to fix - * the 120 second clamps though! 
- */ + default: + printk("bug: unknown timer value\n"); + } +} - sk->backoff++; - sk->rto = min(sk->rto << 1, 120*HZ); +void tcp_clear_xmit_timer(struct sock *sk, int what) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - /* be paranoid about the data structure... */ - if (sk->send_head) - tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto); - else - printk(KERN_ERR "send_head NULL in tcp_retransmit_time\n"); + switch (what) { + case TIME_RETRANS: + del_timer(&tp->retransmit_timer); + break; + case TIME_DACK: + del_timer(&tp->delack_timer); + break; + case TIME_PROBE0: + del_timer(&tp->probe_timer); + break; + default: + printk("bug: unknown timer value\n"); + } } -/* - * POLICY: - * Congestion control. - * - * A timer event has trigger a tcp retransmit timeout. The - * socket xmit queue is ready and set up to send. Because - * the ack receive code keeps the queue straight we do - * nothing clever here. - */ - -void tcp_retransmit(struct sock *sk, int all) +int tcp_timer_is_set(struct sock *sk, int what) { - if (all) - { - tcp_retransmit_time(sk, all); - return; - } + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; - sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */ - /* sk->ssthresh in theory can be zero. I guess that's OK */ - sk->cong_count = 0; - sk->cong_window = 1; + switch (what) { + case TIME_RETRANS: + return tp->retransmit_timer.next != NULL; + break; + case TIME_DACK: + return tp->delack_timer.next != NULL; + break; + case TIME_PROBE0: + return tp->probe_timer.next != NULL; + break; + default: + printk("bug: unknown timer value\n"); + } + return 0; +} - /* Do the actual retransmit. 
*/ - tcp_retransmit_time(sk, all); +void tcp_clear_xmit_timers(struct sock *sk) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + del_timer(&tp->retransmit_timer); + del_timer(&tp->delack_timer); + del_timer(&tp->probe_timer); } /* @@ -175,8 +185,11 @@ sk->err=sk->err_soft; else sk->err=ETIMEDOUT; + + printk(KERN_DEBUG "syn timeout\n"); + sk->error_report(sk); - del_timer(&sk->retransmit_timer); + tcp_clear_xmit_timers(sk); tcp_statistics.TcpAttemptFails++; /* Is this right ??? - FIXME - */ tcp_set_state(sk,TCP_CLOSE); /* Don't FIN, we got nothing back */ @@ -192,7 +205,9 @@ else sk->err = ETIMEDOUT; sk->error_report(sk); - del_timer(&sk->retransmit_timer); + + tcp_clear_xmit_timers(sk); + /* * Time wait the socket */ @@ -213,19 +228,147 @@ return 1; } -/* - * It could be we got here because we needed to send an ack, - * so we need to check for that and not just normal retransmit. - */ -static void tcp_time_write_timeout(struct sock * sk) -{ + +void tcp_delack_timer(unsigned long data) { + + struct sock *sk = (struct sock*)data; + + if(sk->zapped) + { + return; + } + + if (sk->delayed_acks) + { + tcp_read_wakeup(sk); + } +} + +void tcp_probe_timer(unsigned long data) { + + struct sock *sk = (struct sock*)data; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if(sk->zapped) + { + return; + } + + if (sk->users) + { + /* + * Try again in second + */ + + tcp_reset_xmit_timer(sk, TIME_PROBE0, HZ); + return; + } + /* - * Retransmission + * *WARNING* RFC 1122 forbids this + * FIXME: We ought not to do it, Solaris 2.5 actually has fixing + * this behaviour in Solaris down as a bug fix. 
[AC] */ - sk->prot->retransmit (sk, 0); - tcp_write_timeout(sk); + if (tp->probes_out > TCP_RETR2) + { + if(sk->err_soft) + sk->err = sk->err_soft; + else + sk->err = ETIMEDOUT; + sk->error_report(sk); + + /* + * Time wait the socket + */ + if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 + || sk->state == TCP_CLOSING ) + { + tcp_set_state(sk, TCP_TIME_WAIT); + tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); + } + else + { + /* + * Clean up time. + */ + tcp_set_state(sk, TCP_CLOSE); + } + } + + tcp_send_probe0(sk); } +static __inline__ int tcp_keepopen_proc(struct sock *sk) +{ + int res = 0; + + if (sk->state == TCP_ESTABLISHED || sk->state == TCP_CLOSE_WAIT) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + __u32 elapsed = jiffies - tp->rcv_tstamp; + + if (elapsed >= TCP_KEEPALIVE_TIME) + { + if (tp->probes_out > TCP_KEEPALIVE_PROBES) + { + if(sk->err_soft) + sk->err = sk->err_soft; + else + sk->err = ETIMEDOUT; + + tcp_set_state(sk, TCP_CLOSE); + } + else + { + tp->probes_out++; + tp->pending = TIME_KEEPOPEN; + tcp_write_wakeup(sk); + res = 1; + } + } + } + return res; +} + +/* + * Check all sockets for keepalive timer + * Called every 75 seconds + * This timer is started by af_inet init routine and is constantly + * running. + * + * It might be better to maintain a count of sockets that need it using + * setsockopt/tcp_destroy_sk and only set the timer when needed. + */ + +/* + * don't send over 5 keepopens at a time to avoid burstiness + * on big servers [AC] + */ +#define MAX_KA_PROBES 5 + +static void tcp_keepalive(unsigned long data) +{ + struct sock *sk; + int count = 0; + int i; + + for(i=0; i < SOCK_ARRAY_SIZE; i++) + { + sk = tcp_prot.sock_array[i]; + while (sk) + { + if (sk->keepopen) + { + count += tcp_keepopen_proc(sk); + } + + if (count == MAX_KA_PROBES) + return; + + sk = sk->next; + } + } +} /* * The TCP retransmit timer. This lacks a few small details. @@ -235,67 +378,174 @@ * 2. 
On a 'major timeout' as defined by RFC1122 we shouldn't report * ETIMEDOUT if we know an additional 'soft' error caused this. * tcp_err should save a 'soft error' for us. + * [Unless someone has broken it then it does, except for one 2.0 + * broken case of a send when the route/device is directly unreachable, + * and we error but should retry! - FIXME] [AC] */ void tcp_retransmit_timer(unsigned long data) { struct sock *sk = (struct sock*)data; - int why = sk->ip_xmit_timeout; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; /* * We are reset. We will send no more retransmits. */ - + if(sk->zapped) + { + tcp_clear_xmit_timer(sk, TIME_RETRANS); return; - - /* - * Only process if socket is not in use + } + + /* + * Clear delay ack timer */ - if (sk->users) + tcp_clear_xmit_timer(sk, TIME_DACK); + + /* + * Retransmission + */ + + tp->retrans_head = NULL; + + + if (sk->retransmits == 0) { - /* Try again in 1 second */ - sk->retransmit_timer.expires = jiffies+HZ; - add_timer(&sk->retransmit_timer); - return; + /* + * remember window where we lost + * "one half of the current window but at least 2 segments" + */ + + sk->ssthresh = max(sk->cong_window >> 1, 2); + sk->cong_count = 0; + sk->cong_window = 1; } - if (sk->ack_backlog && !sk->dead) - sk->data_ready(sk,0); + atomic_inc(&sk->retransmits); + + tcp_do_retransmit(sk, 0); - /* Now we need to figure out why the socket was on the timer. */ + /* + * Increase the timeout each time we retransmit. Note that + * we do not increase the rtt estimate. rto is initialized + * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests + * that doubling rto each time is the least we can get away with. + * In KA9Q, Karn uses this for the first few times, and then + * goes to quadratic. netBSD doubles, but only goes up to *64, + * and clamps at 1 to 64 sec afterwards. Note that 120 sec is + * defined in the protocol as the maximum possible RTT. 
I guess + * we'll have to use something other than TCP to talk to the + * University of Mars. + * + * PAWS allows us longer timeouts and large windows, so once + * implemented ftp to mars will work nicely. We will have to fix + * the 120 second clamps though! + */ + + tp->backoff++; + tp->rto = min(tp->rto << 1, 120*HZ); + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + + tcp_write_timeout(sk); +} - switch (why) +/* + * Slow timer for SYN-RECV sockets + */ + +static void tcp_syn_recv_timer(unsigned long data) +{ + struct sock *sk; + unsigned long now = jiffies; + int i; + + for(i=0; i < SOCK_ARRAY_SIZE; i++) { - /* Window probing */ - case TIME_PROBE0: - tcp_send_probe0(sk); - tcp_write_timeout(sk); - break; + sk = tcp_prot.sock_array[i]; + while (sk) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if (sk->state == TCP_LISTEN && !sk->users && + tp->syn_wait_queue) + { + struct open_request *req; + + req = tp->syn_wait_queue; + + while (tp->syn_wait_queue && + (((long)(req->expires - now)) <= 0)) + { + struct open_request *conn; + + conn = req; + req = req->dl_next; + + if (conn->sk && conn->sk->state > TCP_SYN_RECV) + continue; + + tcp_synq_unlink(tp, conn); + + if (conn->retrans >= TCP_RETR1) + { + printk(KERN_DEBUG "syn_recv: " + "too many retransmits\n"); + (*conn->class->destructor)(conn); + tcp_dec_slow_timer(TCP_SLT_SYNACK); + kfree(conn); + } + else + { + __u32 timeo; + + (*conn->class->rtx_syn_ack)(sk, conn); + + conn->retrans++; + printk(KERN_DEBUG "syn_ack rtx %d\n", conn->retrans); + timeo = min((TCP_TIMEOUT_INIT + << conn->retrans), + 120*HZ); + conn->expires = now + timeo; + tcp_synq_queue(tp, conn); + } + } + } + + sk = sk->next; + } + } +} - /* Retransmitting */ - case TIME_WRITE: - tcp_time_write_timeout(sk); - break; +void tcp_sltimer_handler(unsigned long data) +{ + struct tcp_sl_timer *slt = tcp_slt_array; + unsigned long next = ~0UL; + unsigned long now = jiffies; + int i; - /* Sending Keepalives */ - case TIME_KEEPOPEN: - /* - * this 
reset_timer() call is a hack, this is not - * how KEEPOPEN is supposed to work. - */ - tcp_reset_xmit_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN); - /* Send something to keep the connection open. */ - if (sk->prot->write_wakeup) - sk->prot->write_wakeup (sk); - sk->retransmits++; - sk->prot->retransmits++; - tcp_write_timeout(sk); - break; + for (i=0; i < TCP_SLT_MAX; i++, slt++) + { + if (slt->count) + { + long trigger; - default: - printk (KERN_ERR "rexmit_timer: timer expired - reason unknown\n"); - break; + trigger = slt->period - ((long)(now - slt->last)); + + if (trigger <= 0) + { + (*slt->handler)((unsigned long) slt); + slt->last = now; + trigger = slt->period; + } + next = min(next, trigger); + } + } + + if (next != ~0UL) + { + tcp_slow_timer.expires = now + next; + add_timer(&tcp_slow_timer); } } diff -u --recursive --new-file v2.1.7/linux/net/ipv4/timer.c linux/net/ipv4/timer.c --- v2.1.7/linux/net/ipv4/timer.c Thu Feb 22 16:50:46 1996 +++ linux/net/ipv4/timer.c Sun Nov 3 11:04:45 1996 @@ -50,7 +50,7 @@ #include #include -void delete_timer (struct sock *t) +void net_delete_timer (struct sock *t) { unsigned long flags; @@ -63,9 +63,9 @@ restore_flags (flags); } -void reset_timer (struct sock *t, int timeout, unsigned long len) +void net_reset_timer (struct sock *t, int timeout, unsigned long len) { - delete_timer (t); + net_delete_timer (t); t->timeout = timeout; #if 1 /* FIXME: ??? */ @@ -116,7 +116,7 @@ case TIME_DONE: /* If the socket hasn't been closed off, re-try a bit later */ if (!sk->dead) { - reset_timer(sk, TIME_DONE, TCP_DONE_TIME); + net_reset_timer(sk, TIME_DONE, TCP_DONE_TIME); break; } @@ -140,11 +140,11 @@ case TIME_CLOSE: /* We've waited long enough, close the socket. 
*/ sk->state = TCP_CLOSE; - delete_timer (sk); + net_delete_timer (sk); if (!sk->dead) sk->state_change(sk); sk->shutdown = SHUTDOWN_MASK; - reset_timer (sk, TIME_DONE, TCP_DONE_TIME); + net_reset_timer (sk, TIME_DONE, TCP_DONE_TIME); break; default: diff -u --recursive --new-file v2.1.7/linux/net/ipv4/udp.c linux/net/ipv4/udp.c --- v2.1.7/linux/net/ipv4/udp.c Tue Oct 29 19:58:50 1996 +++ linux/net/ipv4/udp.c Sun Nov 3 11:04:45 1996 @@ -150,8 +150,8 @@ * to find the appropriate port. */ -void udp_err(int type, int code, unsigned char *header, __u32 daddr, - __u32 saddr, struct inet_protocol *protocol) +void udp_err(int type, int code, unsigned char *header, __u32 info, + __u32 daddr, __u32 saddr, struct inet_protocol *protocol) { struct udphdr *uh; struct sock *sk; @@ -437,8 +437,8 @@ * Temporary */ -static int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len, int noblock, - int flags) +int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len, int noblock, + int flags) { if(msg->msg_iovlen==1) return udp_sendto(sk,msg->msg_iov[0].iov_base,len, noblock, flags, msg->msg_name, msg->msg_namelen); @@ -523,7 +523,7 @@ /* - * This should be easy, if there is something there we\ + * This should be easy, if there is something there we * return it, otherwise we block. */ @@ -591,8 +591,9 @@ return(copied); } -int udp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len) +int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; struct rtable *rt; if (addr_len < sizeof(*usin)) return(-EINVAL); @@ -632,7 +633,7 @@ destroy_sock(sk); } -static inline void udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +static inline int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { /* * Charge it to the socket, dropping if the queue is full. 
@@ -647,9 +648,10 @@ ip_statistics.IpInDelivers--; skb->sk = NULL; kfree_skb(skb, FREE_WRITE); - return; + return 0; } udp_statistics.UdpInDatagrams++; + return 0; } @@ -699,15 +701,6 @@ int addr_type; /* - * If we're doing a "redo" (the socket was busy last time - * around), we can just queue the packet now.. - */ - if (redo) { - udp_queue_rcv_skb(skb->sk, skb); - return 0; - } - - /* * First time through the loop.. Do all the setup stuff * (including finding out the socket we go to etc) */ @@ -843,26 +836,26 @@ struct proto udp_prot = { udp_close, - ip_build_header, udp_connect, NULL, - ip_queue_xmit, NULL, NULL, NULL, - udp_rcv, datagram_select, udp_ioctl, NULL, NULL, + NULL, ip_setsockopt, ip_getsockopt, udp_sendmsg, udp_recvmsg, - NULL, /* No special bind function */ + NULL, /* No special bind function */ + udp_queue_rcv_skb, 128, 0, "UDP", 0, 0, - {NULL,} + NULL }; + diff -u --recursive --new-file v2.1.7/linux/net/ipv6/Makefile linux/net/ipv6/Makefile --- v2.1.7/linux/net/ipv6/Makefile Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/Makefile Sun Nov 3 11:04:45 1996 @@ -0,0 +1,18 @@ +# +# Makefile for the Linux TCP/IP (INET6) layer. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). 
+# + + +O_TARGET := ipv6.o +O_OBJS := af_inet6.o ipv6_output.o ipv6_input.o addrconf.o sit.o \ + ipv6_route.o ipv6_sockglue.o ndisc.o udp.o raw.o \ + protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + exthdrs.o sysctl_net_ipv6.o datagram.o + +M_OBJS := $(O_TARGET) + +include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.1.7/linux/net/ipv6/addrconf.c linux/net/ipv6/addrconf.c --- v2.1.7/linux/net/ipv6/addrconf.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/addrconf.c Sun Nov 3 11:04:45 1996 @@ -0,0 +1,1311 @@ +/* + * IPv6 Address [auto]configuration + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#define HASH_SIZE 16 +/* + * Configured unicast address list + */ +struct inet6_ifaddr *inet6_addr_lst[HASH_SIZE]; + +/* + * Hash list of configured multicast addresses + */ +struct ipv6_mc_list *inet6_mcast_lst[HASH_SIZE]; + +/* + * AF_INET6 device list + */ +struct inet6_dev *inet6_dev_lst; +int in6_ifnum = 0; + +atomic_t addr_list_lock = 0; + +void addrconf_verify(unsigned long); + +static struct timer_list addr_chk_timer = { + NULL, NULL, + 0, 0, addrconf_verify +}; + + +int DupAddrDetectTransmits = 1; + +/* + * /proc/sys switch for autoconf (enabled by default) + */ +int addrconf_sys_autoconf = 1; + +static void addrconf_dad_start(struct inet6_ifaddr *ifp); +static void addrconf_rs_timer(unsigned long data); + +int ipv6_addr_type(struct in6_addr *addr) +{ + u32 st; + + st = addr->s6_addr32[0]; + + /* + * UCast Provider Based Address + * 0x4/3 + */ + + if ((st & 
__constant_htonl(0xE0000000)) == + __constant_htonl(0x40000000)) + { + return IPV6_ADDR_UNICAST; + } + + if ((st & __constant_htonl(0xFF000000)) == + __constant_htonl(0xFF000000)) + { + int type = IPV6_ADDR_MULTICAST; + + switch((st >> 16) & 0x0f) + { + case 0x01: + type |= IPV6_ADDR_LOOPBACK; + break; + case 0x02: + type |= IPV6_ADDR_LINKLOCAL; + break; + case 0x05: + type |= IPV6_ADDR_SITELOCAL; + break; + } + return type; + } + + if ((st & __constant_htonl(0xFFC00000)) == + __constant_htonl(0xFE800000)) + { + return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST); + } + + if ((st & __constant_htonl(0xFFC00000)) == + __constant_htonl(0xFEC00000)) + { + return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST); + } + + if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) + { + if (addr->s6_addr32[2] == 0) + { + if (addr->in6_u.u6_addr32[3] == 0) + { + return IPV6_ADDR_ANY; + } + + if (addr->s6_addr32[3] == __constant_htonl(0x00000001)) + { + return (IPV6_ADDR_LOOPBACK | + IPV6_ADDR_UNICAST); + } + + return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST); + } + + if (addr->s6_addr32[2] == __constant_htonl(0x0000ffff)) + return IPV6_ADDR_MAPPED; + } + + return IPV6_ADDR_RESERVED; +} + +struct inet6_dev * ipv6_add_dev(struct device *dev) +{ + struct inet6_dev *dev6; + + /* + * called by netdev notifier from a syscall + */ + dev6 = (struct inet6_dev *) kmalloc(sizeof(struct inet6_dev), + GFP_ATOMIC); + + if (dev6 == NULL) + return NULL; + + memset(dev6, 0, sizeof(struct inet6_dev)); + dev6->dev = dev; + dev6->if_index = ++in6_ifnum; + + /* + * insert at head. 
+ */ + + dev6->next = inet6_dev_lst; + inet6_dev_lst = dev6; + + return dev6; +} + +struct inet6_dev * ipv6_dev_by_index(int index) +{ + struct inet6_dev *in6_dev; + + for (in6_dev = inet6_dev_lst; in6_dev; in6_dev = in6_dev->next) + { + if (in6_dev->if_index == index) + return in6_dev; + } + + return NULL; +} + +void addrconf_forwarding_on(void) +{ + struct inet6_dev *in6_dev; + struct in6_addr maddr; + + for (in6_dev = inet6_dev_lst; in6_dev; in6_dev = in6_dev->next) + { + printk(KERN_DEBUG "dev %s\n", in6_dev->dev->name); + + if (in6_dev->dev->type == ARPHRD_ETHER) + { + printk(KERN_DEBUG "joining all-routers\n"); + in6_dev->router = 1; + ipv6_addr_all_routers(&maddr); + ipv6_dev_mc_inc(in6_dev->dev, &maddr); + } + } + + if (last_resort_rt && (last_resort_rt->rt_flags & RTI_ALLONLINK)) + { + rt_release(last_resort_rt); + last_resort_rt = NULL; + } +} + +struct inet6_dev * ipv6_get_idev(struct device *dev) +{ + struct inet6_dev *in6_dev; + + for (in6_dev = inet6_dev_lst; in6_dev; in6_dev = in6_dev->next) + { + if (in6_dev->dev == dev) + { + return in6_dev; + } + } + return NULL; +} + +struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, + struct in6_addr *addr, int scope) +{ + struct inet6_ifaddr * ifaddr; + int hash; + unsigned long flags; + + save_flags(flags); + cli(); + + ifaddr = (struct inet6_ifaddr *) kmalloc(sizeof(struct inet6_ifaddr), + GFP_ATOMIC); + + if (ifaddr == NULL) + { + printk(KERN_DEBUG "ipv6_add_addr: malloc failed\n"); + restore_flags(flags); + return NULL; + } + + memset(ifaddr, 0, sizeof(struct inet6_ifaddr)); + memcpy(&ifaddr->addr, addr, sizeof(struct in6_addr)); + + ifaddr->scope = scope; + ifaddr->idev = idev; + + + /* add to list */ + + hash = ipv6_addr_hash(addr); + + ifaddr->lst_next = inet6_addr_lst[hash]; + inet6_addr_lst[hash] = ifaddr; + + + /* add to inet6_dev unicast addr list */ + ifaddr->if_next = idev->addr_list; + idev->addr_list = ifaddr; + + restore_flags(flags); + return ifaddr; + +} + +void ipv6_del_addr(struct 
inet6_ifaddr *ifp) +{ + struct inet6_ifaddr *iter, **back; + int hash; + + if (addr_list_lock) + { + ifp->flags |= ADDR_INVALID; + return; + } + + hash = ipv6_addr_hash(&ifp->addr); + + iter = inet6_addr_lst[hash]; + back = &inet6_addr_lst[hash]; + + for (; iter; iter = iter->lst_next) + { + if (iter == ifp) + { + *back = ifp->lst_next; + ifp->lst_next = NULL; + break; + } + back = &(iter->lst_next); + } + + iter = ifp->idev->addr_list; + back = &ifp->idev->addr_list; + + for (; iter; iter = iter->if_next) + { + if (iter == ifp) + { + *back = ifp->if_next; + ifp->if_next = NULL; + break; + } + back = &(iter->if_next); + } + + kfree(ifp); +} + +/* + * Choose an apropriate source address + * should do: + * i) get an address with an apropriate scope + * ii) see if there is a specific route for the destination and use + * an address of the attached interface + * iii) don't use deprecated addresses + * + * at the moment i believe only iii) is missing. + */ +struct inet6_ifaddr * ipv6_get_saddr(struct rt6_info *rt, struct in6_addr *daddr) +{ + int scope; + struct inet6_ifaddr * ifp = NULL; + struct inet6_dev * i6dev; + struct inet6_ifaddr * match = NULL; + struct device *dev = NULL; + int i; + + if (rt) + { + dev = rt->rt_dev; + } + + atomic_inc(&addr_list_lock); + + scope = ipv6_addr_type(daddr); + + scope &= IPV6_ADDR_SCOPE_MASK; + + if (rt && (rt->rt_flags & RTI_ALLONLINK)) + { + /* + * route for the "all destinations on link" rule + * when no routers are present + */ + scope = IFA_LINK; + } + + /* + * known dev + * search dev and walk through dev addresses + */ + + if (dev) + { + if (dev->flags & IFF_LOOPBACK) + { + scope = IFA_HOST; + } + + for (i6dev = inet6_dev_lst; i6dev; i6dev=i6dev->next) + { + if (i6dev->dev == dev) + { + for (ifp=i6dev->addr_list; ifp; + ifp=ifp->if_next) + { + if (ifp->scope == scope) + { + if (!(ifp->flags & ADDR_STATUS)) + { + goto out; + } + if (!(ifp->flags & ADDR_INVALID)) + { + match = ifp; + } + } + } + break; + } + } + } + + if 
(scope == IFA_LINK) + { + goto out; + } + + /* + * dev == NULL or search failed for specified dev + */ + + for (i=0; i < HASH_SIZE; i++) + { + for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) + { + if (ifp->scope == scope) + { + if (!(ifp->flags & ADDR_STATUS)) + { + goto out; + } + if (!(ifp->flags & ADDR_INVALID)) + { + match = ifp; + } + } + } + } + + out: + if (ifp == NULL && match) + { + ifp = match; + } + atomic_dec(&addr_list_lock); + return ifp; +} + +struct inet6_ifaddr * ipv6_get_lladdr(struct device *dev) +{ + struct inet6_ifaddr *ifp; + struct inet6_dev *i6dev; + + for (i6dev = inet6_dev_lst; i6dev; i6dev=i6dev->next) + { + if (i6dev->dev == dev) + { + for (ifp=i6dev->addr_list; ifp; ifp=ifp->if_next) + { + if (ifp->scope == IFA_LINK) + return ifp; + } + break; + } + } + return NULL; +} + +/* + * Retrieve the ifaddr struct from an v6 address + * Called from ipv6_rcv to check if the address belongs + * to the host. + */ + +struct inet6_ifaddr * ipv6_chk_addr(struct in6_addr *addr) +{ + struct inet6_ifaddr * ifp; + u8 hash; + + atomic_inc(&addr_list_lock); + + hash = ipv6_addr_hash(addr); + + for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) + { + if (ipv6_addr_cmp(&ifp->addr, addr) == 0) + { + break; + } + } + + atomic_dec(&addr_list_lock); + return ifp; +} + +static void sit_route_add(struct device *dev) +{ + struct in6_rtmsg rtmsg; + int err; + + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + + memset(&rtmsg.rtmsg_dst, 0, sizeof(struct in6_addr)); + memset(&rtmsg.rtmsg_gateway, 0, sizeof(struct in6_addr)); + + if (dev->pa_dstaddr == 0) + { + /* prefix length - 96 bytes "::d.d.d.d" */ + rtmsg.rtmsg_prefixlen = 96; + rtmsg.rtmsg_metric = 1; + rtmsg.rtmsg_flags = RTF_NEXTHOP|RTF_UP; + } + else + { + rtmsg.rtmsg_prefixlen = 128; + rtmsg.rtmsg_dst.s6_addr32[3] = dev->pa_dstaddr; + rtmsg.rtmsg_metric = 1; + rtmsg.rtmsg_flags = RTF_HOST|RTF_UP; + } + + strcpy(rtmsg.rtmsg_device, dev->name); + + err = ipv6_route_add(&rtmsg); + + if (err) + { + printk(KERN_DEBUG 
"sit_route_add: error in route_add\n"); + } +} + +static void init_loopback(struct device *dev) +{ + struct in6_addr addr; + struct inet6_dev *idev; + struct inet6_ifaddr * ifp; + struct in6_rtmsg rtmsg; + char devname[] = "lo"; + int err; + + /* ::1 */ + + memset(&addr, 0, sizeof(struct in6_addr)); + addr.s6_addr[15] = 1; + + idev = ipv6_add_dev(dev); + + if (idev == NULL) + { + printk(KERN_DEBUG "init loopback: add_dev failed\n"); + return; + } + + ifp = ipv6_add_addr(idev, &addr, IFA_HOST); + + if (ifp == NULL) + { + printk(KERN_DEBUG "init_loopback: add_addr failed\n"); + return; + } + + ifp->flags |= ADDR_PERMANENT; + + memcpy(&rtmsg.rtmsg_dst, &addr, sizeof(struct in6_addr)); + memset(&rtmsg.rtmsg_gateway, 0, sizeof(struct in6_addr)); + + rtmsg.rtmsg_prefixlen = 128; + rtmsg.rtmsg_metric = 1; + strcpy(rtmsg.rtmsg_device, devname); + + rtmsg.rtmsg_flags = RTF_NEXTHOP|RTF_HOST|RTF_UP; + + err = ipv6_route_add(&rtmsg); + + if (err) + { + printk(KERN_DEBUG "init_loopback: error in route_add\n"); + } + + /* add route for ::127.0.0.1 */ +} + +static void addrconf_eth_config(struct device *dev) +{ + struct in6_addr addr; + struct in6_addr maddr; + struct inet6_ifaddr * ifp; + struct inet6_dev * idev; + + memset(&addr, 0, sizeof(struct in6_addr)); + + /* generate link local address*/ + addr.s6_addr[0] = 0xFE; + addr.s6_addr[1] = 0x80; + + memcpy(addr.s6_addr + (sizeof(struct in6_addr) - dev->addr_len), + dev->dev_addr, dev->addr_len); + + idev = ipv6_add_dev(dev); + + if (idev == NULL) + return; + + ifp = ipv6_add_addr(idev, &addr, IFA_LINK); + + if (ifp == NULL) + return; + + ifp->flags |= (DAD_INCOMPLETE | ADDR_PERMANENT); + ifp->prefix_len = 10; + + /* join to all nodes multicast group */ + ipv6_addr_all_nodes(&maddr); + ipv6_dev_mc_inc(dev, &maddr); + + if (ipv6_forwarding) + { + idev->router = 1; + ipv6_addr_all_routers(&maddr); + ipv6_dev_mc_inc(dev, &maddr); + } + + /* join to solicited addr multicast group */ + addrconf_addr_solict_mult(&addr, &maddr); + 
ipv6_dev_mc_inc(dev, &maddr); + + /* start dad */ + addrconf_dad_start(ifp); +} + +void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len) +{ + struct prefix_info *pinfo; + struct rt6_info *rt; + __u32 valid_lft; + __u32 prefered_lft; + int addr_type; + unsigned long rt_expires; + + pinfo = (struct prefix_info *) opt; + + if (len < sizeof(struct prefix_info)) + { + printk(KERN_DEBUG "addrconf: prefix option too short\n"); + return; + } + + /* + * Validation checks ([ADDRCONF], page 19) + */ + + addr_type = ipv6_addr_type(&pinfo->prefix); + + if (addr_type & IPV6_ADDR_LINKLOCAL) + { + return; + } + + valid_lft = ntohl(pinfo->valid); + prefered_lft = ntohl(pinfo->prefered); + + if (prefered_lft > valid_lft) + { + printk(KERN_WARNING + "addrconf: prefix option has invalid lifetime\n"); + return; + } + + /* + * If we where using an "all destinations on link" route + * delete it + */ + + if (last_resort_rt && (last_resort_rt->rt_flags & RTI_ALLONLINK)) + { + rt_release(last_resort_rt); + last_resort_rt = NULL; + } + + /* + * Two things going on here: + * 1) Add routes for on-link prefixes + * 2) Configure prefixes with the auto flag set + */ + + rt_expires = jiffies + valid_lft * HZ; + if (rt_expires < jiffies) + { + rt_expires = ~0; + } + + rt = fibv6_lookup(&pinfo->prefix, dev, RTI_DYNAMIC|RTI_GATEWAY); + + if (rt) + { + if (pinfo->onlink == 0 || valid_lft == 0) + { + /* + * delete route + */ + fib6_del_rt(rt); + rt = NULL; + } + else + { + rt->rt_expires = rt_expires; + } + } + else if (pinfo->onlink && valid_lft) + { + struct in6_rtmsg rtmsg; + + printk(KERN_DEBUG "adding on link route\n"); + ipv6_addr_copy(&rtmsg.rtmsg_dst, &pinfo->prefix); + memset(&rtmsg.rtmsg_gateway, 0, sizeof(struct in6_addr)); + + rtmsg.rtmsg_prefixlen = pinfo->prefix_len; + rtmsg.rtmsg_metric = 1; + memcpy(rtmsg.rtmsg_device, dev->name, strlen(dev->name) + 1); + rtmsg.rtmsg_flags = RTF_UP | RTF_ADDRCONF; + rtmsg.rtmsg_info = rt_expires; + + ipv6_route_add(&rtmsg); + } + + if 
(pinfo->autoconf && addrconf_sys_autoconf) + { + struct inet6_ifaddr * ifp; + struct in6_addr addr; + int plen; + + plen = pinfo->prefix_len >> 3; + + if (plen + dev->addr_len == sizeof(struct in6_addr)) + { + memcpy(&addr, &pinfo->prefix, plen); + memcpy(addr.s6_addr + plen, dev->dev_addr, + dev->addr_len); + } + else + { + printk(KERN_DEBUG + "addrconf: prefix_len invalid\n"); + return; + } + + ifp = ipv6_chk_addr(&addr); + + if (ifp == NULL && valid_lft) + { + /* create */ + + struct inet6_dev *in6_dev; + + in6_dev = ipv6_get_idev(dev); + + if (in6_dev == NULL) + { + printk(KERN_DEBUG + "addrconf: device not configured\n"); + } + + ifp = ipv6_add_addr(in6_dev, &addr, + addr_type & IPV6_ADDR_SCOPE_MASK); + + if (dev->flags & IFF_MULTICAST) + { + struct in6_addr maddr; + + /* join to solicited addr multicast group */ + addrconf_addr_solict_mult(&addr, &maddr); + ipv6_dev_mc_inc(dev, &maddr); + } + + ifp->flags |= DAD_INCOMPLETE; + ifp->prefix_len = pinfo->prefix_len; + + addrconf_dad_start(ifp); + + } + + if (ifp && valid_lft == 0) + { + ipv6_del_addr(ifp); + ifp = NULL; + } + + if (ifp) + { + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + ifp->tstamp = jiffies; + } + } + +} + +/* + * Set destination address. + * Special case for SIT interfaces where we create a new "virtual" + * device. 
+ */ +int addrconf_set_dstaddr(void *arg) +{ + struct in6_ifreq ireq; + struct device *dev; + int err; + + err = copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)); + + if (err) + return -EFAULT; + + dev = dev_get(ireq.devname); + + if (dev->type == ARPHRD_SIT) + { + struct device *dev; + + if (!(ipv6_addr_type(&ireq.addr) & IPV6_ADDR_COMPATv4)) + { + return -EADDRNOTAVAIL; + } + + dev = sit_add_tunnel(ireq.addr.s6_addr32[3]); + + if (dev == NULL) + return -ENOMEM; + + return 0; + } + + return -EINVAL; +} + +/* + * Manual configuration of address on an interface + */ +int addrconf_add_ifaddr(void *arg) +{ + struct inet6_dev *in6_dev; + struct in6_ifreq ireq; + struct inet6_ifaddr *ifp; + struct device *dev; + int addr_type; + int err; + + if (!suser()) + return -EPERM; + + err = copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)); + if (err) + return -EFAULT; + + dev = dev_get(ireq.devname); + + if (dev == NULL) + return -EINVAL; + + in6_dev = ipv6_get_idev(dev); + + if (in6_dev == NULL) + return -EINVAL; + + addr_type = ipv6_addr_type(&ireq.addr); + addr_type &= IPV6_ADDR_SCOPE_MASK; + + ifp = ipv6_add_addr(in6_dev, &ireq.addr, addr_type); + + if (ifp == NULL) + return -ENOMEM; + + if (dev->flags & IFF_MULTICAST) + { + struct in6_addr maddr; + + /* join to solicited addr multicast group */ + addrconf_addr_solict_mult(&ireq.addr, &maddr); + ipv6_dev_mc_inc(dev, &maddr); + } + + + ifp->prefix_len = ireq.prefix_len; + ifp->flags |= ADDR_PERMANENT; + + if (!(dev->flags & (IFF_NOARP|IFF_LOOPBACK))) + { + ifp->flags |= DAD_INCOMPLETE; + addrconf_dad_start(ifp); + } + return 0; +} + +static void sit_add_v4_addrs(struct inet6_dev *idev) +{ + struct inet6_ifaddr * ifp; + struct in6_addr addr; + struct device *dev; + int flag; + + memset(&addr, 0, sizeof(struct in6_addr)); + + if (idev->dev->pa_dstaddr) + { + addr.s6_addr32[0] = __constant_htonl(0xfe800000); + flag = IFA_LINK; + } + else + { + flag = IFA_GLOBAL | IPV6_ADDR_COMPATv4; + } + + for (dev = dev_base; dev != 
NULL; dev = dev->next) + { + if (dev->family == AF_INET && (dev->flags & IFF_UP)) + { + addr.s6_addr32[3] = dev->pa_addr; + + if (dev->flags & IFF_LOOPBACK) + { + if (idev->dev->pa_dstaddr) + continue; + + flag = IFA_HOST | IPV6_ADDR_COMPATv4; + } + + ifp = ipv6_add_addr(idev, &addr, flag); + + if (ifp == NULL) + continue; + + ifp->flags |= ADDR_PERMANENT; + } + } +} + +int addrconf_notify(struct notifier_block *this, unsigned long event, + void * data) +{ + struct device *dev; + struct inet6_dev * idev; + + dev = (struct device *) data; + + switch(event) { + case NETDEV_UP: + switch(dev->type) { + case ARPHRD_SIT: + + printk(KERN_DEBUG "sit device up: %s\n", dev->name); + + /* + * Configure the tunnel with one of our IPv4 + * addresses... we should configure all of + * our v4 addrs in the tunnel + */ + + idev = ipv6_add_dev(dev); + + sit_add_v4_addrs(idev); + + /* + * we do an hack for now to configure the tunnel + * route. + */ + + sit_route_add(dev); + break; + + case ARPHRD_LOOPBACK: + init_loopback(dev); + break; + + case ARPHRD_ETHER: + + printk(KERN_DEBUG "Configuring eth interface\n"); + addrconf_eth_config(dev); + break; + } + rt6_sndmsg(RTMSG_NEWDEVICE, NULL, NULL, 0, 0, dev->name, 0); + break; + + case NETDEV_DOWN: + /* + * Remove all addresses from this interface + * and take the interface out of the list. 
+ */ + rt6_sndmsg(RTMSG_NEWDEVICE, NULL, NULL, 0, 0, dev->name, 0); + + break; + } + + return NOTIFY_OK; +} + +static void addrconf_dad_completed(struct inet6_ifaddr *ifp) +{ + struct in6_rtmsg rtmsg; + struct device *dev; + int err; + + + if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) + { + struct in6_addr all_routers; + + /* + * 1) configure a link route for this interface + * 2) send a (delayed) router solicitation + */ + + memcpy(&rtmsg.rtmsg_dst, &ifp->addr, sizeof(struct in6_addr)); + memset(&rtmsg.rtmsg_gateway, 0, sizeof(struct in6_addr)); + + dev = ifp->idev->dev; + + rtmsg.rtmsg_prefixlen = ifp->prefix_len; + rtmsg.rtmsg_metric = 1; + memcpy(rtmsg.rtmsg_device, dev->name, strlen(dev->name) + 1); + + rtmsg.rtmsg_flags = RTF_UP; + + err = ipv6_route_add(&rtmsg); + + if (err) + { + printk(KERN_DEBUG "dad_complete: error in route_add\n"); + } + + if (ipv6_forwarding == 0) + { + ipv6_addr_set(&all_routers, + __constant_htonl(0xff020000U), 0, 0, + __constant_htonl(0x2U)); + + /* + * If a host as already performed a random delay + * [...] as part of DAD [...] 
there is no need + * to delay again before sending the first RS + */ + ndisc_send_rs(ifp->idev->dev, &ifp->addr, + &all_routers); + + ifp->probes = 1; + ifp->timer.function = addrconf_rs_timer; + ifp->timer.expires = (jiffies + + RTR_SOLICITATION_INTERVAL); + ifp->idev->if_flags |= IF_RS_SENT; + add_timer(&ifp->timer); + } + } + +} + +static void addrconf_dad_timer(unsigned long data) +{ + struct inet6_ifaddr *ifp; + struct in6_addr unspec; + struct in6_addr mcaddr; + + ifp = (struct inet6_ifaddr *) data; + + if (ifp->probes-- == 0) + { + /* + * DAD was successful + */ + + ifp->flags &= ~DAD_INCOMPLETE; + addrconf_dad_completed(ifp); + return; + } + + /* send a neighbour solicitation for our addr */ + memset(&unspec, 0, sizeof(unspec)); + addrconf_addr_solict_mult(&ifp->addr, &mcaddr); + + ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec); + + ifp->timer.expires = jiffies + RETRANS_TIMER; + add_timer(&ifp->timer); +} + +static void addrconf_rs_timer(unsigned long data) +{ + struct inet6_ifaddr *ifp; + + ifp = (struct inet6_ifaddr *) data; + + if (ipv6_forwarding) + return; + + if (ifp->idev->if_flags & IF_RA_RCVD) + { + /* + * Announcement received after solicitation + * was sent + */ + return; + } + + if (ifp->probes++ <= MAX_RTR_SOLICITATIONS) + { + struct in6_addr all_routers; + + ipv6_addr_set(&all_routers, + __constant_htonl(0xff020000U), 0, 0, + __constant_htonl(0x2U)); + + ndisc_send_rs(ifp->idev->dev, &ifp->addr, + &all_routers); + + + ifp->timer.function = addrconf_rs_timer; + ifp->timer.expires = jiffies + RTR_SOLICITATION_INTERVAL; + add_timer(&ifp->timer); + } + else + { + printk(KERN_DEBUG "%s: no IPv6 routers present\n", + ifp->idev->dev->name); + + if (!default_rt_list && !last_resort_rt) + { + struct rt6_info *rt; + + /* + * create a last resort route with all + * destinations on link + */ + rt = kmalloc(sizeof(struct rt6_info), GFP_ATOMIC); + + if (rt) + { + memset(rt, 0, sizeof(struct rt6_info)); + rt->rt_dev = ifp->idev->dev; + 
rt->rt_ref = 1; + rt->rt_flags = (RTI_ALLONLINK | RTF_UP); + last_resort_rt = rt; + } + } + } +} + +static void addrconf_dad_start(struct inet6_ifaddr *ifp) +{ + static int rand_seed = 1; + int rand_num; + + if (rand_seed) + { + rand_seed = 0; + nd_rand_seed = ifp->addr.s6_addr32[3]; + } + + init_timer(&ifp->timer); + ifp->probes = DupAddrDetectTransmits; + + rand_num = ipv6_random() % MAX_RTR_SOLICITATION_DELAY; + + ifp->timer.function = addrconf_dad_timer; + ifp->timer.data = (unsigned long) ifp; + ifp->timer.expires = jiffies + rand_num; + + add_timer(&ifp->timer); +} + +static int iface_proc_info(char *buffer, char **start, off_t offset, + int length, int dummy) +{ + struct inet6_ifaddr *ifp; + int i; + int len = 0; + + for (i=0; i < HASH_SIZE; i++) + for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) + { + int j; + + for (j=0; j<16; j++) + { + sprintf(buffer + len, "%02x", + ifp->addr.s6_addr[j]); + len += 2; + } + + len += sprintf(buffer + len, + " %02x %02x %02x %02x %8s\n", + ifp->idev->if_index, + ifp->prefix_len, + ifp->scope, + ifp->flags, + ifp->idev->dev->name); + } + + *start = buffer + offset; + + len -= offset; + + if (len > length) + len = length; + return len; +} + +struct proc_dir_entry iface_proc_entry = +{ + 0, 8, "if_inet6", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, NULL, + &iface_proc_info +}; + + +/* + * Periodic address status verification + */ + +void addrconf_verify(unsigned long foo) +{ + struct inet6_ifaddr *ifp; + unsigned long now = jiffies; + int i; + + for (i=0; i < HASH_SIZE; i++) + { + for (ifp=inet6_addr_lst[i]; ifp;) + { + if (!(ifp->flags & ADDR_PERMANENT)) + { + struct inet6_ifaddr *bp; + unsigned long age; + + age = (now - ifp->tstamp) / HZ; + + if (age > ifp->prefered_lft) + { + ifp->flags |= ADDR_DEPRECATED; + } + + bp = ifp; + ifp=ifp->lst_next; + + if (age > bp->valid_lft) + { + ipv6_del_addr(bp); + } + continue; + } + ifp=ifp->lst_next; + } + } + + addr_chk_timer.expires = jiffies + ADDR_CHECK_FREQUENCY; + 
add_timer(&addr_chk_timer); +} + +void addrconf_init() +{ + struct device *dev; + + /* init addr hash list */ + memset(inet6_addr_lst, 0, 16 * sizeof(struct inet6_ifaddr *)); + + memset(inet6_mcast_lst, 0, 16 * sizeof(struct ipv6_mc_list *)); + + inet6_dev_lst = NULL; + + /* + * Init loopback device + */ + + dev = dev_get("lo"); + + if (dev && (dev->flags & IFF_UP)) + init_loopback(dev); + + /* + * and maybe: + * search availiable AF_INET devs and try to configure them + */ + + dev = dev_get("eth0"); + + if (dev && (dev->flags & IFF_UP)) + addrconf_eth_config(dev); + + proc_register_dynamic(&proc_net, &iface_proc_entry); + + addr_chk_timer.expires = jiffies + ADDR_CHECK_FREQUENCY; + add_timer(&addr_chk_timer); +} + +void addrconf_cleanup(void) +{ + struct inet6_dev *idev, *bidev; + struct inet6_ifaddr *ifa, *bifa; + int i; + + del_timer(&addr_chk_timer); + + /* + * clean dev list. + */ + + for (idev = inet6_dev_lst; idev; ) + { + bidev = idev; + idev = idev->next; + kfree(bidev); + } + + /* + * clean addr_list + */ + + for (i=0; i<16; i++) + { + for (ifa=inet6_addr_lst[i]; ifa; ) + { + bifa = ifa; + ifa = ifa->lst_next; + kfree(bifa); + } + } + + proc_unregister(&proc_net, iface_proc_entry.low_ino); +} + +/* + * Local variables: + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/af_inet6.c linux/net/ipv6/af_inet6.c --- v2.1.7/linux/net/ipv6/af_inet6.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/af_inet6.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,865 @@ +/* + * AF_INET6 socket family + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * Adapted from linux/net/ipv4/af_inet.c + * + * $Id: af_inet6.c,v 1.13 1996/10/31 19:47:17 roque Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Default callbacks for user INET sockets. These just wake up + * the user owning the socket. + */ + +static void def_callback1(struct sock *sk) +{ + if(!sk->dead) + wake_up_interruptible(sk->sleep); +} + +static void def_callback2(struct sock *sk,int len) +{ + if(!sk->dead) + { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket, 1); + } +} + +static void def_callback3(struct sock *sk) +{ + long wmem; + + wmem = (long) sk->wmem_alloc; + + if (wmem < 0) { + printk(KERN_DEBUG "bug wmem_alloc < 0\n"); + sk->wmem_alloc = 0; + } + + if(!sk->dead && sk->wmem_alloc*2 <= sk->sndbuf) + { + wake_up_interruptible(sk->sleep); + sock_wake_async(sk->socket, 2); + } +} + +struct sock * rawv6_sock_array[SOCK_ARRAY_SIZE]; + +static int inet6_create(struct socket *sock, int protocol) +{ + struct sock *sk; + struct proto *prot; + int err; + + sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL); + if (sk == NULL) + return(-ENOBUFS); + + /* Efficient way to set most fields to zero */ + memset(sk,0,sizeof(*sk)); + + /* + * Note for tcp that also wiped the dummy_th block for us. 
+ */ + + switch(sock->type) + { + case SOCK_STREAM: + case SOCK_SEQPACKET: + if (protocol && protocol != IPPROTO_TCP) + { + kfree_s((void *)sk, sizeof(*sk)); + return(-EPROTONOSUPPORT); + } + protocol = IPPROTO_TCP; + sk->no_check = TCP_NO_CHECK; + prot = &tcpv6_prot; + break; + + case SOCK_DGRAM: + if (protocol && protocol != IPPROTO_UDP) + { + kfree_s((void *)sk, sizeof(*sk)); + return(-EPROTONOSUPPORT); + } + protocol = IPPROTO_UDP; + sk->no_check = UDP_NO_CHECK; + prot=&udpv6_prot; + break; + + case SOCK_RAW: + if (!suser()) + { + kfree_s((void *)sk, sizeof(*sk)); + return(-EPERM); + } + if (!protocol) + { + kfree_s((void *)sk, sizeof(*sk)); + return(-EPROTONOSUPPORT); + } + prot = &rawv6_prot; + sk->reuse = 1; + sk->num = protocol; + break; + default: + kfree_s((void *)sk, sizeof(*sk)); + return(-ESOCKTNOSUPPORT); + } + + sk->socket = sock; + + sk->family = AF_INET6; + sk->type = sock->type; + sk->protocol = protocol; + sk->allocation = GFP_KERNEL; + sk->sndbuf = SK_WMEM_MAX; + sk->rcvbuf = SK_RMEM_MAX; + sk->priority = 1; + + sk->prot = prot; + sk->backlog_rcv = prot->backlog_rcv; + + sk->sleep = sock->wait; + sock->data =(void *) sk; + + sk->state = TCP_CLOSE; + + skb_queue_head_init(&sk->write_queue); + skb_queue_head_init(&sk->receive_queue); + skb_queue_head_init(&sk->back_log); + + sk->timer.data = (unsigned long)sk; + sk->timer.function = &net_timer; + init_timer(&sk->timer); + + sk->state_change = def_callback1; + sk->data_ready = def_callback2; + sk->write_space = def_callback3; + sk->error_report = def_callback1; + + sk->net_pinfo.af_inet6.hop_limit = ipv6_hop_limit; + sk->net_pinfo.af_inet6.mcast_hops = IPV6_DEFAULT_MCASTHOPS; + sk->net_pinfo.af_inet6.mc_loop = 1; + + /* + * init the ipv4 part of the socket since + * we can have sockets using v6 API for ipv4 + */ + + sk->ip_ttl=64; + +#ifdef CONFIG_IP_MULTICAST + sk->ip_mc_loop=1; + sk->ip_mc_ttl=1; + *sk->ip_mc_name=0; + sk->ip_mc_list=NULL; +#endif + + + if (sk->type==SOCK_RAW && 
protocol==IPPROTO_RAW) + sk->ip_hdrincl=1; + + if (sk->num) + { + /* + * It assumes that any protocol which allows + * the user to assign a number at socket + * creation time automatically + * shares. + */ + + inet_put_sock(sk->num, sk); + sk->dummy_th.source = ntohs(sk->num); + } + + if (sk->prot->init) + { + err = sk->prot->init(sk); + if (err != 0) + { + destroy_sock(sk); + return(err); + } + } + MOD_INC_USE_COUNT; + return(0); +} + +static int inet6_dup(struct socket *newsock, struct socket *oldsock) +{ + return(inet6_create(newsock, + ((struct sock *)(oldsock->data))->protocol)); +} + + +/* + * bind for INET6 API + */ + +static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len) +{ + struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr; + struct sock *sk=(struct sock *)sock->data, *sk2; + __u32 v4addr = 0; + unsigned short snum = 0; + int addr_type = 0; + + /* + * If the socket has its own bind function then use it. + */ + + if(sk->prot->bind) + return sk->prot->bind(sk, uaddr, addr_len); + + /* check this error. */ + if (sk->state != TCP_CLOSE) + return(-EIO); + + if(addr_len < sizeof(struct sockaddr_in6)) + return -EINVAL; + + if(sock->type != SOCK_RAW) + { + if (sk->num != 0) + return(-EINVAL); + + snum = ntohs(addr->sin6_port); + + if (snum == 0) + snum = get_new_socknum(sk->prot, 0); + + if (snum < PROT_SOCK && !suser()) + return(-EACCES); + } + + addr_type = ipv6_addr_type(&addr->sin6_addr); + + if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) + { + return(-EINVAL); + } + + /* + * check if the address belongs to the host + */ + + if (addr_type == IPV6_ADDR_MAPPED) + { + v4addr = addr->sin6_addr.s6_addr32[3]; + + if (ip_chk_addr(v4addr) != IS_MYADDR) + return(-EADDRNOTAVAIL); + } + else + { + if (addr_type != IPV6_ADDR_ANY) + { + /* + * ipv4 addr of the socket is invalid. + * only the unpecified and mapped address + * have a v4 equivalent. 
+ */ + + v4addr = LOOPBACK4_IPV6; + + if (!(addr_type & IPV6_ADDR_MULTICAST)) + { + if (ipv6_chk_addr(&addr->sin6_addr) == NULL) + return(-EADDRNOTAVAIL); + } + } + } + + sk->rcv_saddr = v4addr; + sk->saddr = v4addr; + + memcpy(&sk->net_pinfo.af_inet6.rcv_saddr, &addr->sin6_addr, + sizeof(struct in6_addr)); + + if (!(addr_type & IPV6_ADDR_MULTICAST)) + memcpy(&sk->net_pinfo.af_inet6.saddr, &addr->sin6_addr, + sizeof(struct in6_addr)); + + if(sock->type != SOCK_RAW) + { + /* Make sure we are allowed to bind here. */ + cli(); + for(sk2 = sk->prot->sock_array[snum & (SOCK_ARRAY_SIZE -1)]; + sk2 != NULL; sk2 = sk2->next) + { + /* + * Hash collision or real match ? + */ + + if (sk2->num != snum) + continue; + + /* + * Either bind on the port is wildcard means + * they will overlap and thus be in error. + * We use the sk2 v4 address to test the + * other socket since addr_any in av4 implies + * addr_any in v6 + */ + + if (addr_type == IPV6_ADDR_ANY || (!sk2->rcv_saddr)) + { + /* + * Allow only if both are setting reuse. + */ + if(sk2->reuse && sk->reuse && sk2->state!=TCP_LISTEN) + continue; + sti(); + return(-EADDRINUSE); + } + + /* + * Two binds match ? + */ + + if (ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr, + &sk2->net_pinfo.af_inet6.rcv_saddr)) + + continue; + /* + * Reusable port ? + */ + + if (!sk->reuse) + { + sti(); + return(-EADDRINUSE); + } + + /* + * Reuse ? 
+ */ + + if (!sk2->reuse || sk2->state==TCP_LISTEN) + { + sti(); + return(-EADDRINUSE); + } + } + sti(); + + inet_remove_sock(sk); + + /* + if(sock->type==SOCK_DGRAM) + udp_cache_zap(); + if(sock->type==SOCK_STREAM) + tcp_cache_zap(); + */ + inet_put_sock(snum, sk); + sk->dummy_th.source = ntohs(sk->num); + sk->dummy_th.dest = 0; + sk->daddr = 0; + } + + return(0); +} + +static int inet6_release(struct socket *sock, struct socket *peer) +{ + MOD_DEC_USE_COUNT; + return inet_release(sock, peer); +} + +static int inet6_socketpair(struct socket *sock1, struct socket *sock2) +{ + return(-EOPNOTSUPP); +} + +/* + * This does both peername and sockname. + */ + +static int inet6_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr; + struct sock *sk; + + sin->sin6_family = AF_INET6; + sk = (struct sock *) sock->data; + if (peer) + { + if (!tcp_connected(sk->state)) + return(-ENOTCONN); + sin->sin6_port = sk->dummy_th.dest; + memcpy(&sin->sin6_addr, &sk->net_pinfo.af_inet6.daddr, + sizeof(struct in6_addr)); + } + else + { + if (ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr) == + IPV6_ADDR_ANY) + memcpy(&sin->sin6_addr, + &sk->net_pinfo.af_inet6.saddr, + sizeof(struct in6_addr)); + + else + memcpy(&sin->sin6_addr, + &sk->net_pinfo.af_inet6.rcv_saddr, + sizeof(struct in6_addr)); + + sin->sin6_port = sk->dummy_th.source; + + } + + *uaddr_len = sizeof(*sin); + return(0); +} + +static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk=(struct sock *)sock->data; + int err; + int pid; + + switch(cmd) + { + case FIOSETOWN: + case SIOCSPGRP: + err = get_user(pid, (int *) arg); + if(err) + return err; + + /* see inet_fcntl */ + if (current->pid != pid && current->pgrp != -pid && !suser()) + return -EPERM; + sk->proc = pid; + return(0); + case FIOGETOWN: + case SIOCGPGRP: + err = put_user(sk->proc,(int *)arg); + if(err) + return err; + return(0); + case 
SIOCGSTAMP: + if(sk->stamp.tv_sec==0) + return -ENOENT; + err = copy_to_user((void *)arg, &sk->stamp, + sizeof(struct timeval)); + if (err) + return -EFAULT; + return 0; + + case SIOCADDRT: + case SIOCDELRT: + + return(ipv6_route_ioctl(cmd,(void *)arg)); + + case SIOCGIFCONF: + case SIOCGIFFLAGS: + case SIOCSIFFLAGS: + case SIOCADDMULTI: + case SIOCDELMULTI: +/* + + this ioctls deal with addresses + must process the addr info before + calling dev_ioctl to perform dev specific functions + + case SIOCGIFADDR: + case SIOCSIFADDR: + + + case SIOCGIFDSTADDR: + + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + */ + + case SIOCGIFMETRIC: + case SIOCSIFMETRIC: + case SIOCGIFMEM: + case SIOCSIFMEM: + case SIOCGIFMTU: + case SIOCSIFMTU: + case SIOCSIFLINK: + case SIOCGIFHWADDR: + case SIOCSIFHWADDR: + case SIOCSIFMAP: + case SIOCGIFMAP: + case SIOCSIFSLAVE: + case SIOCGIFSLAVE: + + return(dev_ioctl(cmd,(void *) arg)); + + return -EINVAL; + + case SIOCSIFADDR: + return addrconf_add_ifaddr((void *) arg); + case SIOCSIFDSTADDR: + return addrconf_set_dstaddr((void *) arg); + default: + if ((cmd >= SIOCDEVPRIVATE) && + (cmd <= (SIOCDEVPRIVATE + 15))) + return(dev_ioctl(cmd,(void *) arg)); + + if (sk->prot->ioctl==NULL) + return(-EINVAL); + return(sk->prot->ioctl(sk, cmd, arg)); + } + /*NOTREACHED*/ + return(0); +} + +/* + * This routine must find a socket given a TCP or UDP header. + * Everything is assumed to be in net order. + * + * We give priority to more closely bound ports: if some socket + * is bound to a particular foreign address, it will get the packet + * rather than somebody listening to any address.. 
+ */ + +struct sock *inet6_get_sock(struct proto *prot, + struct in6_addr *loc_addr, + struct in6_addr *rmt_addr, + unsigned short loc_port, + unsigned short rmt_port) +{ + struct sock *s; + struct sock *result = NULL; + int badness = -1; + unsigned short hnum; + struct ipv6_pinfo *np; + hnum = ntohs(loc_port); + + /* + * SOCK_ARRAY_SIZE must be a power of two. This will work better + * than a prime unless 3 or more sockets end up using the same + * array entry. This should not be a problem because most + * well known sockets don't overlap that much, and for + * the other ones, we can just be careful about picking our + * socket number when we choose an arbitrary one. + */ + + for(s = prot->sock_array[hnum & (SOCK_ARRAY_SIZE - 1)]; + s != NULL; s = s->next) + { + int score = 0; + + if ((s->num != hnum) || s->family != AF_INET6) + continue; + + if(s->dead && (s->state == TCP_CLOSE)) + { + printk(KERN_DEBUG "dead or closed socket\n"); + continue; + } + + np = &s->net_pinfo.af_inet6; + + /* remote port matches? */ + + if (s->dummy_th.dest) { + if (s->dummy_th.dest != rmt_port) + { + continue; + } + score++; + } + + /* local address matches? */ + + if (!ipv6_addr_any(&np->rcv_saddr)) + { + if (ipv6_addr_cmp(&np->rcv_saddr, loc_addr)) + { + continue; + } + score++; + } + + /* remote address matches? */ + if (!ipv6_addr_any(&np->daddr)) + { + if (ipv6_addr_cmp(&np->daddr, rmt_addr)) + { + continue; + } + score++; + } + + /* perfect match? */ + if (score == 3) + return s; + /* no, check if this is the best so far.. */ + if (score <= badness) + continue; + result = s; + badness = score; + } + return result; +} + +static int __inline__ inet6_mc_check(struct sock *sk, struct in6_addr *addr) +{ + struct ipv6_mc_socklist *mc; + + for (mc = sk->net_pinfo.af_inet6.ipv6_mc_list; mc; mc=mc->next) + { + if (ipv6_addr_cmp(&mc->addr, addr) == 0) + return 1; + } + + return 0; +} + +/* + * Deliver a datagram to raw sockets. 
+ */ + +struct sock *inet6_get_sock_raw(struct sock *sk, unsigned short num, + struct in6_addr *loc_addr, + struct in6_addr *rmt_addr) + +{ + struct sock *s; + struct ipv6_pinfo *np; + int addr_type = 0; + + s=sk; + + addr_type = ipv6_addr_type(loc_addr); + + for(; s != NULL; s = s->next) + { + if (s->num != num) + continue; + + if(s->dead && (s->state == TCP_CLOSE)) + continue; + + np = &s->net_pinfo.af_inet6; + + if (!ipv6_addr_any(&np->daddr) && + ipv6_addr_cmp(&np->daddr, rmt_addr)) + { + continue; + } + + if (!ipv6_addr_any(&np->rcv_saddr)) + { + if (ipv6_addr_cmp(&np->rcv_saddr, loc_addr) == 0) + return(s); + + if ((addr_type & IPV6_ADDR_MULTICAST) && + inet6_mc_check(s, loc_addr)) + return (s); + + continue; + } + + return(s); + } + return(NULL); +} + +/* + * inet6_get_sock_mcast for UDP sockets. + */ + +struct sock *inet6_get_sock_mcast(struct sock *sk, + unsigned short num, unsigned short rmt_port, + struct in6_addr *loc_addr, + struct in6_addr *rmt_addr) +{ + struct sock *s; + struct ipv6_pinfo *np; + + s=sk; + + for(; s != NULL; s = s->next) + { + if (s->num != num) + continue; + + if(s->dead && (s->state == TCP_CLOSE)) + continue; + + np = &s->net_pinfo.af_inet6; + + if (s->dummy_th.dest) { + if (s->dummy_th.dest != rmt_port) + { + continue; + } + } + + if (!ipv6_addr_any(&np->daddr) && + ipv6_addr_cmp(&np->daddr, rmt_addr)) + { + continue; + } + + + if (!ipv6_addr_any(&np->rcv_saddr)) + { + if (ipv6_addr_cmp(&np->rcv_saddr, loc_addr) == 0) + return(s); + } + + if (!inet6_mc_check(s, loc_addr)) + { + continue; + } + + return(s); + } + return(NULL); +} + + +static struct proto_ops inet6_proto_ops = { + AF_INET6, + + inet6_create, + inet6_dup, + inet6_release, + inet6_bind, + inet_connect, /* ok */ + inet6_socketpair, /* a do nothing */ + inet_accept, /* ok */ + inet6_getname, + inet_select, /* ok */ + inet6_ioctl, /* must change */ + inet_listen, /* ok */ + inet_shutdown, /* ok */ + inet_setsockopt, /* ok */ + inet_getsockopt, /* ok */ + inet_fcntl, /* 
ok */ + inet_sendmsg, /* ok */ + inet_recvmsg /* ok */ +}; + +#ifdef MODULE +int init_module(void) +#else +void inet6_proto_init(struct net_proto *pro) +#endif +{ + int i; + + printk(KERN_INFO "IPv6 v0.1\n"); + + sock_register(inet6_proto_ops.family, &inet6_proto_ops); + + for(i = 0; i < SOCK_ARRAY_SIZE; i++) + { + rawv6_sock_array[i] = NULL; + } + + /* + * ipngwg API draft makes clear that the correct semantics + * for TCP and UDP is to consider one TCP and UDP instance + * in a host availiable by both INET and INET6 APIs and + * hable to communicate via both network protocols. + */ + + tcpv6_prot.inuse = 0; + tcpv6_prot.highestinuse = 0; + tcpv6_prot.sock_array = tcp_sock_array; + + udpv6_prot.inuse = 0; + udpv6_prot.highestinuse = 0; + udpv6_prot.sock_array = udp_sock_array; + + rawv6_prot.inuse = 0; + rawv6_prot.highestinuse = 0; + rawv6_prot.sock_array = rawv6_sock_array; + + ipv6_init(); + + icmpv6_init(&inet6_proto_ops); + ndisc_init(&inet6_proto_ops); + + addrconf_init(); + + sit_init(); + + /* init v6 transport protocols */ + + udpv6_init(); + /* add /proc entries here */ + + tcpv6_init(); + +#ifdef MODULE + return 0; +#endif +} + +#ifdef MODULE +void cleanup_module(void) +{ + sit_cleanup(); + ipv6_cleanup(); + sock_unregister(AF_INET6); +} +#endif + diff -u --recursive --new-file v2.1.7/linux/net/ipv6/datagram.c linux/net/ipv6/datagram.c --- v2.1.7/linux/net/ipv6/datagram.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/datagram.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,202 @@ +/* + * common UDP/RAW code + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * $Id: datagram.c,v 1.3 1996/10/11 16:03:05 roque Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct ipv6_options *opt = (struct ipv6_options *) skb->proto_priv; + struct cmsghdr *cmsg = msg->msg_control; + int len = msg->msg_controllen; + + msg->msg_controllen = 0; + + if (np->rxinfo && (len >= sizeof(struct cmsghdr) + + sizeof(struct in6_pktinfo))) + { + struct in6_pktinfo *src_info; + struct inet6_dev *in6_dev; + + cmsg->cmsg_len = (sizeof(struct cmsghdr) + + sizeof(struct in6_pktinfo)); + cmsg->cmsg_level = SOL_IPV6; + cmsg->cmsg_type = IPV6_RXINFO; + + src_info = (struct in6_pktinfo *) cmsg->cmsg_data; + in6_dev = ipv6_get_idev(skb->dev); + + if (in6_dev == NULL) + { + printk(KERN_DEBUG "recv_ctl: unknown device\n"); + return -ENODEV; + } + + src_info->ipi6_ifindex = in6_dev->if_index; + ipv6_addr_copy(&src_info->ipi6_addr, + &skb->ipv6_hdr->daddr); + + len -= cmsg->cmsg_len; + msg->msg_controllen += cmsg->cmsg_len; + cmsg = (struct cmsghdr *)((u8*) cmsg + cmsg->cmsg_len); + } + + if (opt->srcrt) + { + int hdrlen = sizeof(struct rt0_hdr) + (opt->srcrt->hdrlen << 3); + + if (len >= sizeof(struct cmsghdr) + hdrlen) + { + struct rt0_hdr *rt0; + + cmsg->cmsg_len = sizeof(struct cmsghdr) + hdrlen; + cmsg->cmsg_level = SOL_IPV6; + cmsg->cmsg_type = IPV6_RXINFO; + + rt0 = (struct rt0_hdr *) cmsg->cmsg_data; + memcpy(rt0, opt->srcrt, hdrlen); + + len -= cmsg->cmsg_len; + msg->msg_controllen += cmsg->cmsg_len; + cmsg = (struct cmsghdr *)((u8*) cmsg + cmsg->cmsg_len); + } + } + return 0; +} + + +int datagram_send_ctl(struct msghdr *msg, struct device **src_dev, + struct in6_addr **src_addr, struct ipv6_options *opt) +{ + struct inet6_dev *in6_dev = NULL; + struct in6_pktinfo *src_info; + struct cmsghdr *cmsg; + struct ipv6_rt_hdr *rthdr; + int len; + int err = -EINVAL; + + for (cmsg = msg->msg_control; 
cmsg; cmsg = cmsg_nxthdr(msg, cmsg)) + { + if (cmsg->cmsg_level != SOL_IPV6) + { + printk(KERN_DEBUG "cmsg_level %d\n", cmsg->cmsg_level); + continue; + } + + switch (cmsg->cmsg_type) { + + case IPV6_TXINFO: + if (cmsg->cmsg_len < (sizeof(struct cmsghdr) + + sizeof(struct in6_pktinfo))) + { + goto exit_f; + } + + src_info = (struct in6_pktinfo *) cmsg->cmsg_data; + + if (src_info->ipi6_ifindex) + { + in6_dev = ipv6_dev_by_index(src_info->ipi6_ifindex); + if (in6_dev == NULL) + { + goto exit_f; + } + + *src_dev = in6_dev->dev; + } + + if (!ipv6_addr_any(&src_info->ipi6_addr)) + { + struct inet6_ifaddr *ifp; + + ifp = ipv6_chk_addr(&src_info->ipi6_addr); + + if ( ifp == NULL) + { + goto exit_f; + } + + if (in6_dev && ifp->scope == IFA_LINK && + in6_dev != ifp->idev) + { + goto exit_f; + } + + *src_addr = &src_info->ipi6_addr; + err = 0; + } + + break; + + case SCM_SRCRT: + + len = cmsg->cmsg_len; + + len -= sizeof(struct cmsghdr); + + /* validate option length */ + if (len < sizeof(struct ipv6_rt_hdr)) + { + goto exit_f; + } + + rthdr = (struct ipv6_rt_hdr *) cmsg->cmsg_data; + + /* + * TYPE 0 + */ + if (rthdr->type) + { + goto exit_f; + } + + if (((rthdr->hdrlen + 1) << 3) < len) + { + goto exit_f; + } + + /* segments left must also match */ + if ((rthdr->hdrlen >> 1) != rthdr->segments_left) + { + goto exit_f; + } + + opt->opt_nflen += ((rthdr->hdrlen + 1) << 3); + opt->srcrt = rthdr; + err = 0; + + break; + default: + printk(KERN_DEBUG "invalid cmsg type: %d\n", + cmsg->cmsg_type); + break; + } + } + + exit_f: + return err; +} diff -u --recursive --new-file v2.1.7/linux/net/ipv6/exthdrs.c linux/net/ipv6/exthdrs.c --- v2.1.7/linux/net/ipv6/exthdrs.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/exthdrs.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,173 @@ +/* + * Extension Header handling for IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * $Id: exthdrs.c,v 1.7 1996/09/12 18:44:18 roque Exp $ + * + * This program is free software; you can 
redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * inbound + */ + +int ipv6_routing_header(struct sk_buff **skb_ptr, struct device *dev, + __u8 *nhptr, struct ipv6_options *opt) +{ + struct sk_buff *skb = *skb_ptr; + struct in6_addr *addr; + struct in6_addr daddr; + int addr_type = 0; + int strict = 0; + __u32 bit_map; + int pos; + int n, i; + + struct ipv6_rt_hdr *hdr = (struct ipv6_rt_hdr *) skb->h.raw; + struct rt0_hdr *rthdr; + + if (hdr->segments_left == 0) + { + struct ipv6_options *opt; + + opt = (struct ipv6_options *) skb->proto_priv; + opt->srcrt = hdr; + + skb->h.raw += (hdr->hdrlen + 1) << 3; + return hdr->nexthdr; + } + + if (hdr->type != IPV6_SRCRT_TYPE_0 || hdr->hdrlen & 0x01 || + hdr->hdrlen > 46) + { + /* + * Discard + */ + + pos = (__u8 *) hdr - (__u8 *) skb->ipv6_hdr + 2; + + if (hdr->type) + pos += 2; + else + pos += 1; + + icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 0, pos, dev); + kfree_skb(skb, FREE_READ); + return 0; + } + + /* + * This is the routing header forwarding algorithm from + * RFC 1883, page 17. 
+ */ + + n = hdr->hdrlen >> 1; + + if (hdr->segments_left > n) + { + pos = (__u8 *) hdr - (__u8 *) skb->ipv6_hdr + 2; + + pos += 3; + + icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 0, pos, dev); + kfree_skb(skb, FREE_READ); + return 0; + } + + i = n - --hdr->segments_left; + + rthdr = (struct rt0_hdr *) hdr; + addr = rthdr->addr; + addr += i - 1; + + addr_type = ipv6_addr_type(addr); + + if (addr_type == IPV6_ADDR_MULTICAST) + { + kfree_skb(skb, FREE_READ); + return 0; + } + + ipv6_addr_copy(&daddr, addr); + ipv6_addr_copy(addr, &skb->ipv6_hdr->daddr); + ipv6_addr_copy(&skb->ipv6_hdr->daddr, &daddr); + + /* + * Check Strick Source Route + */ + + bit_map = ntohl(rthdr->bitmap); + + if ((bit_map & (1 << i)) == IPV6_SRCRT_STRICT) + { + strict = 1; + } + + ipv6_forward(skb, dev, (strict ? IP6_FW_STRICT : 0) | IP6_FW_SRCRT); + + return 0; +} + + +/* + * outbound + */ + +int ipv6opt_bld_rthdr(struct sk_buff *skb, struct ipv6_options *opt, + struct in6_addr *addr, int proto) +{ + struct rt0_hdr *phdr, *ihdr; + int hops; + + ihdr = (struct rt0_hdr *) opt->srcrt; + + phdr = (struct rt0_hdr *) skb_put(skb, (ihdr->rt_hdr.hdrlen + 1) << 3); + memcpy(phdr, ihdr, sizeof(struct ipv6_rt_hdr)); + + hops = ihdr->rt_hdr.hdrlen >> 1; + + if (hops > 1) + { + memcpy(phdr->addr, ihdr->addr + 1, + (hops - 1) * sizeof(struct in6_addr)); + } + + ipv6_addr_copy(phdr->addr + (hops - 1), addr); + + phdr->rt_hdr.nexthdr = proto; + + return NEXTHDR_ROUTING; +} + +/* + * Local variables: + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/icmp.c linux/net/ipv6/icmp.c --- v2.1.7/linux/net/ipv6/icmp.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/icmp.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,553 @@ +/* + * Internet Control Message Protocol (ICMPv6) + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * Based on net/ipv4/icmp.c + * + * RFC 1885 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU 
General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * ICMP socket for flow control. + */ + +static struct socket icmpv6_socket; + +int icmpv6_rcv(struct sk_buff *skb, struct device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + struct ipv6_options *opt, unsigned short len, + int redo, struct inet6_protocol *protocol); + +static struct inet6_protocol icmpv6_protocol = +{ + icmpv6_rcv, /* handler */ + NULL, /* error control */ + NULL, /* next */ + IPPROTO_ICMPV6, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "ICMPv6" /* name */ +}; + + + +struct icmpv6_msg { + struct icmpv6hdr icmph; + __u8 *data; + struct in6_addr *daddr; + int len; + __u32 csum; +}; + + + +/* + * getfrag callback + * not static because it's needed in ndisc.c + */ + +static void icmpv6_getfrag(const void *data, struct in6_addr *saddr, + char *buff, unsigned int offset, unsigned int len) +{ + struct icmpv6_msg *msg = (struct icmpv6_msg *) data; + struct icmpv6hdr *icmph; + __u32 csum; + + /* + * in theory offset must be 0 since we never send more + * than 576 bytes on an error or more than the path mtu + * on an echo reply. 
(those are the rules on RFC 1883) + */ + + if (offset) + { + csum = csum_partial_copy((void *) msg->data + + offset - sizeof(struct icmpv6hdr), + buff, len, msg->csum); + msg->csum = csum; + return; + } + + csum = csum_partial_copy((void *) &msg->icmph, buff, + sizeof(struct icmpv6hdr), msg->csum); + + csum = csum_partial_copy((void *) msg->data, + buff + sizeof(struct icmpv6hdr), + len - sizeof(struct icmpv6hdr), csum); + + icmph = (struct icmpv6hdr *) buff; + + icmph->checksum = csum_ipv6_magic(saddr, msg->daddr, msg->len, + IPPROTO_ICMPV6, csum); +} + +/* + * an inline helper for the "simple" if statement bellow + * checks if parameter problem report is caused by an + * unrecognized IPv6 option that has the Option Type + * highest-order two bits set to 10 + */ +static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) +{ + char *buff = (char *) skb->ipv6_hdr; + + return ( ( *(buff + offset) & 0xC0 ) == 0x80 ); +} + +/* + * Send an ICMP message in response to a packet in error + */ + +void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, + struct device *dev) +{ + struct ipv6hdr *hdr = skb->ipv6_hdr; + struct sock *sk = (struct sock *) icmpv6_socket.data; + struct in6_addr *saddr = NULL; + struct device *src_dev = NULL; + struct icmpv6_msg msg; + int addr_type = 0; + int optlen; + int len; + + /* + * sanity check pointer in case of parameter problem + */ + + if (type == ICMPV6_PARAMETER_PROB && + (info > (skb->tail - ((unsigned char *) hdr)))) + { + printk(KERN_DEBUG "icmpv6_send: bug! pointer > skb\n"); + return; + } + + /* + * Make sure we respect the rules + * i.e. RFC 1885 2.4(e) + * Rule (e.1) is enforced by not using icmpv6_send + * in any code that processes icmp errors. 
+ */ + + addr_type = ipv6_addr_type(&hdr->daddr); + + if (ipv6_chk_addr(&hdr->daddr)) + { + saddr = &hdr->daddr; + } + + /* + * Dest addr check + */ + + if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) + { + if (type != ICMPV6_PKT_TOOBIG && + !(type == ICMPV6_PARAMETER_PROB && + code == ICMPV6_UNK_OPTION && + (opt_unrec(skb, info)))) + { + return; + } + + saddr = NULL; + } + + addr_type = ipv6_addr_type(&hdr->saddr); + + /* + * Source addr check + */ + + if (addr_type & IPV6_ADDR_LINKLOCAL) + { + src_dev = skb->dev; + } + + /* + * Must not send if we know that source is Anycast also. + * for now we don't know that. + */ + if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) + { + printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); + return; + } + + /* + * ok. kick it. checksum will be provided by the + * getfrag_t callback. + */ + + msg.icmph.type = type; + msg.icmph.code = code; + msg.icmph.checksum = 0; + msg.icmph.icmp6_pointer = htonl(info); + + msg.data = (__u8 *) skb->ipv6_hdr; + msg.csum = 0; + msg.daddr = &hdr->saddr; + /* + if (skb->opt) + optlen = skb->opt->optlen; + else + */ + + optlen = 0; + + len = min(skb->tail - ((unsigned char *) hdr), + 576 - sizeof(struct ipv6hdr) - sizeof(struct icmpv6hdr) + - optlen); + + if (len < 0) + { + printk(KERN_DEBUG "icmp: len problem\n"); + return; + } + + len += sizeof(struct icmpv6hdr); + + msg.len = len; + + + ipv6_build_xmit(sk, icmpv6_getfrag, &msg, &hdr->saddr, len, + saddr, src_dev, NULL, IPPROTO_ICMPV6, 1); +} + +static void icmpv6_echo_reply(struct sk_buff *skb) +{ + struct sock *sk = (struct sock *) icmpv6_socket.data; + struct ipv6hdr *hdr = skb->ipv6_hdr; + struct icmpv6hdr *icmph = (struct icmpv6hdr *) skb->h.raw; + struct in6_addr *saddr; + struct icmpv6_msg msg; + unsigned char *data; + int len; + + data = (char *) (icmph + 1); + + saddr = &hdr->daddr; + + if (ipv6_addr_type(saddr) & IPV6_ADDR_MULTICAST) + saddr = NULL; + + len = skb->tail - data; + len += 
sizeof(struct icmpv6hdr); + + msg.icmph.type = ICMPV6_ECHO_REPLY; + msg.icmph.code = 0; + msg.icmph.checksum = 0; + msg.icmph.icmp6_identifier = icmph->icmp6_identifier; + msg.icmph.icmp6_sequence = icmph->icmp6_sequence; + + msg.data = data; + msg.csum = 0; + msg.len = len; + msg.daddr = &hdr->saddr; + + ipv6_build_xmit(sk, icmpv6_getfrag, &msg, &hdr->saddr, len, saddr, + skb->dev, NULL, IPPROTO_ICMPV6, 1); +} + +static __inline__ int ipv6_ext_hdr(u8 nexthdr) +{ + /* + * find out if nexthdr is an extension header or a protocol + */ + return ( (nexthdr == NEXTHDR_HOP) || + (nexthdr == NEXTHDR_ROUTING) || + (nexthdr == NEXTHDR_FRAGMENT) || + (nexthdr == NEXTHDR_ESP) || + (nexthdr == NEXTHDR_AUTH) || + (nexthdr == NEXTHDR_NONE) || + (nexthdr == NEXTHDR_DEST) ); + +} + +static void icmpv6_notify(int type, int code, unsigned char *buff, int len, + struct in6_addr *saddr, struct in6_addr *daddr, + struct inet6_protocol *protocol) +{ + struct ipv6hdr *hdr = (struct ipv6hdr *) buff; + struct inet6_protocol *ipprot; + struct sock *sk; + char * pbuff; + __u32 info = 0; + int hash; + u8 nexthdr; + + /* now skip over extension headers */ + + nexthdr = hdr->nexthdr; + + pbuff = (char *) (hdr + 1); + len -= sizeof(struct ipv6hdr); + + while (ipv6_ext_hdr(nexthdr)) + { + int hdrlen; + + if (nexthdr == NEXTHDR_NONE) + return; + + nexthdr = *pbuff; + hdrlen = *(pbuff+1); + + if (((hdrlen + 1) << 3) > len) + return; + + pbuff += hdrlen; + len -= hdrlen; + } + + hash = nexthdr & (MAX_INET_PROTOS -1); + + for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; + ipprot != NULL; + ipprot=(struct inet6_protocol *)ipprot->next) + { + if (ipprot->protocol != nexthdr) + continue; + + if (ipprot->err_handler) + { + ipprot->err_handler(type, code, pbuff, info, + saddr, daddr, ipprot); + } + return; + } + + /* delivery to upper layer protocols failed. 
try raw sockets */ + + sk = rawv6_prot.sock_array[hash]; + + if (sk == NULL) + { + return; + } + + while ((sk = inet6_get_sock_raw(sk, nexthdr, daddr, saddr))) + { + rawv6_err(sk, type, code, pbuff, saddr, daddr); + sk = sk->next; + } + + return; +} + +/* + * Handle icmp messages + */ + +int icmpv6_rcv(struct sk_buff *skb, struct device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + struct ipv6_options *opt, unsigned short len, + int redo, struct inet6_protocol *protocol) +{ + struct ipv6hdr *orig_hdr; + struct icmpv6hdr *hdr = (struct icmpv6hdr *) skb->h.raw; + int ulen; + + /* perform checksum */ + + + switch (skb->ip_summed) { + case CHECKSUM_NONE: + skb->csum = csum_partial((char *)hdr, len, 0); + case CHECKSUM_HW: + if (csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, + skb->csum)) + { + printk(KERN_DEBUG "icmpv6 checksum failed\n"); + goto discard_it; + } + default: + /* CHECKSUM_UNNECESSARY */ + } + + /* + * length of original packet carried in skb + */ + ulen = skb->tail - (unsigned char *) (hdr + 1); + + switch (hdr->type) { + + case ICMPV6_ECHO_REQUEST: + icmpv6_echo_reply(skb); + break; + + case ICMPV6_ECHO_REPLY: + /* we coulnd't care less */ + break; + + case ICMPV6_PKT_TOOBIG: + orig_hdr = (struct ipv6hdr *) (hdr + 1); + if (ulen >= sizeof(struct ipv6hdr)) + { + rt6_handle_pmtu(&orig_hdr->daddr, + ntohl(hdr->icmp6_mtu)); + } + + /* + * Drop through to notify + */ + + case ICMPV6_DEST_UNREACH: + case ICMPV6_TIME_EXCEEDED: + case ICMPV6_PARAMETER_PROB: + + icmpv6_notify(hdr->type, hdr->code, (char *) (hdr + 1), ulen, + saddr, daddr, protocol); + break; + + case NDISC_ROUTER_SOLICITATION: + case NDISC_ROUTER_ADVERTISEMENT: + case NDISC_NEIGHBOUR_SOLICITATION: + case NDISC_NEIGHBOUR_ADVERTISEMENT: + case NDISC_REDIRECT: + ndisc_rcv(skb, dev, saddr, daddr, opt, len); + break; + + case ICMPV6_MEMBERSHIP_QUERY: + case ICMPV6_MEMBERSHIP_REPORT: + case ICMPV6_MEMBERSHIP_REDUCTION: + /* forward the packet to the igmp module */ + break; + + default: 
+ printk(KERN_DEBUG "icmpv6: msg of unkown type\n"); + + /* informational */ + if (hdr->type & 0x80) + { + goto discard_it; + } + + /* + * error of unkown type. + * must pass to upper level + */ + + icmpv6_notify(hdr->type, hdr->code, (char *) (hdr + 1), ulen, + saddr, daddr, protocol); + } + + discard_it: + + kfree_skb(skb, FREE_READ); + return 0; +} + +void icmpv6_init(struct proto_ops *ops) +{ + struct sock *sk; + int err; + + icmpv6_socket.type=SOCK_RAW; + icmpv6_socket.ops=ops; + + if((err=ops->create(&icmpv6_socket, IPPROTO_ICMPV6))<0) + printk(KERN_DEBUG + "Failed to create the ICMP control socket.\n"); + + MOD_DEC_USE_COUNT; + + sk = icmpv6_socket.data; + sk->allocation = GFP_ATOMIC; + sk->num = 256; /* Don't receive any data */ + + inet6_add_protocol(&icmpv6_protocol); +} + +static struct icmp6_err { + int err; + int fatal; +} tab_unreach[] = { + { ENETUNREACH, 0}, /* NOROUTE */ + { EACCES, 1}, /* ADM_PROHIBITED */ + { EOPNOTSUPP, 1}, /* NOT_NEIGHBOUR */ + { EHOSTUNREACH, 0}, /* ADDR_UNREACH */ + { ECONNREFUSED, 1}, /* PORT_UNREACH */ +}; + +int icmpv6_err_convert(int type, int code, int *err) +{ + int fatal = 0; + + *err = 0; + + switch (type) { + case ICMPV6_DEST_UNREACH: + if (code <= ICMPV6_PORT_UNREACH) + { + *err = tab_unreach[code].err; + fatal = tab_unreach[code].fatal; + } + break; + + case ICMPV6_PKT_TOOBIG: + *err = EMSGSIZE; + break; + + case ICMPV6_PARAMETER_PROB: + *err = EPROTO; + fatal = 1; + break; + }; + + return fatal; +} + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h -c -o icmp.o icmp.c" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/ipv6_input.c linux/net/ipv6/ipv6_input.c --- v2.1.7/linux/net/ipv6/ipv6_input.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/ipv6_input.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,437 @@ 
+/* + * IPv6 input + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * Ian P. Morris + * + * Based in linux/net/ipv4/ip_input.c + * + * $Id: ipv6_input.c,v 1.13 1996/10/11 16:03:06 roque Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * Header processing function list + * We process headers in order (as per RFC) + * If the processing function returns 0 the packet is considered + * delivered else it returns the value of the nexthdr. + * The ptr field of the function points to the previous nexthdr field. + * This is allows the processing function to change it if it's sematics + * is: return a new packet without this header (like fragmentation). + * When a next_header value is not within the list + * the inet protocol list is searched (i.e. 
to deliver to + * TCP for instance) + */ + +static int ipv6_dest_opt(struct sk_buff **skb_ptr, struct device *dev, __u8 *nhptr, + struct ipv6_options *opt); + + +struct hdrtype_proc { + u8 type; + int (*func) (struct sk_buff **, struct device *dev, __u8 *ptr, + struct ipv6_options *opt); +} hdrproc_lst[] = { + /* + TODO + + {NEXTHDR_HOP, ipv6_hop_by_hop} + */ + {NEXTHDR_ROUTING, ipv6_routing_header}, + {NEXTHDR_FRAGMENT, ipv6_reassembly}, + + {NEXTHDR_DEST, ipv6_dest_opt}, + /* + {NEXTHDR_AUTH, ipv6_auth_hdr}, + {NEXTHDR_ESP, ipv6_esp_hdr}, + */ + {NEXTHDR_MAX, NULL} +}; + +/* New header structures */ + + +struct ipv6_tlvtype { + u8 type; + u8 len; +}; + +struct ipv6_destopt_hdr { + u8 nexthdr; + u8 hdrlen; +}; + + +struct tlvtype_proc { + u8 type; + int (*func) (struct sk_buff *, struct device *dev, __u8 *ptr, + struct ipv6_options *opt); + + /* these functions do NOT update skb->h.raw */ + +} tlvprocdestopt_lst[] = { + {255, NULL} +}; + + +static int parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb, + struct device *dev, __u8 *nhptr, struct ipv6_options *opt, + void *lastopt) +{ + struct ipv6_tlvtype *hdr; + struct tlvtype_proc *curr; + int pos; + + while ((hdr=(struct ipv6_tlvtype *)skb->h.raw) != lastopt) + switch (hdr->type & 0x3F) + { + case 0: /* TLV encoded Pad1 */ + skb->h.raw++; + break; + + case 1: /* TLV encoded PadN */ + skb->h.raw += hdr->len+2; + break; + + default: /* Other TLV code so scan list */ + for (curr=procs; curr->type != 255; curr++) + if (curr->type == (hdr->type & 0x3F)) + { + curr->func(skb, dev, nhptr, opt); + skb->h.raw += hdr->len+2; + break; + } + + if (curr->type==255) + { + /* unkown type */ + pos= (__u8 *) skb->h.raw - (__u8 *) skb->ipv6_hdr; + /* I think this is correct please check - IPM */ + + switch ((hdr->type & 0xC0) >> 6) { + case 0: /* ignore */ + skb->h.raw += hdr->len+2; + break; + + case 1: /* drop packet */ + kfree_skb(skb, FREE_READ); + return 0; + + case 2: /* send ICMP PARM PROB regardless and + drop 
packet */ + icmpv6_send(skb, ICMPV6_PARAMETER_PROB, + 2, pos, dev); + kfree_skb(skb, FREE_READ); + return 0; + + case 3: /* Send ICMP if not a multicast address + and drop packet */ + if (!(ipv6_addr_type(&(skb->ipv6_hdr->daddr)) & IPV6_ADDR_MULTICAST) ) + icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 2, pos, dev); + kfree_skb(skb, FREE_READ); + return 0; + } + } + break; + } + + return 1; +} + + + +static int ipv6_dest_opt(struct sk_buff **skb_ptr, struct device *dev, __u8 *nhptr, + struct ipv6_options *opt) +{ + struct sk_buff *skb=*skb_ptr; + struct ipv6_destopt_hdr *hdr = (struct ipv6_destopt_hdr *) skb->h.raw; + + if (parse_tlv(tlvprocdestopt_lst, skb, dev, nhptr, opt,skb->h.raw+hdr->hdrlen)) + return hdr->nexthdr; + else + return 0; +} + + + +/* + * 0 - deliver + * 1 - block + */ +static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +{ + struct icmpv6hdr *icmph; + struct raw6_opt *opt; + + opt = &sk->tp_pinfo.tp_raw; + icmph = (struct icmpv6hdr *) (skb->ipv6_hdr + 1); + return test_bit(icmph->type, &opt->filter); +} + +/* + * demultiplex raw sockets. + * (should consider queueing the skb in the sock receive_queue + * without calling rawv6.c) + */ +static struct sock * ipv6_raw_deliver(struct sk_buff *skb, + struct device *dev, + struct ipv6_options *opt, + __u16 nexthdr, + __u16 len, + struct in6_addr *saddr, + struct in6_addr *daddr) +{ + struct sock *sk, *sk2; + __u8 hash; + + hash = nexthdr & (SOCK_ARRAY_SIZE-1); + + sk = rawv6_prot.sock_array[hash]; + + + /* + * The first socket found will be delivered after + * delivery to transport protocols. 
+ */ + + if (sk == NULL) + return NULL; + + sk = inet6_get_sock_raw(sk, nexthdr, daddr, saddr); + + if (sk) + { + sk2 = sk; + + while ((sk2 = inet6_get_sock_raw(sk2->next, nexthdr, + daddr, saddr))) + { + struct sk_buff *buff; + + if (nexthdr == IPPROTO_ICMPV6 && + icmpv6_filter(sk2, skb)) + { + continue; + } + buff = skb_clone(skb, GFP_ATOMIC); + buff->sk = sk2; + rawv6_rcv(buff, dev, saddr, daddr, opt, len); + } + } + + if (sk && nexthdr == IPPROTO_ICMPV6 && icmpv6_filter(sk, skb)) + { + sk = NULL; + } + + return sk; +} + +int ipv6_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct inet6_ifaddr *ifp; + struct ipv6_options *opt = (struct ipv6_options *) skb->proto_priv; + struct ipv6hdr *hdr; + u8 hash; + u8 addr_type; + struct inet6_protocol *ipprot; + struct sock *raw_sk; + int found = 0; + int nexthdr = 0; + __u8 *nhptr; + int pkt_len; + + hdr = skb->ipv6_hdr = (struct ipv6hdr *) skb->h.raw; + + if (skb->len < sizeof(struct ipv6hdr) || hdr->version != 6) + { + ipv6_statistics.Ip6InHdrErrors++; + printk(KERN_DEBUG "ipv6_rcv: broken header\n"); + kfree_skb(skb, FREE_READ); + return 0; + } + + pkt_len = ntohs(hdr->payload_len); + + if (pkt_len + sizeof(struct ipv6hdr) > skb->len) + { + printk(KERN_DEBUG "ipv6_rcv: invalid payload length\n"); + kfree_skb(skb, FREE_READ); + return 0; + } + + skb_trim(skb, pkt_len + sizeof(struct ipv6hdr)); + + /* check daddr */ + + /* Accounting & Firewall check */ + + addr_type = ipv6_addr_type(&hdr->daddr); + + if (addr_type & IPV6_ADDR_MULTICAST) + { + /* + * if mcast address is not for one of our groups + * either pass it to mcast router or discard it + */ + + if (ipv6_chk_mcast_addr(dev, &hdr->daddr) == 0) + { + /* something like: + if (acting_as_router) + ipv6_mcast_route(skb, ...) 
+ else + */ + kfree_skb(skb, FREE_READ); + return 0; + } + } + + if (addr_type & IPV6_ADDR_MULTICAST || + (ifp = ipv6_chk_addr(&hdr->daddr))) + { + + /* loop in a cicle parsing nexthdrs */ + + skb->h.raw += sizeof(struct ipv6hdr); + + /* extension header processing must update skb->h.raw */ + + nexthdr = hdr->nexthdr; + nhptr = &hdr->nexthdr; + + + while(1) + { + struct hdrtype_proc *hdrt; + + /* check for extension header */ + + for (hdrt=hdrproc_lst; hdrt->type != NEXTHDR_MAX; hdrt++) + { + if (hdrt->type == nexthdr) + { + if ((nexthdr = hdrt->func(&skb, dev, nhptr, opt))) + { + nhptr = skb->h.raw; + hdr = skb->ipv6_hdr; + continue; + } + return 0; + } + } + break; + + } + + /* + * deliver to raw sockets + * should we deliver raw after or before parsing + * extension headers ? + * delivering after means we do reassembly of datagrams + * in ip. + */ + + pkt_len = skb->tail - skb->h.raw; + + raw_sk = ipv6_raw_deliver(skb, dev, opt, nexthdr, pkt_len, + &hdr->saddr, &hdr->daddr); + + /* check inet6_protocol list */ + + hash = nexthdr & (MAX_INET_PROTOS -1); + for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; + ipprot != NULL; + ipprot = (struct inet6_protocol *) ipprot->next) + { + struct sk_buff *buff = skb; + + if (ipprot->protocol != nexthdr) + continue; + + if (ipprot->copy || raw_sk) + buff = skb_clone(skb, GFP_ATOMIC); + + + ipprot->handler(buff, dev, + &hdr->saddr, &hdr->daddr, + opt, pkt_len, + 0, ipprot); + found = 1; + } + + if (raw_sk) + { + skb->sk = raw_sk; + rawv6_rcv(skb, dev, &hdr->saddr, &hdr->daddr, opt, + htons(hdr->payload_len)); + found = 1; + } + + /* not found: send ICMP parameter problem back */ + + if (!found) + { + printk(KERN_DEBUG "proto not found %d\n", nexthdr); + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + } + + } + else + { + if (ipv6_forwarding) + { + if (addr_type & IPV6_ADDR_LINKLOCAL) + { + printk(KERN_DEBUG + "link local pkt to forward\n"); + kfree_skb(skb, FREE_READ); + return 0; + } + ipv6_forward(skb, dev, 0); + } 
+ else + { + printk(KERN_WARNING "IPV6: packet to forward -" + "host not configured as router\n"); + kfree_skb(skb, FREE_READ); + } + } + + return 0; +} + +/* + * Local variables: + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/ipv6_output.c linux/net/ipv6/ipv6_output.c --- v2.1.7/linux/net/ipv6/ipv6_output.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/ipv6_output.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,964 @@ +/* + * IPv6 output functions + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * Based on linux/net/ipv4/ip_output.c + * + * $Id: ipv6_output.c,v 1.19 1996/10/16 18:34:16 roque Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +static u32 ipv6_fragmentation_id = 1; +int ipv6_forwarding = 0; /* default: host */ + +static int __inline__ ipv6_build_mac_header(struct sk_buff *skb, + struct device *dev, + struct neighbour *neigh, + int len) +{ + int mac; + int hdrlen = 0; + + skb->arp = 1; + skb->nexthop = neigh; + + + if (dev->hard_header_len) + { + skb_reserve(skb, (dev->hard_header_len + 15) & ~15); + + if (neigh && (neigh->flags & NCF_HHVALID)) + { + /* + * Cached hardware header + */ + + memcpy(skb_push(skb, dev->hard_header_len), + neigh->hh_data, dev->hard_header_len); + + return dev->hard_header_len; + } + + if (dev->hard_header) + { + mac = dev->hard_header(skb, dev, ETH_P_IPV6, + NULL, NULL, len); + + if (mac < 0) + { + hdrlen = -mac; + skb->arp = 0; + } + else + { + hdrlen = mac; + } + } + else + hdrlen = dev->hard_header_len; + } + + return hdrlen; +} + +void ipv6_redo_mac_hdr(struct sk_buff *skb, 
struct neighbour *neigh, int len) +{ + struct device *dev = neigh->dev; + int mac; + + skb->dev = dev; + skb->nexthop = neigh; + skb->arp = 1; + + skb_pull(skb, (unsigned char *) skb->ipv6_hdr - skb->data); + + /* + * neighbour cache should have the ether address + * cached... use it + */ + + if (dev->hard_header) + { + if (neigh && (neigh->flags & NCF_HHVALID)) + { + /* + * Cached hardware header + */ + + memcpy(skb_push(skb, dev->hard_header_len), + neigh->hh_data, dev->hard_header_len); + return; + } + + mac = dev->hard_header(skb, dev, ETH_P_IPV6, + NULL, NULL, len); + + if (mac < 0) + { + skb->arp = 0; + } + + } +} + +void default_output_method(struct sk_buff *skb, struct rt6_info *rt) +{ + struct sock *sk = skb->sk; + struct device *dev = skb->dev; + + if (dev->flags & IFF_UP) + { + /* + * If we have an owner use its priority setting, + * otherwise use NORMAL + */ + + if (sk != NULL) + { + dev_queue_xmit(skb, dev, sk->priority); + } + else + { + dev_queue_xmit(skb, dev, SOPRI_NORMAL); + } + } + else + { + if(sk) + sk->err = ENETDOWN; + + ipv6_statistics.Ip6OutDiscards++; + + kfree_skb(skb, FREE_WRITE); + } +} + +/* + * xmit an sk_buff (used by TCP) + * sk can be NULL (for sending RESETs) + */ +int ipv6_xmit(struct sock *sk, struct sk_buff *skb, struct in6_addr *saddr, + struct in6_addr *daddr, struct ipv6_options *opt, int proto) +{ + struct ipv6hdr *hdr; + struct dest_entry *dc; + struct ipv6_pinfo *np = NULL; + struct device *dev = skb->dev; + int seg_len; + int addr_type; + int rt_flags = 0; + + + addr_type = ipv6_addr_type(daddr); + + if (addr_type & (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_SITELOCAL)) + { + /* + * force device match on route lookup + */ + + rt_flags |= RTI_DEVRT; + } + + if (skb->localroute) + { + rt_flags |= RTI_GATEWAY; + } + + hdr = skb->ipv6_hdr; + + + if (sk) + { + np = &sk->net_pinfo.af_inet6; + } + + if (np && np->dest) + { + dc = ipv6_dst_check(np->dest, daddr, np->dc_sernum, rt_flags); + } + else + { + dc = ipv6_dst_route(daddr, dev, 
rt_flags); + } + + if (dc == NULL) + { + ipv6_statistics.Ip6OutNoRoutes++; + return(-ENETUNREACH); + } + + dev = dc->rt.rt_dev; + + if (saddr == NULL) + { + struct inet6_ifaddr *ifa; + + ifa = ipv6_get_saddr((struct rt6_info *) dc, daddr); + + if (ifa == NULL) + { + printk(KERN_DEBUG + "ipv6_xmit: get_saddr failed\n"); + return -ENETUNREACH; + } + + saddr = &ifa->addr; + + if (np) + { + ipv6_addr_copy(&np->saddr, saddr); + } + } + + seg_len = skb->tail - ((unsigned char *) hdr); + + /* + * Link Layer headers + */ + + skb->sk = sk; + skb->protocol = __constant_htons(ETH_P_IPV6); + skb->free = 1; + skb->dev = dev; + + ipv6_redo_mac_hdr(skb, dc->dc_nexthop, seg_len); + + /* + * Fill in the IPv6 header + */ + + hdr->version = 6; + hdr->priority = np ? np->priority : 0; + + if (np) + memcpy(hdr->flow_lbl, (void *) &np->flow_lbl, 3); + else + memset(hdr->flow_lbl, 0, 3); + + hdr->payload_len = htons(seg_len - sizeof(struct ipv6hdr)); + hdr->nexthdr = proto; + hdr->hop_limit = np ? np->hop_limit : ipv6_hop_limit; + + memcpy(&hdr->saddr, saddr, sizeof(struct in6_addr)); + memcpy(&hdr->daddr, daddr, sizeof(struct in6_addr)); + + + /* + * Options + */ + + + /* + * Output the packet + */ + + ipv6_statistics.Ip6OutRequests++; + + if (dc->rt.rt_output_method) + { + (*dc->rt.rt_output_method)(skb, (struct rt6_info *) dc); + } + else + default_output_method(skb, (struct rt6_info *) dc); + + /* + * Update serial number of cached dest_entry or + * release destination cache entry + */ + + if (np) + { + np->dest = dc; + if (dc->rt.fib_node) + { + np->dc_sernum = dc->rt.fib_node->fn_sernum; + } + } + else + { + ipv6_dst_unlock(dc); + } + + return 0; +} + +/* + * To avoid extra problems ND packets are send through this + * routine. 
It's code duplication but i really want to avoid + * extra checks since ipv6_build_header is used by TCP (which + * is for us performace critical) + */ + +int ipv6_bld_hdr_2(struct sock *sk, struct sk_buff *skb, struct device *dev, + struct neighbour *neigh, + struct in6_addr *saddr, struct in6_addr *daddr, + int proto, int len) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct ipv6hdr *hdr; + int hdrlen = 0; + + skb->dev = dev; + + /* build MAC header */ + hdrlen += ipv6_build_mac_header(skb, dev, neigh, len); + + /* build fixed IPv6 header */ + + if (proto == IPPROTO_RAW) + return hdrlen; + + + hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + skb->ipv6_hdr = hdr; + + hdr->version = 6; + hdr->priority = np->priority & 0x0f; + + memset(hdr->flow_lbl, 0, 3); + + hdr->hop_limit = np->hop_limit; + + if (saddr == NULL) + { + printk(KERN_DEBUG "bug: bld_hdr called with no saddr\n"); + return -ENETUNREACH; + } + + memcpy(&hdr->saddr, saddr, sizeof(struct in6_addr)); + memcpy(&hdr->daddr, daddr, sizeof(struct in6_addr)); + + hdrlen += sizeof(struct ipv6hdr); + + hdr->nexthdr = proto; + + return hdrlen; +} + +void ipv6_queue_xmit(struct sock *sk, struct device *dev, struct sk_buff *skb, + int free) +{ + struct ipv6hdr *hdr; + u32 seg_len; + + hdr = skb->ipv6_hdr; + skb->sk = sk; + skb->protocol = __constant_htons(ETH_P_IPV6); + skb->free=1; + + seg_len = skb->tail - ((unsigned char *) hdr); + + hdr->payload_len = htons(seg_len - sizeof(struct ipv6hdr)); + + if (dev == NULL) + { + printk(KERN_DEBUG "ipv6_queue_xmit: unknown device\n"); + return; + } + + skb->dev = dev; + + ipv6_statistics.Ip6OutRequests++; + + + /* + * Multicast loopback + */ + + if (dev->flags & IFF_UP) + { + /* + * If we have an owner use its priority setting, + * otherwise use NORMAL + */ + + if (sk != NULL) + { + dev_queue_xmit(skb, dev, sk->priority); + } + else + { + dev_queue_xmit(skb, dev, SOPRI_NORMAL); + } + } + else + { + if(sk) + sk->err = ENETDOWN; + + 
ipv6_statistics.Ip6OutDiscards++; + + kfree_skb(skb, FREE_WRITE); + } + +} + + +int ipv6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data, + struct in6_addr *dest, unsigned short int length, + struct in6_addr *saddr, struct device *dev, + struct ipv6_options *opt, int proto, + int noblock) +{ + rt6_output_method_t output_method = default_output_method; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct dest_entry *dc = NULL; + struct in6_addr *daddr = dest; + struct ipv6hdr *hdr; + struct neighbour *neigh; + int addr_type; + int pktlength; + int pmtu = 0; + int rt_flags = 0; + + + if (opt && opt->srcrt) + { + struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; + daddr = rt0->addr; + } + + addr_type = ipv6_addr_type(daddr); + if (addr_type & IPV6_ADDR_MULTICAST && dev == NULL) + { + dev = np->mc_if; + } + + if (addr_type & (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_SITELOCAL | + IPV6_ADDR_MULTICAST)) + { + /* + * force device match on route lookup + */ + + rt_flags |= RTI_DEVRT; + } + + if (sk->localroute) + { + rt_flags |= RTI_GATEWAY; + } + + if (np->dest) + { + np->dest = ipv6_dst_check(np->dest, daddr, np->dc_sernum, + rt_flags); + + dc = np->dest; + + if (dc && dc->rt.fib_node) + { + np->dc_sernum = dc->rt.fib_node->fn_sernum; + } + else + { + printk(KERN_WARNING "dc entry not in table\n"); + } + } + else + { + dc = ipv6_dst_route(daddr, dev, rt_flags); + } + + if (dc == NULL) + { + if ((addr_type & IPV6_ADDR_MULTICAST) && dev) + { + neigh = NULL; + pmtu = dev->mtu; + } + else + { + ipv6_statistics.Ip6OutNoRoutes++; + return(-ENETUNREACH); + } + } + else + { + neigh = dc->dc_nexthop; + dev = neigh->dev; + + if (dc->rt.rt_output_method) + { + output_method = dc->rt.rt_output_method; + } + + if (dc->dc_flags & DCF_PMTU) + pmtu = dc->dc_pmtu; + else + pmtu = dev->mtu; + } + + + if (saddr == NULL) + { + struct inet6_ifaddr *ifa; + + ifa = ipv6_get_saddr((struct rt6_info *) dc, daddr); + + if (ifa == NULL) + { + printk(KERN_DEBUG + 
"ipv6_build_xmit: get_saddr failed\n"); + return -ENETUNREACH; + } + + saddr = &ifa->addr; + } + + if (dc && np->dest == NULL) + { + ipv6_dst_unlock(dc); + } + + pktlength = length; + + if (!sk->ip_hdrincl) + { + pktlength += sizeof(struct ipv6hdr); + if (opt) + { + pktlength += opt->opt_flen + opt->opt_nflen; + } + } + + + dev_lock_list(); + + /* + * reminder: don't allow fragmentation for IPPROTO_RAW + */ + + + if (pktlength <= pmtu) + { + int error; + + struct sk_buff *skb = + sock_alloc_send_skb(sk, pktlength+15+ + dev->hard_header_len, + 0, noblock, &error); + + if (skb == NULL) + { + ipv6_statistics.Ip6OutDiscards++; + dev_unlock_list(); + return error; + + } + + skb->dev=dev; + skb->protocol = htons(ETH_P_IPV6); + skb->free=1; + skb->when=jiffies; + skb->sk=sk; + skb->arp=0; + + /* build the mac header... */ + ipv6_build_mac_header(skb, dev, neigh, pktlength); + + hdr = (struct ipv6hdr *) skb->tail; + + if (!sk->ip_hdrincl) + { + skb_put(skb, sizeof(struct ipv6hdr)); + skb->ipv6_hdr = hdr; + + hdr->version = 6; + hdr->priority = np->priority; + + memcpy(hdr->flow_lbl, &np->flow_lbl, 3); + + hdr->payload_len = htons(pktlength - + sizeof(struct ipv6hdr)); + + hdr->hop_limit = np->hop_limit; + + memcpy(&hdr->saddr, saddr, sizeof(struct in6_addr)); + memcpy(&hdr->daddr, daddr, sizeof(struct in6_addr)); + + if (opt && opt->srcrt) + { + hdr->nexthdr = ipv6opt_bld_rthdr(skb, opt, + dest, proto); + + } + else + hdr->nexthdr = proto; + } + + skb_put(skb, length); + getfrag(data, &hdr->saddr, + ((char *) hdr) + (pktlength - length), 0, length); + + ipv6_statistics.Ip6OutRequests++; + (*output_method)(skb, (struct rt6_info *) dc); + + dev_unlock_list(); + return 0; + } + else + { + /* + * Fragmentation + */ + + /* + * Extension header order: + * Hop-by-hop -> Routing -> Fragment -> rest (...) + * + * We must build the non-fragmented part that + * will be in every packet... 
this also means + * that other extension headers (Dest, Auth, etc) + * must be considered in the data to be fragmented + */ + + struct sk_buff *last_skb; + struct frag_hdr *fhdr; + int unfrag_len; + int payl_len; + int frag_len; + int last_len; + int nfrags; + int err; + int fhdr_dist; + __u32 id; + + if (sk->ip_hdrincl) + { + return -EMSGSIZE; + } + + id = ipv6_fragmentation_id++; + + unfrag_len = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr); + payl_len = length; + + if (opt) + { + unfrag_len += opt->opt_nflen; + payl_len += opt->opt_flen; + } + + nfrags = payl_len / ((pmtu - unfrag_len) & ~0x7); + + /* + * Length of fragmented part on every packet but + * the last must be an: + * "integer multiple of 8 octects". + */ + + frag_len = (pmtu - unfrag_len) & ~0x7; + + /* + * We must send from end to start because of + * UDP/ICMP checksums. We do a funny trick: + * fill the last skb first with the fixed + * header (and its data) and then use it + * to create the following segments and send it + * in the end. If the peer is checking the M_flag + * to trigger the reassembly code then this + * might be a good idea. + */ + + last_len = payl_len - (nfrags * frag_len); + + if (last_len == 0) + { + last_len = frag_len; + nfrags--; + } + + last_skb = sock_alloc_send_skb(sk, unfrag_len + frag_len + + dev->hard_header_len + 15, + 0, noblock, &err); + + if (last_skb == NULL) + { + dev_unlock_list(); + return err; + } + + last_skb->dev=dev; + last_skb->protocol = htons(ETH_P_IPV6); + last_skb->free=1; + last_skb->when=jiffies; + last_skb->sk=sk; + last_skb->arp=0; + + /* + * build the mac header... 
+ */ + ipv6_build_mac_header(last_skb, dev, neigh, + unfrag_len + frag_len); + + hdr = (struct ipv6hdr *) skb_put(last_skb, + sizeof(struct ipv6hdr)); + last_skb->ipv6_hdr = hdr; + + hdr->version = 6; + hdr->priority = np->priority; + + memcpy(hdr->flow_lbl, &np->flow_lbl, 3); + hdr->payload_len = htons(unfrag_len + frag_len - + sizeof(struct ipv6hdr)); + + hdr->hop_limit = np->hop_limit; + + hdr->nexthdr = NEXTHDR_FRAGMENT; + + memcpy(&hdr->saddr, saddr, sizeof(struct in6_addr)); + memcpy(&hdr->daddr, daddr, sizeof(struct in6_addr)); + + if (opt && opt->srcrt) + { + hdr->nexthdr = ipv6opt_bld_rthdr(last_skb, opt, dest, + NEXTHDR_FRAGMENT); + } + + fhdr = (struct frag_hdr *) + skb_put(last_skb, sizeof(struct frag_hdr)); + + memset(fhdr, 0, sizeof(struct frag_hdr)); + + fhdr->nexthdr = proto; + fhdr->frag_off = ntohs(nfrags * frag_len); + fhdr->identification = id; + + fhdr_dist = (unsigned char *) fhdr - last_skb->data; + + getfrag(data, &hdr->saddr, last_skb->tail, nfrags * frag_len, + last_len); + + while (nfrags--) + { + struct sk_buff *skb; + + struct frag_hdr *fhdr2; + + printk(KERN_DEBUG "sending frag %d\n", nfrags); + skb = skb_copy(last_skb, sk->allocation); + + fhdr2 = (struct frag_hdr *) (skb->data + fhdr_dist); + /* more flag on */ + fhdr2->frag_off = ntohs(nfrags * frag_len + 1); + + /* if (nfrags == 0) + put rest of headers + */ + + getfrag(data, &hdr->saddr, skb_put(skb, frag_len), + nfrags * frag_len, frag_len); + + ipv6_statistics.Ip6OutRequests++; + (*output_method)(skb, (struct rt6_info *) dc); + } + + printk(KERN_DEBUG "sending last frag \n"); + + hdr->payload_len = htons(unfrag_len + last_len - + sizeof(struct ipv6hdr)); + + /* + * update last_skb to reflect the getfrag we did + * on start. + */ + last_skb->tail += last_len; + last_skb->len += last_len; + + /* + * toss the mac header out and rebuild it. + * needed because of the different frame length. + * ie: not needed for an ethernet. 
+ */ + + if (dev->type != ARPHRD_ETHER && last_len != frag_len) + { + ipv6_redo_mac_hdr(last_skb, neigh, + unfrag_len + last_len); + } + + ipv6_statistics.Ip6OutRequests++; + (*output_method)(last_skb, (struct rt6_info *) dc); + + dev_unlock_list(); + return 0; + } + return -1; +} + +static int pri_values[4] = +{ + SOPRI_BACKGROUND, + SOPRI_NORMAL, + SOPRI_NORMAL, + SOPRI_INTERACTIVE +}; + +void ipv6_forward(struct sk_buff *skb, struct device *dev, int flags) +{ + struct neighbour *neigh; + struct dest_entry *dest; + int priority; + int rt_flags; + int size; + int pmtu; + + if (skb->ipv6_hdr->hop_limit <= 1) + { + icmpv6_send(skb, ICMPV6_TIME_EXCEEDED, ICMPV6_EXC_HOPLIMIT, + 0, dev); + + kfree_skb(skb, FREE_READ); + return; + } + + skb->ipv6_hdr->hop_limit--; + + if (ipv6_addr_type(&skb->ipv6_hdr->saddr) & IPV6_ADDR_LINKLOCAL) + { + printk(KERN_DEBUG "ipv6_forward: link local source addr\n"); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOT_NEIGHBOUR, + 0, dev); + kfree_skb(skb, FREE_READ); + return; + } + + rt_flags = RTF_MODIFIED; + + if ((flags & IP6_FW_STRICT)) + { + rt_flags |= RTF_GATEWAY; + } + + dest = ipv6_dst_route(&skb->ipv6_hdr->daddr, NULL, rt_flags); + + if (dest == NULL) + { + int code; + + if (flags & IP6_FW_STRICT) + code = ICMPV6_NOT_NEIGHBOUR; + else + code = ICMPV6_NOROUTE; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, dev); + + kfree_skb(skb, FREE_READ); + return; + } + + neigh = dest->dc_nexthop; + + if (neigh->dev == dev && (dev->flags & IFF_MULTICAST) && + !(flags & IP6_FW_SRCRT)) + { + struct in6_addr *target = NULL; + + /* + * outgoing device equal to incoming device + * send a redirect + */ + + if ((dest->dc_flags & RTF_GATEWAY)) + { + target = &neigh->addr; + } + else + { + target = &skb->ipv6_hdr->daddr; + } + + ndisc_send_redirect(skb, neigh, target); + } + + pmtu = neigh->dev->mtu; + + size = sizeof(struct ipv6hdr) + ntohs(skb->ipv6_hdr->payload_len); + + if (size > pmtu) + { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, pmtu, dev); 
+ kfree_skb(skb, FREE_READ); + return; + } + + ipv6_dst_unlock(dest); + + if (skb_headroom(skb) < neigh->dev->hard_header_len) + { + struct sk_buff *buff; + + buff = alloc_skb(neigh->dev->hard_header_len + skb->len + 15, + GFP_ATOMIC); + + if (buff == NULL) + { + return; + } + + skb_reserve(buff, (neigh->dev->hard_header_len + 15) & ~15); + + buff->protocol = __constant_htons(ETH_P_IPV6); + buff->free = 1; + buff->h.raw = skb_put(buff, size); + + memcpy(buff->h.raw, skb->ipv6_hdr, size); + buff->ipv6_hdr = (struct ipv6hdr *) buff->h.raw; + kfree_skb(skb, FREE_READ); + skb = buff; + } + + ipv6_redo_mac_hdr(skb, neigh, size); + + priority = skb->ipv6_hdr->priority; + + priority = (priority & 0x7) >> 1; + priority = pri_values[priority]; + + if (dev->flags & IFF_UP) + { + dev_queue_xmit(skb, neigh->dev, priority); + } + else + { + ipv6_statistics.Ip6OutDiscards++; + kfree_skb(skb, FREE_READ); + } +} + + +/* + * Local variables: + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/ipv6_route.c linux/net/ipv6/ipv6_route.c --- v2.1.7/linux/net/ipv6/ipv6_route.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/ipv6_route.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,1905 @@ +/* + * IPv6 routing table + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PROC_FS +#include +#endif + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +/* + * Routing Table + * + * simplified version of a radix tree + * + * - every node shares its ancestor's prefix + * - the tree is ordered from less to most specific mask + * - default routes are handled apart + * + * this facilitates recursion a lot + */ + +static struct rt6_info null_entry = { + NULL, NULL, + {{{0}}}, + 0, 1, + NULL, NULL, + 0, 0, RTF_REJECT +}; + +struct fib6_node routing_table = { + NULL, NULL, NULL, &null_entry, + 0, RTN_ROOT, 0 +}; + +struct rt6_info *default_rt_list = NULL; +struct rt6_info *loopback_rt = NULL; + +/* + * last_resort_rt - no routers present. + * Assume all destinations on link. + */ +struct rt6_info *last_resort_rt = NULL; + +static struct rt6_req request_queue = { + 0, NULL, &request_queue, &request_queue +}; + + +/* + * A routing update causes an increase of the serial number on the + * affected subtree. This allows for cached routes to be asynchronously + * tested when modifications are made to the destination cache as a + * result of redirects, path MTU changes, etc. 
+ */ + +static __u32 rt_sernum = 0; + +static atomic_t rt6_lock = 0; +static int rt6_bh_mask = 0; + +#define RT_BH_REQUEST 1 +#define RT_BH_GC 2 + +static void __rt6_run_bh(void); + +typedef void (*f_pnode)(struct fib6_node *fn, void *); + +static void rt6_walk_tree(f_pnode func, void * arg, int filter); +static void rt6_rt_timeout(struct fib6_node *fn, void *arg); +static int rt6_msgrcv(struct sk_buff *skb); + +struct rt6_statistics rt6_stats = { + 1, 0, 1, 1, 0 +}; + +static atomic_t rt_clients = 0; + +void rt6_timer_handler(unsigned long data); + +struct timer_list rt6_gc_timer = { + NULL, + NULL, + 0, + 0, + rt6_timer_handler +}; + +static __inline__ void rt6_run_bh(void) +{ + unsigned long flags; + + save_flags(flags); + cli(); + + if (rt6_lock == 0 && rt6_bh_mask) + { + __rt6_run_bh(); + } + restore_flags(flags); +} + +/* + * request queue operations + * FIFO queue/dequeue + */ +static __inline__ void rtreq_queue(struct rt6_req * req) +{ + unsigned long flags; + struct rt6_req *next = &request_queue; + + save_flags(flags); + cli(); + + req->prev = next->prev; + req->prev->next = req; + next->prev = req; + req->next = next; + restore_flags(flags); +} + +static __inline__ struct rt6_req * rtreq_dequeue(void) +{ + struct rt6_req *next = &request_queue; + struct rt6_req *head; + + head = next->next; + + if (head == next) + { + return NULL; + } + + head->next->prev = head->prev; + next->next = head->next; + + head->next = NULL; + head->prev = NULL; + + return head; +} + +/* + * compare "prefix length" bits of an address + */ +static __inline__ int addr_match(struct in6_addr *a1, struct in6_addr *a2, + int prefixlen) +{ + int pdw; + int pbi; + + pdw = prefixlen >> 0x05; /* num of whole __u32 in prefix */ + pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */ + + if (pdw) + { + if (memcmp(a1, a2, pdw << 2)) + return 0; + } + + if (pbi) + { + __u32 w1, w2; + __u32 mask; + + w1 = a1->s6_addr32[pdw]; + w2 = a2->s6_addr32[pdw]; + + mask = 
htonl((0xffffffff) << (0x20 - pbi)); + + if ((w1 ^ w2) & mask) + return 0; + } + + return 1; +} + +/* + * test bit. range [0-127] + */ + +static __inline__ int addr_bit_set(struct in6_addr *addr, int fn_bit) +{ + int dw; + __u32 b1; + __u32 mask; + int bit = fn_bit; + + dw = bit >> 0x05; + + b1 = addr->s6_addr32[dw]; + + bit = ~bit; + bit &= 0x1f; + mask = htonl(1 << bit); + return (b1 & mask); +} + +static __inline__ int addr_bit_equal(struct in6_addr *a1, struct in6_addr *a2, + int fn_bit) +{ + int dw; + __u32 b1, b2; + __u32 mask; + int bit = fn_bit; + + dw = bit >> 0x05; + + b1 = a1->s6_addr32[dw]; + b2 = a2->s6_addr32[dw]; + + bit = ~bit; + bit &= 0x1f; + mask = htonl(1 << bit); + return !((b1 ^ b2) & mask); +} + +/* + * find the first different bit between two addresses + */ +static __inline__ int addr_diff(struct in6_addr *a1, struct in6_addr *a2) +{ + int i; + + for (i = 0; i<4; i++) + { + __u32 b1, b2; + __u32 xb; + + b1 = a1->s6_addr32[i]; + b2 = a2->s6_addr32[i]; + + xb = b1 ^ b2; + + if (xb) + { + int res = 0; + int j=31; + + xb = ntohl(xb); + + while (test_bit(j, &xb) == 0) + { + res++; + j--; + } + + return (i * 32 + res); + } + } + + /* + * bit values are in range [0-127] + * 128 is an ilegal value as we should *never* get to + * this point since that would mean the addrs are equal + */ + return 128; +} + +/* + * add a rt to a node that may already contain routes + * sort routes in ascending metric order so that fib lookup + * returns the smallest metric by default + */ + +static __inline__ void fib6_add_rt2node(struct fib6_node *fn, + struct rt6_info *rt) +{ + struct rt6_info *iter, **back; + + rt->fib_node = fn; + back = &fn->leaf; + + for (iter = fn->leaf; iter; iter=iter->next) + { + if (iter->rt_metric > rt->rt_metric) + { + break; + } + + back = &iter->next; + } + + rt->next = iter; + *back = rt; +} + +/* + * Routing Table + */ + +static int fib6_add_1(struct rt6_info *rt) +{ + struct fib6_node *fn; + struct fib6_node *pn = NULL; + struct 
fib6_node *in; + struct fib6_node *ln; + struct in6_addr *addr; + __u32 bit; + __u32 dir = 0; + __u32 sernum = ++rt_sernum; + int pbit = rt->rt_prefixlen - 1; + + addr = &rt->rt_dst; + + /* insert node in tree */ + + fn = &routing_table; + + for (;;) + { + if (fn == NULL) + { + ln = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC); + + if (ln == NULL) + return (-ENOMEM); + + memset(ln, 0, sizeof(struct fib6_node)); + ln->fn_bit = pbit; + ln->fn_flags = RTN_BACKTRACK; + + ln->parent = pn; + ln->leaf = rt; + ln->fn_sernum = sernum; + rt->fib_node = ln; + + atomic_inc(&rt->rt_ref); + + if (dir) + pn->right = ln; + else + pn->left = ln; + + rt6_stats.fib_nodes++; + rt6_stats.fib_route_nodes++; + rt6_stats.fib_rt_entries++; + + return(0); + } + + if (addr_match(&fn->leaf->rt_dst, addr, fn->fn_bit)) + { + if (pbit == fn->fn_bit && + addr_bit_equal(addr, &fn->leaf->rt_dst, + rt->rt_prefixlen)) + { + /* clean up an intermediate node */ + if ((fn->fn_flags & RTN_BACKTRACK) == 0) + { + rt_release(fn->leaf); + fn->leaf = NULL; + fn->fn_flags |= RTN_BACKTRACK; + } + + fib6_add_rt2node(fn, rt); + fn->fn_sernum = sernum; + atomic_inc(&rt->rt_ref); + + rt6_stats.fib_route_nodes++; + rt6_stats.fib_rt_entries++; + + return 0; + } + + if (pbit > fn->fn_bit) + { + /* walk down on tree */ + + fn->fn_sernum = sernum; + + dir = addr_bit_set(addr, fn->fn_bit); + pn = fn; + fn = dir ? fn->right: fn->left; + + continue; + } + } + + /* + * split since we don't have a common prefix anymore or + * we have a less significant route. 
+ * we've to insert an intermediate node on the list + * this new node will point to the one we need to create + * and the current + */ + + pn = fn->parent; + + /* find 1st bit in difference between the 2 addrs */ + bit = addr_diff(addr, &fn->leaf->rt_dst); + + + /* + * (intermediate) + * / \ + * (new leaf node) (old node) + */ + if (rt->rt_prefixlen > bit) + { + in = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC); + + if (in == NULL) + return (-ENOMEM); + + memset(in, 0, sizeof(struct fib6_node)); + + /* + * new intermediate node. + * RTN_BACKTRACK will + * be off since that an address that chooses one of + * the branches would not match less specific routes + * int the other branch + */ + + in->fn_bit = bit; + in->parent = pn; + in->leaf = rt; + in->fn_sernum = sernum; + atomic_inc(&rt->rt_ref); + + /* leaf node */ + ln = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC); + + if (ln == NULL) + { + kfree(in); + return (-ENOMEM); + } + + /* update parent pointer */ + if (dir) + pn->right = in; + else + pn->left = in; + + memset(ln, 0, sizeof(struct fib6_node)); + ln->fn_bit = pbit; + ln->fn_flags = RTN_BACKTRACK; + + ln->parent = in; + fn->parent = in; + + ln->leaf = rt; + ln->fn_sernum = sernum; + atomic_inc(&rt->rt_ref); + + rt->fib_node = ln; + + if (addr_bit_set(addr, bit)) + { + in->right = ln; + in->left = fn; + } + else + { + in->left = ln; + in->right = fn; + } + + rt6_stats.fib_nodes += 2; + rt6_stats.fib_route_nodes++; + rt6_stats.fib_rt_entries++; + + return 0; + } + + /* + * (new leaf node) + * / \ + * (old node) NULL + */ + + ln = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC); + + if (ln == NULL) + return (-ENOMEM); + + memset(ln, 0, sizeof(struct fib6_node)); + ln->fn_bit = pbit; + ln->fn_flags = RTN_BACKTRACK; + + + ln->parent = pn; + ln->leaf = rt; + ln->fn_sernum = sernum; + atomic_inc(&rt->rt_ref); + + rt->fib_node = ln; + + if (dir) + pn->right = ln; + else + pn->left = ln; + + + if (addr_bit_set(&fn->leaf->rt_dst, pbit)) + ln->right = fn; + else + 
ln->left = fn; + + fn->parent = ln; + + rt6_stats.fib_nodes++; + rt6_stats.fib_route_nodes++; + rt6_stats.fib_rt_entries++; + + return(0); + } + + return (-1); +} + +static struct rt6_info * fib6_lookup_1(struct in6_addr *addr, int flags) +{ + struct fib6_node *fn, *next; + int dir; + + fn = &routing_table; + + for (;;) + { + dir = addr_bit_set(addr, fn->fn_bit); + + next = dir ? fn->right: fn->left; + + if (next) + { + fn = next; + continue; + } + + break; + } + + + while ((fn->fn_flags & RTN_ROOT) == 0) + { + if (fn->fn_flags & RTN_BACKTRACK) + { + if (addr_match(&fn->leaf->rt_dst, addr, + fn->leaf->rt_prefixlen)) + { + struct rt6_info *rt; + + for (rt = fn->leaf; rt; rt = rt->next) + { + if ((rt->rt_flags & flags) == 0) + return rt; + } + } + } + + fn = fn->parent; + } + + return NULL; +} + + + +/* + * called to trim the tree of intermediate nodes when possible + */ + +static void fib6_del_3(struct fib6_node *fn) +{ + int children = 0; + int dir = 0; + int bit; + + /* + * 0 or one children: + * delete the node + * + * 2 children: + * move the bit down + */ + + if (fn->left) + { + children++; + dir = 0; + } + + if (fn->right) + { + children++; + dir = 1; + } + + if (children < 2) + { + struct fib6_node *child; + + child = dir ? 
fn->right : fn->left; + + if (fn->parent->left == fn) + { + fn->parent->left = child; + } + else + { + fn->parent->right = child; + } + + if (child) + { + child->parent = fn->parent; + } + + /* + * try to collapse on top + */ + if ((fn->parent->fn_flags & (RTN_BACKTRACK | RTN_ROOT)) == 0) + { + if (fn->leaf) + { + rt_release(fn->leaf); + fn->leaf = NULL; + } + fib6_del_3(fn->parent); + } + if (fn->fn_flags & RTN_BACKTRACK) + { + rt6_stats.fib_route_nodes--; + } + rt6_stats.fib_nodes--; + kfree(fn); + return; + } + + bit = addr_diff(&fn->left->leaf->rt_dst, &fn->right->leaf->rt_dst); + + fn->fn_bit = bit; + fn->fn_flags &= ~RTN_BACKTRACK; + fn->leaf = fn->left->leaf; + + rt6_stats.fib_route_nodes--; +} + +static struct fib6_node * fib6_del_2(struct in6_addr *addr, __u32 prefixlen, + struct in6_addr *gw, struct device *dev) +{ + struct fib6_node *fn; + + for (fn = &routing_table; fn;) + { + int dir; + + if ((fn->fn_flags & RTN_BACKTRACK) && + prefixlen == fn->leaf->rt_prefixlen && + addr_match(&fn->leaf->rt_dst, addr, fn->leaf->rt_prefixlen) + ) + { + break; + } + + dir = addr_bit_set(addr, fn->fn_bit); + + fn = dir ? 
fn->right: fn->left; + } + + /* + * if route tree node found + * search among it's entries + */ + + if (fn) + { + struct rt6_info *back = NULL; + struct rt6_info *lf; + + for(lf = fn->leaf; lf; lf=lf->next) + { + if ((gw && (ipv6_addr_cmp(addr, &lf->rt_dst) == 0)) || + (dev && dev == lf->rt_dev)) + { + /* delete this entry */ + if (back == NULL) + fn->leaf = lf->next; + else + back->next = lf->next; + + lf->fib_node = NULL; + rt_release(lf); + return fn; + } + back = lf; + } + } + + return NULL; +} + +static struct fib6_node * fib6_del_rt_2(struct in6_addr *addr, __u32 prefixlen, + struct rt6_info *rt) +{ + struct fib6_node *fn; + + for (fn = &routing_table; fn;) + { + int dir; + + if ((fn->fn_flags & RTN_BACKTRACK) && + prefixlen == fn->leaf->rt_prefixlen && + addr_match(&fn->leaf->rt_dst, addr, fn->leaf->rt_prefixlen) + ) + { + break; + } + + dir = addr_bit_set(addr, fn->fn_bit); + + fn = dir ? fn->right: fn->left; + } + + /* + * if route tree node found + * search among its entries + */ + + if (fn) + { + struct rt6_info *back = NULL; + struct rt6_info *lf; + + for(lf = fn->leaf; lf; lf=lf->next) + { + if (rt == lf) + { + /* delete this entry */ + if (back == NULL) + fn->leaf = lf->next; + else + back->next = lf->next; + + lf->fib_node = NULL; + rt_release(lf); + return fn; + } + back = lf; + } + } + + return NULL; +} + +int fib6_del_1(struct in6_addr *addr, __u32 prefixlen, struct in6_addr *gw, + struct device *dev) +{ + struct fib6_node *fn; + + fn = fib6_del_2(addr, prefixlen, gw, dev); + + if (fn == NULL) + return -ENOENT; + + if (fn->leaf == NULL) + { + fib6_del_3(fn); + } + + return 0; +} + +int fib6_del_rt(struct rt6_info *rt) +{ + struct fib6_node *fn; + + fn = fib6_del_rt_2(&rt->rt_dst, rt->rt_prefixlen, rt); + + if (fn == NULL) + return -ENOENT; + + if (fn->leaf == NULL) + { + fib6_del_3(fn); + } + + return 0; +} + +static void fib6_flush_1(struct fib6_node *fn, void *p_arg) +{ + struct rt6_info *rt; + + for (rt = fn->leaf; rt;) + { + struct rt6_info 
*itr; + + itr = rt; + rt = rt->next; + itr->fib_node = NULL; + rt_release(itr); + } + + if (fn->fn_flags & RTN_BACKTRACK) + { + rt6_stats.fib_route_nodes--; + } + rt6_stats.fib_nodes--; + kfree(fn); +} + +void fib6_flush(void) +{ + rt6_walk_tree(fib6_flush_1, NULL, 0); +} + +int ipv6_route_add(struct in6_rtmsg *rtmsg) +{ + struct rt6_info *rt; + struct device * dev = NULL; + struct rt6_req *request; + int flags = rtmsg->rtmsg_flags; + + dev = dev_get(rtmsg->rtmsg_device); + + rt = (struct rt6_info *) kmalloc(sizeof(struct rt6_info), + GFP_ATOMIC); + + rt6_stats.fib_rt_alloc++; + + memset(rt, 0, sizeof(struct rt6_info)); + + memcpy(&rt->rt_dst, &rtmsg->rtmsg_dst, sizeof(struct in6_addr)); + rt->rt_prefixlen = rtmsg->rtmsg_prefixlen; + + if (flags & (RTF_GATEWAY | RTF_NEXTHOP)) + { + /* check to see if its an acceptable gateway */ + if (flags & RTF_GATEWAY) + { + struct rt6_info *gw_rt; + + gw_rt = fibv6_lookup(&rtmsg->rtmsg_gateway, NULL, + RTI_GATEWAY); + + if (gw_rt == NULL) + { + return -EHOSTUNREACH; + } + + dev = gw_rt->rt_dev; + } + + rt->rt_nexthop = ndisc_get_neigh(dev, &rtmsg->rtmsg_gateway); + + if (rt->rt_nexthop == NULL) + { + printk(KERN_DEBUG "ipv6_route_add: no nexthop\n"); + kfree(rt); + return -EINVAL; + } + + rt->rt_dev = dev; + + if (loopback_rt == NULL && (dev->flags & IFF_LOOPBACK)) + { + loopback_rt = rt; + } + + } + else + { + if (dev == NULL) + { + printk(KERN_DEBUG "ipv6_route_add: NULL dev\n"); + kfree(rt); + return -EINVAL; + } + + rt->rt_dev = dev; + rt->rt_nexthop = NULL; + } + + rt->rt_metric = rtmsg->rtmsg_metric; + rt->rt_flags = rtmsg->rtmsg_flags; + + if (rt->rt_flags & RTF_ADDRCONF) + { + rt->rt_expires = rtmsg->rtmsg_info; + } + + request = kmalloc(sizeof(struct rt6_req), GFP_ATOMIC); + if (request == NULL) + { + printk(KERN_WARNING "ipv6_route_add: kmalloc failed\n"); + return -ENOMEM; + } + + request->operation = RT_OPER_ADD; + request->ptr = rt; + request->next = request->prev = NULL; + rtreq_queue(request); + rt6_bh_mask |= 
RT_BH_REQUEST; + + rt6_run_bh(); + + return 0; +} + +int ipv6_route_del(struct in6_rtmsg *rtmsg) +{ + struct rt6_info * rt; + + rt = fib6_lookup_1(&rtmsg->rtmsg_dst, 0); + if (!rt || (rt && (rt->rt_prefixlen != rtmsg->rtmsg_prefixlen))) + return -ENOENT; + return fib6_del_rt(rt); +} + +/* + * search the routing table + * the flags parameter restricts the search to entries where + * the flag is *not* set + */ +struct rt6_info * fibv6_lookup(struct in6_addr *addr, struct device *src_dev, + int flags) +{ + struct rt6_info *rt; + + if ((rt = fib6_lookup_1(addr, flags))) + { + if (src_dev) + { + for (; rt; rt=rt->next) + { + if (rt->rt_dev == src_dev) + return rt; + } + + if (flags & RTI_DEVRT) + { + return NULL; + } + } + + return rt; + } + + if (!(flags & RTI_GATEWAY)) + { + if ((rt = dflt_rt_lookup())) + { + return rt; + } + + return last_resort_rt; + } + + return NULL; +} + +/* + * Destination Cache + */ + +struct dest_entry * ipv6_dst_route(struct in6_addr * daddr, + struct device *src_dev, + int flags) +{ + struct dest_entry * dc = NULL; + struct rt6_info * rt; + + atomic_inc(&rt6_lock); + + rt = fibv6_lookup(daddr, src_dev, flags); + + if (rt == NULL) + { + goto exit; + } + + if (rt->rt_nexthop) + { + /* + * We can use the generic route + * (warning: the pmtu value maybe invalid) + */ + + dc = (struct dest_entry *) rt; + atomic_inc(&rt->rt_use); + } + else + { + struct rt6_req *request; + + if (ipv6_chk_addr(daddr) && !(rt->rt_dev->flags & IFF_LOOPBACK)) + { + rt = loopback_rt; + + if (rt == NULL) + { + goto exit; + } + } + + /* + * dynamicly allocate a new route + */ + + dc = (struct dest_entry *) kmalloc(sizeof(struct dest_entry), + GFP_ATOMIC); + + if (dc == NULL) + { + printk(KERN_WARNING "dst_route: kmalloc failed\n"); + goto exit; + } + + rt6_stats.fib_rt_alloc++; + rt6_stats.fib_dc_alloc++; + + memset(dc, 0, sizeof(struct dest_entry)); + + memcpy(&dc->dc_addr, daddr, sizeof(struct in6_addr)); + dc->rt.rt_prefixlen = 128; + dc->dc_usecnt = 1; + 
dc->rt.rt_metric = rt->rt_metric; + + dc->dc_flags = (rt->rt_flags | RTF_HOST | RTI_DYNAMIC | + RTI_DCACHE | DCF_PMTU); + + dc->dc_pmtu = rt->rt_dev->mtu; + dc->rt.rt_dev = rt->rt_dev; + dc->rt.rt_output_method = rt->rt_output_method; + dc->dc_tstamp = jiffies; + /* add it to the request queue */ + + request = kmalloc(sizeof(struct rt6_req), GFP_ATOMIC); + + if (request == NULL) + { + printk(KERN_WARNING "dst_route: kmalloc failed\n"); + dc = NULL; + goto exit; + } + + dc->dc_nexthop = ndisc_get_neigh(rt->rt_dev, daddr); + + rt6_bh_mask |= RT_BH_REQUEST; + + request->operation = RT_OPER_ADD; + request->ptr = (struct rt6_info *) dc; + request->next = request->prev = NULL; + rtreq_queue(request); + } + + atomic_inc(&rt_clients); + + exit: + + atomic_dec(&rt6_lock); + rt6_run_bh(); + + return dc; +} + +/* + * check cache entry for vality... + * this needs to be done as a inline func that calls + * ipv6_slow_dst_check if entry is invalid + */ + +struct dest_entry * ipv6_dst_check(struct dest_entry *dc, + struct in6_addr *daddr, + __u32 sernum, int flags) +{ + int uptodate = 0; + + /* + * destination cache becomes invalid when routing + * changes or a more specific dynamic entry is + * created. + * if route is removed from table fib_node will + * become NULL + */ + + if (dc->rt.fib_node && (dc->rt.fib_node->fn_sernum == sernum)) + uptodate = 1; + + if (uptodate && ((dc->dc_flags & DCF_INVALID) == 0)) + { + if (dc->dc_nexthop && !(dc->dc_nexthop->flags & NCF_NOARP)) + { + ndisc_event_send(dc->dc_nexthop, NULL); + } + return dc; + } + + /* route for destination may have changed */ + + ipv6_dst_unlock(dc); + + return ipv6_dst_route(daddr, NULL, flags); +} + +void ipv6_dst_unlock(struct dest_entry *dc) +{ + /* + * decrement counter and mark entry for deletion + * if counter reaches 0. we delay deletions in hope + * we can reuse cache entries. 
+ */ + + atomic_dec(&dc->dc_usecnt); + + if (dc->dc_usecnt == 0) + { + + if (dc->dc_flags & RTI_DCACHE) + { + /* + * update last usage tstamp + */ + + dc->dc_tstamp = jiffies; + rt6_bh_mask |= RT_BH_GC; + } + + if (dc->rt.rt_ref == 0) + { + /* + * entry out of the routing table + * pending to be released on last deref + */ + + if (dc->dc_nexthop) + { + ndisc_dec_neigh(dc->dc_nexthop); + } + + if (dc->dc_flags & RTI_DCACHE) + { + rt6_stats.fib_dc_alloc--; + } + + rt6_stats.fib_rt_alloc--; + kfree(dc); + } + + } + + atomic_dec(&rt_clients); +} + +/* + * Received a packet too big icmp that lowers the mtu for this + * address. If the route for the destination is genric we create + * a new route with the apropriate MTU info. The route_add + * procedure will update the serial number on the generic routes + * belonging to the afected tree forcing clients to request a route + * lookup. + */ +void rt6_handle_pmtu(struct in6_addr *addr, int pmtu) +{ + struct rt6_info *rt; + struct rt6_req *req; + struct dest_entry *dc; + + printk(KERN_DEBUG "rt6_handle_pmtu\n"); + + if (pmtu < 0 || pmtu > 65536) + { + printk(KERN_DEBUG "invalid MTU value\n"); + return; + } + + rt = fibv6_lookup(addr, NULL, 0); + + if (rt == NULL) + { + printk(KERN_DEBUG "rt6_handle_pmtu: route not found\n"); + return; + } + + if (rt->rt_flags & RTI_DCACHE) + { + /* + * we do have a destination cache entry for this + * address. 
+ */ + + dc = (struct dest_entry *) rt; + + /* + * fixme: some sanity checks are likely to be needed + * here + */ + + dc->dc_pmtu = pmtu; + dc->dc_flags |= DCF_PMTU; + return; + } + + req = (struct rt6_req *) kmalloc(sizeof(struct rt6_req), GFP_ATOMIC); + + /* now add the new destination cache entry */ + + dc = (struct dest_entry *) kmalloc(sizeof(struct dest_entry), + GFP_ATOMIC); + + rt6_stats.fib_rt_alloc++; + rt6_stats.fib_dc_alloc++; + + memset(dc, 0, sizeof(struct dest_entry)); + + memcpy(&dc->dc_addr, addr, sizeof(struct in6_addr)); + dc->rt.rt_prefixlen = 128; + dc->rt.rt_metric = rt->rt_metric; + + dc->dc_flags = (rt->rt_flags | RTI_DYNAMIC | RTI_DCACHE | DCF_PMTU | + RTF_HOST); + + dc->dc_pmtu = pmtu; + dc->dc_tstamp = jiffies; + + dc->dc_nexthop = rt->rt_nexthop; + atomic_inc(&dc->dc_nexthop->refcnt); + + dc->rt.rt_dev = rt->rt_dev; + dc->rt.rt_output_method = rt->rt_output_method; + + req->operation = RT_OPER_ADD; + req->ptr = (struct rt6_info *) dc; + req->next = req->prev = NULL; + + rtreq_queue(req); + + rt6_bh_mask |= RT_BH_REQUEST; + + rt6_run_bh(); +} + +/* + * Redirect received: target is nexthop for dest + */ +struct rt6_info * ipv6_rt_redirect(struct device *dev, struct in6_addr *dest, + struct in6_addr *target, int on_link) + +{ + struct rt6_info *rt; + struct rt6_req *req; + int metric; + + rt = fibv6_lookup(dest, dev, 0); + + if (rt == NULL) + { + printk(KERN_WARNING "rt_redirect: unable to locate route\n"); + return NULL; + } + + metric = rt->rt_metric; + + if ((rt->rt_flags & RTF_HOST) == 0) + { + /* Need to create an host route for this address */ + + rt = (struct rt6_info *) kmalloc(sizeof(struct rt6_info), + GFP_ATOMIC); + memset(rt, 0, sizeof(struct rt6_info)); + ipv6_addr_copy(&rt->rt_dst, dest); + rt->rt_prefixlen = 128; + rt->rt_flags = RTF_HOST | RTF_UP; + rt->rt_dev = dev; + + /* + * clone rt->rt_output_method ? 
+ */ + + rt->rt_metric = metric; + + rt6_stats.fib_rt_alloc++; + + req = (struct rt6_req *) kmalloc(sizeof(struct rt6_req), + GFP_ATOMIC); + req->operation = RT_OPER_ADD; + req->ptr = rt; + req->next = req->prev = NULL; + + rtreq_queue(req); + rt6_bh_mask |= RT_BH_REQUEST; + } + else + { + rt->rt_flags |= RTF_MODIFIED; + } + + rt->rt_flags |= RTF_DYNAMIC; + + if (on_link) + { + rt->rt_flags &= ~RTF_GATEWAY; + } + else + { + rt->rt_flags |= RTF_GATEWAY; + } + + if (rt->rt_nexthop) + { + if (ipv6_addr_cmp(&rt->rt_nexthop->addr, target) == 0) + { + atomic_inc(&rt->rt_nexthop->refcnt); + goto exit; + } + else + { + ndisc_dec_neigh(rt->rt_nexthop); + } + } + + rt->rt_nexthop = ndisc_get_neigh(dev, target); + + exit: + rt6_run_bh(); + return rt; +} + +static int dcache_gc_node(struct fib6_node *fn, int timeout) +{ + struct rt6_info *rt, *back; + int more = 0; + unsigned long now = jiffies; + + back = NULL; + + for (rt = fn->leaf; rt;) + { + if ((rt->rt_flags & RTI_DCACHE) && rt->rt_use == 0) + { + struct dest_entry *dc; + + dc = (struct dest_entry *) rt; + + if (now - dc->dc_tstamp > timeout) + { + struct rt6_info *old; + + old = rt; + + rt = rt->next; + + if (back == NULL) + { + fn->leaf = rt; + } + else + { + back->next = rt; + } + + old->fib_node = NULL; + rt_release(old); + rt6_stats.fib_rt_entries--; + continue; + } + else + { + more++; + } + } + + back = rt; + rt = rt->next; + } + + if (fn->leaf == NULL) + { + return -1; + } + return more; +} + +struct dc_gc_args { + unsigned long timeout; + int more; +}; + +static void dc_garbage_collect(struct fib6_node *fn, void *p_arg) +{ + struct dc_gc_args * args = (struct dc_gc_args *) p_arg; + + if (fn->fn_flags & RTN_BACKTRACK) + { + if (fn->fn_bit == 127) + { + int more; + + more = dcache_gc_node(fn, args->timeout); + + if (more == -1) + { + if (fn->parent->left == fn) + fn->parent->left = NULL; + else + fn->parent->right = NULL; + + kfree(fn); + + rt6_stats.fib_nodes--; + rt6_stats.fib_route_nodes--; + + return; + } + 
args->more += more; + } + } + else if (!(fn->fn_flags & RTN_ROOT)) + { + int children = 0; + struct fib6_node *chld = NULL; + + if (fn->left) + { + children++; + chld = fn->left; + } + + if (fn->right) + { + children++; + chld = fn->right; + } + + if (children <= 1) + { + struct fib6_node *pn = fn->parent; + + if (pn->left == fn) + { + pn->left = chld; + } + else + { + pn->right = chld; + } + + if (chld) + { + chld->parent = pn; + } + + rt_release(fn->leaf); + + rt6_stats.fib_nodes--; + kfree(fn); + } + } +} + +/* + * called with ints off + */ + +static void __rt6_run_bh(void) +{ + static last_gc_run = 0; + + if (rt6_bh_mask & RT_BH_REQUEST) + { + struct rt6_req *request; + + while ((request = rtreq_dequeue())) + { + struct rt6_info *rt; + + rt = request->ptr; + + switch (request->operation) { + case RT_OPER_ADD: + fib6_add_1(rt); + break; + + case RT_OPER_DEL: + fib6_del_rt(rt); + break; + + default: + printk(KERN_WARNING + "rt6_run_bh: bad request in queue\n"); + } + + kfree(request); + } + + rt6_bh_mask &= ~RT_BH_REQUEST; + } + + if (rt6_bh_mask & RT_BH_GC) + { + if (jiffies - last_gc_run > DC_TIME_RUN) + { + struct dc_gc_args args; + + if (rt6_stats.fib_dc_alloc >= DC_WATER_MARK) + args.timeout = DC_SHORT_TIMEOUT; + else + args.timeout = DC_LONG_TIMEOUT; + + args.more = 0; + rt6_walk_tree(dc_garbage_collect, &args, 0); + + last_gc_run = jiffies; + + if (!args.more) + { + rt6_bh_mask &= ~RT_BH_GC; + } + } + } +} + +/* + * Timer for expiring routes learned via addrconf and stale DC + * entries when there is no network actuvity + */ + +void rt6_timer_handler(unsigned long data) +{ + unsigned long flags; + + save_flags(flags); + cli(); + + if (rt6_lock == 0) + { + if (rt_clients == 0 && rt6_bh_mask) + { + __rt6_run_bh(); + } + + /* + * route expiry + */ + + rt6_walk_tree(rt6_rt_timeout, NULL, 1); + } + + restore_flags(flags); + + rt6_gc_timer.expires = jiffies + 4 * DC_LONG_TIMEOUT; + add_timer(&rt6_gc_timer); +} + +/* + * Check if routes should be timed out. 
+ * Called from rt6_walk_tree for every node. + */ + +static void rt6_rt_timeout(struct fib6_node *fn, void *arg) +{ + struct rt6_info *rt; + unsigned long now = jiffies; + + for (rt = fn->leaf; rt; rt = rt->next) + { + if ((rt->rt_flags & RTF_ADDRCONF) && now > rt->rt_expires) + { + struct rt6_req *req; + + /* + * request route deletion. routes will only + * be deleted after walk_tree completes + */ + + req = (struct rt6_req *) kmalloc(sizeof(struct rt6_req), + GFP_ATOMIC); + req->operation = RT_OPER_DEL; + req->ptr = rt; + req->next = req->prev = NULL; + } + } +} + +int ipv6_route_ioctl(unsigned int cmd, void *arg) +{ + struct in6_rtmsg rtmsg; + int err; + + switch(cmd) + { + case SIOCADDRT: /* Add a route */ + case SIOCDELRT: /* Delete a route */ + if (!suser()) + return -EPERM; + err = copy_from_user(&rtmsg, arg, + sizeof(struct in6_rtmsg)); + if (err) + return -EFAULT; + return (cmd == SIOCDELRT) ? ipv6_route_del(&rtmsg) : + ipv6_route_add(&rtmsg); + } + + return -EINVAL; +} + +static void rt6_walk_tree(f_pnode func, void * arg, int filter) +{ + struct fib6_node *fn; + /* + * adquire lock + * this warranties that the operation will be atomic with + * respect to the garbage collect routine that also does + * a tree transversal and tags nodes with the RTN_TAG flag + */ + atomic_inc(&rt6_lock); + + fn = &routing_table; + + do { + if (!(fn->fn_flags & RTN_TAG)) + { + fn->fn_flags |= RTN_TAG; + + if (fn->left) + { + fn = fn->left; + continue; + } + } + + fn->fn_flags &= ~RTN_TAG; + + if (fn->right) + { + fn = fn->right; + continue; + } + + do { + struct fib6_node *node; + + if (fn->fn_flags & RTN_ROOT) + break; + node = fn; + fn = fn->parent; + + if (!(node->fn_flags & RTN_TAG) && + (!filter || (node->fn_flags & RTN_BACKTRACK))) + { + (*func)(node, arg); + } + + } while (!(fn->fn_flags & RTN_TAG)); + + } while (!(fn->fn_flags & RTN_ROOT) || (fn->fn_flags & RTN_TAG)); + + atomic_dec(&rt6_lock); +} + +#ifdef CONFIG_PROC_FS +#define RT6_INFO_LEN (32 + 2 + 32 + 2 + 2 + 
2 + 4 + 8 + 7) + +struct rt6_proc_arg { + char *buffer; + int offset; + int skip; + int len; +}; + +static void rt6_info_node(struct fib6_node *fn, void *p_arg) +{ + struct rt6_info *rt; + struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg; + + for (rt = fn->leaf; rt; rt = rt->next) + { + int i; + + if (arg->skip < arg->offset / RT6_INFO_LEN) + { + arg->skip++; + continue; + } + + for (i=0; i<16; i++) + { + sprintf(arg->buffer + arg->len, "%02x", + rt->rt_dst.s6_addr[i]); + arg->len += 2; + } + arg->len += sprintf(arg->buffer + arg->len, " %02x ", + rt->rt_prefixlen); + if (rt->rt_nexthop) + { + for (i=0; i<16; i++) + { + sprintf(arg->buffer + arg->len, "%02x", + rt->rt_nexthop->addr.s6_addr[i]); + arg->len += 2; + } + } + else + { + sprintf(arg->buffer + arg->len, + "00000000000000000000000000000000"); + arg->len += 32; + } + arg->len += sprintf(arg->buffer + arg->len, + " %02x %02x %02x %04x %8s\n", + rt->rt_metric, rt->rt_use, + rt->rt_ref, rt->rt_flags, + rt->rt_dev ? rt->rt_dev->name : ""); + } +} + +static int rt6_proc_info(char *buffer, char **start, off_t offset, int length, + int dummy) +{ + struct rt6_proc_arg arg; + struct fib6_node sfn; + arg.buffer = buffer; + arg.offset = offset; + arg.skip = 0; + arg.len = 0; + + rt6_walk_tree(rt6_info_node, &arg, 1); + + sfn.leaf = default_rt_list; + rt6_info_node(&sfn, &arg); + + sfn.leaf = last_resort_rt; + rt6_info_node(&sfn, &arg); + + *start = buffer; + + if (offset) + *start += offset % RT6_INFO_LEN; + + arg.len -= offset % RT6_INFO_LEN; + + if (arg.len > length) + arg.len = length; + + return arg.len; +} + + +static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length, + int dummy) +{ + int len; + + len = sprintf(buffer, "%04x %04x %04x %04x %04x\n", + rt6_stats.fib_nodes, rt6_stats.fib_route_nodes, + rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries, + rt6_stats.fib_dc_alloc); + + len -= offset; + + if (len > length) + len = length; + + *start = buffer + offset; + + return len; +} + 
+#endif /* CONFIG_PROC_FS */ + +void ipv6_route_init(void) +{ +#ifdef CONFIG_PROC_FS + proc_net_register(&(struct proc_dir_entry) { + PROC_NET_RT6, 6, "route6", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + rt6_proc_info + }); + proc_net_register(&(struct proc_dir_entry) { + PROC_NET_RT6_STATS, 9, "rt6_stats", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + rt6_proc_stats + }); + +#endif + rt6_gc_timer.expires = jiffies + 4 * DC_LONG_TIMEOUT; + add_timer(&rt6_gc_timer); + netlink_attach(NETLINK_ROUTE6, rt6_msgrcv); +} + +#ifdef MODULE +void ipv6_route_cleanup(void) +{ + proc_net_unregister(PROC_NET_RT6); + proc_net_unregister(PROC_NET_RT6_STATS); + netlink_detach(NETLINK_ROUTE6); + del_timer(&rt6_gc_timer); + fib6_flush(); +} +#endif + +/* + * NETLINK interface + * routing socket moral equivalent + */ + +static int rt6_msgrcv(struct sk_buff *skb) +{ + int count = 0; + struct in6_rtmsg *rtmsg; + + while (skb->len) + { + if (skb->len < sizeof(struct in6_rtmsg)) + { + count = -EINVAL; + goto out; + } + + rtmsg = (struct in6_rtmsg *) skb->data; + skb_pull(skb, sizeof(struct in6_rtmsg)); + count += sizeof(struct in6_rtmsg); + + switch (rtmsg->rtmsg_type) { + case RTMSG_NEWROUTE: + ipv6_route_add(rtmsg); + break; + case RTMSG_DELROUTE: + ipv6_route_del(rtmsg); + break; + default: + count = -EINVAL; + goto out; + } + } + + out: + kfree_skb(skb, FREE_READ); + return count; +} + +void rt6_sndmsg(__u32 type, struct in6_addr *dst, struct in6_addr *gw, + __u16 plen, __u16 metric, char *devname, __u16 flags) +{ + struct sk_buff *skb; + struct in6_rtmsg *msg; + + skb = alloc_skb(sizeof(struct in6_rtmsg), GFP_ATOMIC); + msg = (struct in6_rtmsg *) skb_put(skb, sizeof(struct in6_rtmsg)); + + msg->rtmsg_type = type; + + if (dst) + { + ipv6_addr_copy(&msg->rtmsg_dst, dst); + } + else + memset(&msg->rtmsg_dst, 0, sizeof(struct in6_addr)); + + if (gw) + { + ipv6_addr_copy(&msg->rtmsg_gateway, gw); + } + else + memset(&msg->rtmsg_gateway, 0, 
sizeof(struct in6_addr)); + + msg->rtmsg_prefixlen = plen; + msg->rtmsg_metric = metric; + strcpy(msg->rtmsg_device, devname); + msg->rtmsg_flags = flags; + + if (netlink_post(NETLINK_ROUTE6, skb)) + { + kfree_skb(skb, FREE_WRITE); + } +} diff -u --recursive --new-file v2.1.7/linux/net/ipv6/ipv6_sockglue.c linux/net/ipv6/ipv6_sockglue.c --- v2.1.7/linux/net/ipv6/ipv6_sockglue.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/ipv6_sockglue.c Sun Nov 3 11:10:33 1996 @@ -0,0 +1,290 @@ +/* + * IPv6 BSD socket options interface + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * Based on linux/net/ipv4/ip_sockglue.c + * + * $Id: ipv6_sockglue.c,v 1.12 1996/10/29 22:45:53 roque Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +struct ipv6_mib ipv6_statistics={0, }; +struct packet_type ipv6_packet_type = +{ + 0, + NULL, /* All devices */ + ipv6_rcv, + NULL, + NULL +}; + +/* + * addrconf module should be notifyed of a device going up + */ +static struct notifier_block ipv6_dev_notf = { + addrconf_notify, + NULL, + 0 +}; + +int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, + int optlen) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + int val, err; + int retv = -EOPNOTSUPP; + + if(level!=SOL_IPV6) + goto out; + + if (optval == NULL) + { + val=0; + } + else + { + err = get_user(val, (int *) optval); + if(err) + return err; + } + + + switch (optname) { + + case IPV6_ADDRFORM: + if (val == PF_INET) + { + if (sk->protocol != IPPROTO_UDP && + sk->protocol != IPPROTO_TCP) + { + 
goto out; + } + + if (sk->state != TCP_ESTABLISHED) + { + retv = ENOTCONN; + goto out; + } + + if (!(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) + { + retv = -EADDRNOTAVAIL; + goto out; + } + + if (sk->protocol == IPPROTO_TCP) + { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + sk->prot = &tcp_prot; + tp->af_specific = &ipv4_specific; + } + else + { + sk->prot = &udp_prot; + } + sk->socket->ops = &inet_proto_ops; + retv = 0; + } + else + { + retv = -EINVAL; + } + break; + + case IPV6_RXINFO: + np->rxinfo = val; + retv = 0; + break; + + case IPV6_UNICAST_HOPS: + if (val > 255) + { + retv = -EINVAL; + } + else + { + np->hop_limit = val; + retv = 0; + } + break; + + case IPV6_MULTICAST_HOPS: + if (val > 255) + { + retv = -EINVAL; + } + else + { + np->mcast_hops = val; + retv = 0; + } + break; + + case IPV6_MULTICAST_LOOP: + np->mc_loop = val; + break; + + case IPV6_MULTICAST_IF: + { + struct in6_addr addr; + + err=verify_area(VERIFY_READ, optval, sizeof(struct in6_addr)); + if(err) + return err; + + err = copy_from_user(&addr, optval, sizeof(struct in6_addr)); + if(err) + return -EFAULT; + + if (ipv6_addr_any(&addr)) + { + np->mc_if = NULL; + } + else + { + struct inet6_ifaddr *ifp; + + ifp = ipv6_chk_addr(&addr); + + if (ifp == NULL) + { + retv = -EADDRNOTAVAIL; + break; + } + + np->mc_if = ifp->idev->dev; + } + retv = 0; + break; + } + case IPV6_ADD_MEMBERSHIP: + case IPV6_DROP_MEMBERSHIP: + { + struct ipv6_mreq mreq; + struct inet6_ifaddr *ifp; + struct device *dev = NULL; + int err; + + err = copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)); + if(err) + return -EFAULT; + + if (ipv6_addr_any(&mreq.ipv6mr_interface)) + { + /* + * FIXME + * default multicast rule. 
+ */ + } + else + { + if ((ifp = ipv6_chk_addr(&mreq.ipv6mr_interface))) + { + dev = ifp->idev->dev; + } + } + + if (dev == NULL) + { + return -ENODEV; + } + + if (optname == IPV6_ADD_MEMBERSHIP) + { + retv = ipv6_sock_mc_join(sk, dev, &mreq.ipv6mr_multiaddr); + } + else + { + retv = ipv6_sock_mc_drop(sk, dev, &mreq.ipv6mr_multiaddr); + } + } + } + + out: + return retv; +} + +int ipv6_getsockopt(struct sock *sk, int level, int optname, char *optval, + int *optlen) +{ + return 0; +} + +#ifdef MODULE + +/* + * sysctl registration functions defined in sysctl_net_ipv6.c + */ + +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif + +void ipv6_init(void) +{ + ipv6_packet_type.type = ntohs(ETH_P_IPV6); + + dev_add_pack(&ipv6_packet_type); + +#ifdef MODULE + ipv6_sysctl_register(); +#endif + + register_netdevice_notifier(&ipv6_dev_notf); + + ipv6_route_init(); +} + +#ifdef MODULE +void ipv6_cleanup(void) +{ + unregister_netdevice_notifier(&ipv6_dev_notf); + dev_remove_pack(&ipv6_packet_type); + ipv6_sysctl_unregister(); + ipv6_route_cleanup(); + ndisc_cleanup(); + addrconf_cleanup(); +} +#endif + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O6 -m486 -c ipv6_sockglue.c" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/mcast.c linux/net/ipv6/mcast.c --- v2.1.7/linux/net/ipv6/mcast.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/mcast.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,220 @@ +/* + * Multicast support for IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + + +/* + * socket join on multicast group + */ +int ipv6_sock_mc_join(struct sock *sk, struct device *dev, + struct in6_addr *addr) +{ + struct ipv6_mc_socklist *mc_lst; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + int err; + + if (!(ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST)) + return -EINVAL; + + if(!(dev->flags & IFF_MULTICAST)) + return -EADDRNOTAVAIL; + + mc_lst = (struct ipv6_mc_socklist *) + kmalloc(sizeof(struct ipv6_mc_socklist), GFP_KERNEL); + + if (mc_lst == NULL) + return -ENOMEM; + + mc_lst->next = NULL; + memcpy(&mc_lst->addr, addr, sizeof(struct in6_addr)); + mc_lst->dev = dev; + + /* + * now add/increase the group membership on the device + */ + + err = ipv6_dev_mc_inc(dev, addr); + + if (err) + { + kfree(mc_lst); + return err; + } + + mc_lst->next = np->ipv6_mc_list; + np->ipv6_mc_list = mc_lst; + + return 0; +} + +/* + * socket leave on multicast group + */ +int ipv6_sock_mc_drop(struct sock *sk, struct device *dev, + struct in6_addr *addr) +{ + return 0; +} + +void ipv6_sock_mc_close(struct sock *sk) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct ipv6_mc_socklist *mc_lst; + + for (mc_lst = np->ipv6_mc_list; mc_lst; ) + { + struct ipv6_mc_socklist *back; + + /* + * leave group + */ + + back = mc_lst; + mc_lst = mc_lst->next; + kfree(back); + } +} + +/* + * device multicast group inc (add if not found) + */ +int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr) +{ + struct ipv6_mc_list *mc; + struct inet6_dev *i6dev; + char buf[6]; + u8 hash; + + for (i6dev = inet6_dev_lst; i6dev; i6dev=i6dev->next) + if (i6dev->dev == dev) + break; + + if (i6dev == NULL) + { + printk(KERN_DEBUG "ipv6_dev_mc_inc: device not found\n"); + return -EINVAL; + } + + for (mc = i6dev->mc_list; mc; mc = mc->if_next) + if (ipv6_addr_cmp(&mc->addr, addr) == 0) + { 
+ atomic_inc(&mc->users); + return 0; + } + + /* + * not found: create a new one. + */ + + mc = (struct ipv6_mc_list *) kmalloc(sizeof(struct ipv6_mc_list), + GFP_ATOMIC); + + if (mc == NULL) + { + return -ENOMEM; + } + + memset(mc, 0, sizeof(struct ipv6_mc_list)); + + memcpy(&mc->addr, addr, sizeof(struct in6_addr)); + mc->dev = dev; + mc->users = 1; + + hash = ipv6_addr_hash(addr); + + mc->next = inet6_mcast_lst[hash]; + inet6_mcast_lst[hash] = mc; + + mc->if_next = i6dev->mc_list; + i6dev->mc_list = mc; + + /* + * multicast mapping is defined in IPv6-over-foo documents + */ + + switch (dev->type) { + case ARPHRD_ETHER: + ipv6_mc_map(addr, buf); + dev_mc_add(dev, buf, ETH_ALEN, 0); + break; + + default: + printk(KERN_DEBUG "dev_mc_inc: unkown device type\n"); + } + + + /* + * FIXME: ICMP report handling + */ + + return 0; +} + +/* + * device multicast group del + */ +int ipv6_dev_mc_dec(struct device *dev, struct in6_addr *addr) +{ + return 0; +} + +/* + * check if the interface/address pair is valid + */ +int ipv6_chk_mcast_addr(struct device *dev, struct in6_addr *addr) +{ + struct ipv6_mc_list *mc; + u8 hash; + + hash = ipv6_addr_hash(addr); + + for (mc = inet6_mcast_lst[hash]; mc; mc=mc->next) + if ((mc->dev == dev) && + ipv6_addr_cmp(&mc->addr, addr) == 0) + { + return 1; + } + + return 0; +} + +/* + * IGMP handling (alias multicast ICMPv6 messages) + */ + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h -c -o mcast.o mcast.c" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/ndisc.c linux/net/ipv6/ndisc.c --- v2.1.7/linux/net/ipv6/ndisc.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/ndisc.c Sun Nov 3 11:11:05 1996 @@ -0,0 +1,1905 @@ +/* + * Neighbour Discovery for IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * 
Mike Shaver + * + * $Id: ndisc.c,v 1.28 1996/10/11 16:03:06 roque Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Interface: + * + * ndisc_lookup will be called from eth.c on dev->(re)build_header + * + * ndisc_rcv + * ndisc_validate is called by higher layers when they know a neighbour + * is reachable. + * + * Manages neighbour cache + * + */ + +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + + +#include +#include + +#define NCACHE_NUM_BUCKETS 32 + +static struct socket ndisc_socket; + +unsigned long nd_rand_seed = 152L; + +struct ndisc_statistics nd_stats; + +static struct neighbour *neighbours[NCACHE_NUM_BUCKETS]; +static struct timer_list ndisc_timer; +static struct timer_list ndisc_gc_timer; + +static atomic_t ndisc_lock = 0; + +/* + * Protocol variables + */ + +int nd_max_multicast_solicit = 3; +int nd_max_unicast_solicit = 3; +int nd_retrans_timer = RETRANS_TIMER; +int nd_reachable_time = RECHABLE_TIME; +int nd_base_reachable_time = RECHABLE_TIME; +int nd_delay_first_probe = 5 * HZ; +int nd_gc_interval = 5 * HZ; + +/* + * garbage collection timeout must be greater than reachable time + * since tstamp is updated by reachable confirmations only. + * gc_staletime actually means the time after last confirmation + * *NOT* after the last time the entry was used. 
+ */ + +int nd_gc_staletime = 3 * RECHABLE_TIME; + +static struct neighbour ndisc_insert_queue = { + {{{0,}}}, 0, 0, NULL, 0, + {0,}, NULL, {0,}, 0, 0, 0, 0, 0, + &ndisc_insert_queue, + &ndisc_insert_queue +}; + +static int ndisc_ins_queue_len = 0; + +int ndisc_event_timer(struct neighbour *neigh); + +static void ndisc_bh_insert(void); + +int ipv6_random(void) +{ + nd_rand_seed=nd_rand_seed*69069L+1; + return nd_rand_seed^jiffies; +} + +static __inline__ unsigned long rand_reach_time(void) +{ + unsigned long val; + + val = ipv6_random() % (MAX_RANDOM_FACTOR * nd_base_reachable_time); + if (val < (MIN_RANDOM_FACTOR * nd_base_reachable_time)) + { + val += (MIN_RANDOM_FACTOR * nd_base_reachable_time); + } + + return val; +} + +void ndisc_verify_reachability(struct neighbour * neigh); + +/* + * (inline) support functions + */ + +static __inline__ __u32 ndisc_hash(struct in6_addr *addr) +{ + + __u32 hash_val; + + hash_val = addr->s6_addr32[2] ^ addr->s6_addr32[3]; + + hash_val ^= hash_val >> 16; + + return (hash_val & (NCACHE_NUM_BUCKETS - 1)); +} + + +static __inline__ void ndisc_neigh_queue(struct neighbour *neigh) +{ + struct neighbour *next = &ndisc_insert_queue; + + ndisc_ins_queue_len++; + + neigh->prev = next->prev; + neigh->prev->next = neigh; + next->prev = neigh; + neigh->next = next; +} + +static __inline__ struct neighbour * ndisc_dequeue(void) +{ + struct neighbour *next = &ndisc_insert_queue; + struct neighbour *head; + + ndisc_ins_queue_len--; + + head = next->next; + + if (head == next) + { + return NULL; + } + + head->next->prev = head->prev; + next->next = head->next; + + head->next = NULL; + head->prev = NULL; + + return head; +} + +static __inline__ void ndisc_release_lock(void) +{ + unsigned long flags; + + save_flags(flags); + cli(); + + ndisc_lock--; + + if (ndisc_lock == 0 && ndisc_ins_queue_len) + { + ndisc_bh_insert(); + } + + restore_flags(flags); +} + +static void ndisc_insert_neigh(struct neighbour *neigh) +{ + + struct neighbour * bucket; + 
+	 * periodically compute ReachableTime from random function
NUD_REACHABLE) && + (neigh->tstamp <= (now - nd_gc_staletime)) + ) + ) + ) + { + struct neighbour *prev; + + prev = neigh; + neigh = neigh->next; + ndisc_release_neigh(prev); + continue; + } + neigh = neigh->next; + } + + restore_flags(flags); + + ndisc_gc_timer.expires = now + nd_gc_interval; + add_timer(&ndisc_gc_timer); +} + +static __inline__ void ndisc_add_timer(struct neighbour *neigh, int timer) +{ + unsigned long now = jiffies; + unsigned long tval; + + neigh->expires = now + timer; + tval = del_timer(&ndisc_timer); + + if (tval) + { + tval = min(tval, neigh->expires); + } + else + tval = neigh->expires; + + ndisc_timer.expires = tval; + add_timer(&ndisc_timer); +} + +static void ndisc_del_timer(struct neighbour *neigh) +{ + unsigned long tval; + + if (!(neigh->nud_state & NUD_IN_TIMER)) + return; + + tval = del_timer(&ndisc_timer); + + if (tval == neigh->expires) + { + int i; + + tval = ~0UL; + + /* need to search the entire neighbour cache */ + for (i=0; i < NCACHE_NUM_BUCKETS; i++) + { + for (neigh = neighbours[i]; neigh; neigh=neigh->next) + if (neigh->nud_state & NUD_IN_TIMER) + { + tval = min(tval, neigh->expires); + } + } + + } + + if (tval == ~(0UL)) + return; + + ndisc_timer.expires = tval; + add_timer(&ndisc_timer); +} + +static struct neighbour * ndisc_new_neigh(struct device *dev, + struct in6_addr *addr) +{ + struct neighbour *neigh; + unsigned long flags; + + neigh = (struct neighbour *) kmalloc(sizeof(struct neighbour), + GFP_ATOMIC); + + if (neigh == NULL) + { + printk(KERN_DEBUG "ndisc: kmalloc failure\n"); + return NULL; + } + + nd_stats.allocs++; + + memset(neigh, 0, sizeof (struct neighbour)); + skb_queue_head_init(&neigh->arp_queue); + + ipv6_addr_copy(&neigh->addr, addr); + neigh->len = 128; + neigh->type = ipv6_addr_type(addr); + neigh->dev = dev; + neigh->tstamp = jiffies; + + if (dev->type == ARPHRD_LOOPBACK || dev->type == ARPHRD_SIT) + { + neigh->flags |= NCF_NOARP; + } + + save_flags(flags); + cli(); + + if (ndisc_lock == 0) + { 
+ *	0 - Address Resolution succeeded, send packet
neigh->h_dest, dev->addr_len); + + if ((neigh->flags & NCF_HHVALID) == 0) + { + /* + * copy header to hh_data and move h_dest pointer + * this is strictly media dependent. + */ + } + return 0; + + discard: + + dev_kfree_skb(skb, FREE_WRITE); + return 1; +} + + +/* Send the actual Neighbour Advertisement */ + +void ndisc_send_na(struct device *dev, struct neighbour *neigh, + struct in6_addr *daddr, + struct in6_addr *solicited_addr, + int router, int solicited, int override, int inc_opt) +{ + struct sock *sk = (struct sock *)ndisc_socket.data; + struct nd_msg *msg; + int len, opt_len; + struct sk_buff *skb; + int err; + + opt_len = ((dev->addr_len + 1) >> 3) + 1; + len = sizeof(struct icmpv6hdr) + sizeof(struct in6_addr); + + if (inc_opt) + { + len += opt_len << 3; + } + + skb = sock_alloc_send_skb(sk, MAX_HEADER + len, 0, 0, &err); + + if (skb == NULL) + { + printk(KERN_DEBUG "send_na: alloc skb failed\n"); + } + + skb->free=1; + + if (ipv6_bld_hdr_2(sk, skb, dev, neigh, solicited_addr, daddr, + IPPROTO_ICMPV6, len) < 0) + { + kfree_skb(skb, FREE_WRITE); + printk(KERN_DEBUG + "ndisc_send_na: ipv6_build_header returned < 0\n"); + return; + } + + skb->pkt_type = PACKET_NDISC; + + msg = (struct nd_msg *) skb_put(skb, len); + + msg->icmph.type = NDISC_NEIGHBOUR_ADVERTISEMENT; + msg->icmph.code = 0; + msg->icmph.checksum = 0; + + msg->icmph.icmp6_unused = 0; + msg->icmph.icmp6_router = router; + msg->icmph.icmp6_solicited = solicited; + msg->icmph.icmp6_override = override; + + /* Set the target address. */ + ipv6_addr_copy(&msg->target, solicited_addr); + + if (inc_opt) + { + /* Set the source link-layer address option. 
*/ + msg->opt.opt_type = ND_OPT_TARGET_LL_ADDR; + msg->opt.opt_len = opt_len; + memcpy(msg->opt.link_addr, dev->dev_addr, dev->addr_len); + + if ((opt_len << 3) - (2 + dev->addr_len)) + { + memset(msg->opt.link_addr + dev->addr_len, 0, + (opt_len << 3) - (2 + dev->addr_len)); + } + } + + /* checksum */ + msg->icmph.checksum = csum_ipv6_magic(solicited_addr, daddr, len, + IPPROTO_ICMPV6, + csum_partial((__u8 *) msg, + len, 0)); + + ipv6_queue_xmit(sk, skb->dev, skb, 1); +} + +void ndisc_send_ns(struct device *dev, struct neighbour *neigh, + struct in6_addr *solicit, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + struct sock *sk = (struct sock *) ndisc_socket.data; + struct sk_buff *skb; + struct nd_msg *msg; + int len, opt_len; + int err; + + /* length of addr in 8 octet groups.*/ + opt_len = ((dev->addr_len + 1) >> 3) + 1; + len = sizeof(struct icmpv6hdr) + sizeof(struct in6_addr) + + (opt_len << 3); + + skb = sock_alloc_send_skb(sk, MAX_HEADER + len, 0, 0, &err); + if (skb == NULL) + { + printk(KERN_DEBUG "send_ns: alloc skb failed\n"); + return; + } + + skb->free=1; + skb->pkt_type = PACKET_NDISC; + + if (saddr == NULL) + { + struct inet6_ifaddr *ifa; + + /* use link local address */ + ifa = ipv6_get_lladdr(dev); + + if (ifa) + { + saddr = &ifa->addr; + } + } + + if(ipv6_addr_type(daddr) == IPV6_ADDR_MULTICAST) + { + nd_stats.snt_probes_mcast++; + } + else + { + nd_stats.snt_probes_ucast++; + } + + if (ipv6_bld_hdr_2(sk, skb, dev, neigh, saddr, daddr, IPPROTO_ICMPV6, + len) < 0 ) + { + kfree_skb(skb, FREE_WRITE); + printk(KERN_DEBUG + "ndisc_send_ns: ipv6_build_header returned < 0\n"); + return; + } + + msg = (struct nd_msg *)skb_put(skb, len); + msg->icmph.type = NDISC_NEIGHBOUR_SOLICITATION; + msg->icmph.code = 0; + msg->icmph.checksum = 0; + msg->icmph.icmp6_unused = 0; + + /* Set the target address. */ + ipv6_addr_copy(&msg->target, solicit); + + /* Set the source link-layer address option. 
*/ + msg->opt.opt_type = ND_OPT_SOURCE_LL_ADDR; + msg->opt.opt_len = opt_len; + + memcpy(msg->opt.link_addr, dev->dev_addr, dev->addr_len); + + if ((opt_len << 3) - (2 + dev->addr_len)) + { + memset(msg->opt.link_addr + dev->addr_len, 0, + (opt_len << 3) - (2 + dev->addr_len)); + } + + /* checksum */ + msg->icmph.checksum = csum_ipv6_magic(&skb->ipv6_hdr->saddr, + daddr, len, + IPPROTO_ICMPV6, + csum_partial((__u8 *) msg, + len, 0)); + /* send it! */ + ipv6_queue_xmit(sk, skb->dev, skb, 1); +} + +void ndisc_send_rs(struct device *dev, struct in6_addr *saddr, + struct in6_addr *daddr) +{ + struct sock *sk = (struct sock *) ndisc_socket.data; + struct sk_buff *skb; + struct icmpv6hdr *hdr; + __u8 * opt; + int len, opt_len; + int err; + + /* length of addr in 8 octet groups.*/ + opt_len = ((dev->addr_len + 1) >> 3) + 1; + len = sizeof(struct icmpv6hdr) + (opt_len << 3); + + skb = sock_alloc_send_skb(sk, MAX_HEADER + len, 0, 0, &err); + if (skb == NULL) + { + printk(KERN_DEBUG "send_ns: alloc skb failed\n"); + } + + skb->free=1; + + if (ipv6_bld_hdr_2(sk, skb, dev, NULL, saddr, daddr, IPPROTO_ICMPV6, + len) < 0 ) + { + kfree_skb(skb, FREE_WRITE); + printk(KERN_DEBUG + "ndisc_send_ns: ipv6_build_header returned < 0\n"); + return; + } + + hdr = (struct icmpv6hdr *) skb_put(skb, len); + hdr->type = NDISC_ROUTER_SOLICITATION; + hdr->code = 0; + hdr->checksum = 0; + hdr->icmp6_unused = 0; + + opt = (u8*) (hdr + 1); + + /* Set the source link-layer address option. */ + opt[0] = ND_OPT_SOURCE_LL_ADDR; + opt[1] = opt_len; + + memcpy(opt + 2, dev->dev_addr, dev->addr_len); + + if ((opt_len << 3) - (2 + dev->addr_len)) + { + memset(opt + 2 + dev->addr_len, 0, + (opt_len << 3) - (2 + dev->addr_len)); + } + + /* checksum */ + hdr->checksum = csum_ipv6_magic(&skb->ipv6_hdr->saddr, daddr, len, + IPPROTO_ICMPV6, + csum_partial((__u8 *) hdr, len, 0)); + + /* send it! 
*/ + ipv6_queue_xmit(sk, skb->dev, skb, 1); +} + + +static int ndisc_store_hwaddr(struct device *dev, __u8 *opt, int opt_len, + __u8 *h_addr, int option) +{ + while (*opt != option && opt_len) + { + int len; + + len = opt[1] << 3; + + if (len == 0) + { + printk(KERN_WARNING "nd: option has 0 len\n"); + return -EINVAL; + } + + opt += len; + opt_len -= len; + } + + if (*opt == option) + { + memcpy(h_addr, opt + 2, dev->addr_len); + return 0; + } + + return -EINVAL; +} + +/* Called when a timer expires for a neighbour entry. */ + +static void ndisc_timer_handler(unsigned long arg) +{ + unsigned long now = jiffies; + struct neighbour * neigh; + unsigned long ntimer = ~0UL; + int i; + + atomic_inc(&ndisc_lock); + + for (i=0; i < NCACHE_NUM_BUCKETS; i++) + { + for (neigh = neighbours[i]; neigh;) + { + if (neigh->nud_state & NUD_IN_TIMER) + { + int time; + + if (neigh->expires <= now) + { + time = ndisc_event_timer(neigh); + } + else + time = neigh->expires - now; + + if (time == 0) + { + unsigned long flags; + + save_flags(flags); + cli(); + + if (ndisc_lock == 1) + { + struct neighbour *old = neigh; + + neigh = neigh->next; + ndisc_release_neigh(old); + restore_flags(flags); + continue; + } + + restore_flags(flags); + } + + ntimer = min(ntimer, time); + } + neigh = neigh->next; + } + } + + if (ntimer != (~0UL)) + { + ndisc_timer.expires = jiffies + ntimer; + add_timer(&ndisc_timer); + } + ndisc_release_lock(); +} + + +int ndisc_event_timer(struct neighbour *neigh) +{ + struct in6_addr *daddr; + struct in6_addr *target; + struct in6_addr mcaddr; + struct device *dev; + int max_probes; + + if (neigh->nud_state == NUD_DELAY) + { + neigh->nud_state = NUD_PROBE; + } + + max_probes = (neigh->nud_state == NUD_PROBE ? 
nd_max_unicast_solicit: + nd_max_multicast_solicit); + + if (neigh->probes == max_probes) + { + struct sk_buff *skb; + + neigh->nud_state = NUD_FAILED; + neigh->flags |= NCF_INVALID; + nd_stats.res_failed++; + + while((skb=skb_dequeue(&neigh->arp_queue))) + { + /* + * "The sender MUST return an ICMP + * destination unreachable" + */ + icmpv6_send(skb, ICMPV6_DEST_UNREACH, + ICMPV6_ADDR_UNREACH, 0, neigh->dev); + + dev_kfree_skb(skb, FREE_WRITE); + } + return 0; + } + + neigh->probes++; + + dev = neigh->dev; + target = &neigh->addr; + + if (neigh->nud_state == NUD_INCOMPLETE) + { + addrconf_addr_solict_mult(&neigh->addr, &mcaddr); + daddr = &mcaddr; + neigh = NULL; + } + else + { + daddr = &neigh->addr; + } + + ndisc_send_ns(dev, neigh, target, daddr, NULL); + + return nd_retrans_timer; +} + +void ndisc_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + unsigned long now = jiffies; + struct in6_addr daddr; + struct in6_addr *saddr = NULL; + + switch (neigh->nud_state) { + case NUD_FAILED: + neigh->probes = 0; + case NUD_NONE: + + if (skb && !skb->stamp.tv_sec) + { + /* + * skb->stamp allows us to know if we are + * originating the skb or forwarding it. 
+ * (it is set on netif_rx) + */ + saddr = &skb->ipv6_hdr->saddr; + } + + neigh->nud_state = NUD_INCOMPLETE; + addrconf_addr_solict_mult(&neigh->addr, &daddr); + ndisc_send_ns(neigh->dev, NULL, &neigh->addr, &daddr, saddr); + ndisc_add_timer(neigh, nd_retrans_timer); + + break; + + case NUD_REACHABLE: + if (now - neigh->tstamp < nd_reachable_time) + break; + + case NUD_STALE: + neigh->nud_state = NUD_DELAY; + ndisc_add_timer(neigh, nd_delay_first_probe); + } +} + +/* + * Received a neighbour announce + */ +void ndisc_event_na(struct neighbour *neigh, unsigned char * opt, int opt_len, + int solicited, int override) +{ + struct sk_buff *skb; + + if (neigh->nud_state == NUD_NONE) + { + neigh->nud_state = NUD_INCOMPLETE; + } + + if (neigh->nud_state == NUD_INCOMPLETE || override) + { + + if (opt_len == 0) + { + printk(KERN_DEBUG "no opt on NA\n"); + } + else + { + /* record hardware address */ + + neigh->h_dest = neigh->hh_data; + neigh->flags &= ~NCF_HHVALID; + + if (ndisc_store_hwaddr(neigh->dev, opt, opt_len, + neigh->h_dest, + ND_OPT_TARGET_LL_ADDR)) + { + printk(KERN_DEBUG + "event_na: invalid TARGET_LL_ADDR\n"); + neigh->h_dest = NULL; + neigh->nud_state = NUD_NONE; + return; + } + } + } + + + if (solicited || override || neigh->nud_state == NUD_INCOMPLETE) + { + + neigh->probes = 0; + neigh->tstamp = jiffies; + + if (neigh->nud_state & NUD_IN_TIMER) + { + ndisc_del_timer(neigh); + } + + if (solicited) + { + neigh->nud_state = NUD_REACHABLE; + } + else + { + neigh->nud_state = NUD_STALE; + } + } + + while ((skb=skb_dequeue(&neigh->arp_queue))) + { + int priority = SOPRI_NORMAL; + + if (skb->sk) + priority = skb->sk->priority; + + dev_queue_xmit(skb, neigh->dev, priority); + } +} + +static void ndisc_event_ns(struct in6_addr *saddr, struct sk_buff *skb) +{ + struct neighbour *neigh; + u8 *opt; + int len; + + opt = skb->h.raw; + opt += sizeof(struct icmpv6hdr) + sizeof(struct in6_addr); + + len = skb->tail - opt; + + neigh = ndisc_retrieve_neigh(skb->dev, saddr); + 
+ if (neigh == NULL) + { + neigh = ndisc_new_neigh(skb->dev, saddr); + } + + switch(neigh->nud_state) { + case NUD_REACHABLE: + case NUD_STALE: + case NUD_DELAY: + if (*opt != ND_OPT_SOURCE_LL_ADDR || + len != neigh->dev->addr_len || + memcmp(neigh->h_dest, opt + 2, len)) + { + break; + } + + if (neigh->nud_state & NUD_IN_TIMER) + { + ndisc_del_timer(neigh); + } + default: + neigh->flags &= ~NCF_HHVALID; + neigh->h_dest = neigh->hh_data; + + if (ndisc_store_hwaddr(neigh->dev, opt, len, + neigh->h_dest, + ND_OPT_SOURCE_LL_ADDR)) + { + printk(KERN_DEBUG + "event_ns: invalid SOURCE_LL_ADDR\n"); + neigh->h_dest = NULL; + neigh->nud_state = NUD_NONE; + return; + } + + neigh->nud_state = NUD_STALE; + neigh->tstamp = jiffies; + neigh->probes = 0; + } + +} + +static struct rt6_info *ndisc_get_dflt_router(struct device *dev, + struct in6_addr *addr) +{ + struct rt6_info *iter; + + for (iter = default_rt_list; iter; iter=iter->next) + { + if (dev == iter->rt_dev && + ipv6_addr_cmp(&iter->rt_dst, addr) == 0) + { + return iter; + } + } + return NULL; +} + +static void ndisc_add_dflt_router(struct rt6_info *rt) +{ + struct rt6_info *iter; + + rt->rt_ref++; + rt->fib_node = &routing_table; + rt6_stats.fib_rt_alloc++; + + if (default_rt_list == NULL) + { + default_rt_list = rt; + return; + } + + for (iter = default_rt_list; iter->next; iter=iter->next) + ; + + iter->next = rt; +} + +static void ndisc_del_dflt_router(struct rt6_info *rt) +{ + struct rt6_info *iter, *back; + + if (rt == default_rt_list) + { + default_rt_list = rt->next; + } + else + { + back = NULL; + for (iter = default_rt_list; iter; iter=iter->next) + { + if (iter == rt) + { + back->next = rt->next; + break; + } + back = iter; + } + } + + rt->fib_node = NULL; + rt_release(rt); +} + +static void ndisc_purge_dflt_routers(void) +{ + struct rt6_info *iter, *rt; + + for (iter = default_rt_list; iter; ) + { + rt = iter; + iter=iter->next; + rt_release(rt); + } + default_rt_list = NULL; +} + +static void 
ndisc_ll_addr_update(struct neighbour *neigh, u8* opt, int len, + int type) +{ + switch(neigh->nud_state) { + case NUD_REACHABLE: + case NUD_STALE: + case NUD_DELAY: + if (len == neigh->dev->addr_len && + memcmp(neigh->h_dest, opt + 2, len) == 0) + { + break; + } + + if (neigh->nud_state & NUD_IN_TIMER) + { + ndisc_del_timer(neigh); + } + default: + neigh->flags &= ~NCF_HHVALID; + neigh->h_dest = neigh->hh_data; + + if (ndisc_store_hwaddr(neigh->dev, opt, len, neigh->h_dest, + type)) + { + printk(KERN_DEBUG "NDISC: invalid LL_ADDR\n"); + neigh->h_dest = NULL; + neigh->nud_state = NUD_NONE; + break; + } + + neigh->nud_state = NUD_STALE; + neigh->tstamp = jiffies; + neigh->probes = 0; + } + +} + +struct rt6_info * dflt_rt_lookup(void) +{ + struct rt6_info *match = NULL; + struct rt6_info *rt; + int score = -1; + unsigned long now = jiffies; + + for (rt = default_rt_list; rt; rt=rt->next) + { + struct neighbour *neigh = rt->rt_nexthop; + + if (score < 0) + { + score = 0; + match = rt; + } + + if (neigh->nud_state == NUD_REACHABLE) + { + if (score < 1) + { + score = 1; + match = rt; + } + + if (now - neigh->tstamp < nd_reachable_time) + { + return rt; + } + } + + } + + return match; +} + +static void ndisc_router_discovery(struct sk_buff *skb) +{ + struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw; + struct neighbour *neigh; + struct inet6_dev *in6_dev; + struct rt6_info *rt; + int lifetime; + int optlen; + + __u8 * opt = (__u8 *)(ra_msg + 1); + + optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg); + + if (skb->ipv6_hdr->hop_limit != 255) + { + printk(KERN_WARNING + "NDISC: fake router advertisment received\n"); + return; + } + + /* + * set the RA_RECV flag in the interface + */ + + in6_dev = ipv6_get_idev(skb->dev); + if (in6_dev == NULL) + { + printk(KERN_DEBUG "RA: can't find in6 device\n"); + return; + } + + if (in6_dev->if_flags & IF_RS_SENT) + { + /* + * flag that an RA was received after an RS was sent + * out on this interface. 
+ */ + in6_dev->if_flags |= IF_RA_RCVD; + } + + lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); + + rt = ndisc_get_dflt_router(skb->dev, &skb->ipv6_hdr->saddr); + + if (rt && lifetime == 0) + { + ndisc_del_dflt_router(rt); + rt = NULL; + } + + if (rt == NULL && lifetime) + { + printk(KERN_DEBUG "ndisc_rdisc: new default router\n"); + + rt = (struct rt6_info *)kmalloc(sizeof(struct rt6_info), + GFP_ATOMIC); + + neigh = ndisc_retrieve_neigh(skb->dev, &skb->ipv6_hdr->saddr); + + if (neigh == NULL) + { + neigh = ndisc_new_neigh(skb->dev, + &skb->ipv6_hdr->saddr); + } + + atomic_inc(&neigh->refcnt); + neigh->flags |= NCF_ROUTER; + + memset(rt, 0, sizeof(struct rt6_info)); + + ipv6_addr_copy(&rt->rt_dst, &skb->ipv6_hdr->saddr); + rt->rt_metric = 1; + rt->rt_flags = RTF_GATEWAY | RTF_DYNAMIC; + rt->rt_dev = skb->dev; + rt->rt_nexthop = neigh; + + ndisc_add_dflt_router(rt); + } + + if (rt) + { + rt->rt_expires = jiffies + (HZ * lifetime); + } + + if (ra_msg->icmph.icmp6_hop_limit) + { + ipv6_hop_limit = ra_msg->icmph.icmp6_hop_limit; + } + + /* + * Update Reachable Time and Retrans Timer + */ + + if (ra_msg->retrans_timer) + { + nd_retrans_timer = ntohl(ra_msg->retrans_timer); + } + + if (ra_msg->reachable_time) + { + __u32 rtime = ntohl(ra_msg->reachable_time); + + if (rtime != nd_base_reachable_time) + { + nd_base_reachable_time = rtime; + nd_gc_staletime = 3 * nd_base_reachable_time; + nd_reachable_time = rand_reach_time(); + } + + } + + /* + * Process options. 
+ */ + + while(optlen > 0) { + int len; + + len = (opt[1] << 3); + + if (len == 0) + { + printk(KERN_DEBUG "RA: opt has 0 len\n"); + break; + } + + switch(*opt) { + case ND_OPT_SOURCE_LL_ADDR: + + if (rt == NULL) + break; + + neigh = rt->rt_nexthop; + + ndisc_ll_addr_update(neigh, opt, len, + ND_OPT_SOURCE_LL_ADDR); + break; + + case ND_OPT_PREFIX_INFO: + addrconf_prefix_rcv(skb->dev, opt, len); + break; + + case ND_OPT_MTU: + + if (rt) + { + int mtu; + struct device *dev; + + mtu = htonl(*(__u32 *)opt+4); + dev = rt->rt_nexthop->dev; + + if (mtu < 576) + { + printk(KERN_DEBUG "NDISC: router " + "announcement with mtu = %d\n", + mtu); + break; + } + + if (dev->change_mtu) + { + dev->change_mtu(dev, mtu); + } + else + { + dev->mtu = mtu; + } + } + break; + + case ND_OPT_TARGET_LL_ADDR: + case ND_OPT_REDIRECT_HDR: + printk(KERN_DEBUG "got illegal option with RA"); + break; + default: + printk(KERN_DEBUG "unkown option in RA\n"); + } + optlen -= len; + opt += len; + } + +} + +void ndisc_forwarding_on(void) +{ + /* + * forwarding was turned on + */ + + ndisc_purge_dflt_routers(); +} + +void ndisc_forwarding_off(void) +{ + /* + * forwarding was turned off + */ +} + +static void ndisc_redirect_rcv(struct sk_buff *skb) +{ + struct icmpv6hdr *icmph; + struct in6_addr *dest; + struct in6_addr *target; /* new first hop to destination */ + struct neighbour *neigh; + struct rt6_info *rt; + int on_link = 0; + int optlen; + u8 * opt; + + if (skb->ipv6_hdr->hop_limit != 255) + { + printk(KERN_WARNING + "NDISC: fake ICMP redirect received\n"); + return; + } + + if (!(ipv6_addr_type(&skb->ipv6_hdr->saddr) & IPV6_ADDR_LINKLOCAL)) + { + printk(KERN_WARNING + "ICMP redirect: source address is not linklocal\n"); + return; + } + + optlen = skb->tail - skb->h.raw; + optlen -= sizeof(struct icmpv6hdr) + 2 * sizeof(struct in6_addr); + + if (optlen < 0) + { + printk(KERN_WARNING "ICMP redirect: packet too small\n"); + return; + } + + icmph = (struct icmpv6hdr *) skb->h.raw; + target = 
(struct in6_addr *) (icmph + 1); + dest = target + 1; + + if (ipv6_addr_type(dest) & IPV6_ADDR_MULTICAST) + { + printk(KERN_WARNING "ICMP redirect for multicast addr\n"); + return; + } + + if (ipv6_addr_cmp(dest, target) == 0) + { + on_link = 1; + } + else if (!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) + { + printk(KERN_WARNING + "ICMP redirect: target address is not linklocal\n"); + return; + } + + /* passed validation tests */ + + rt = ipv6_rt_redirect(skb->dev, dest, target, on_link); + + if (rt == NULL) + { + printk(KERN_WARNING "ICMP redirect: no route to host\n"); + return; + } + + neigh = rt->rt_nexthop; + + opt = (u8 *) (dest + 1); + + while (optlen > 0) + { + int len; + + len = (opt[1] << 3); + + if (*opt == ND_OPT_TARGET_LL_ADDR) + { + ndisc_ll_addr_update(neigh, opt, len, + ND_OPT_TARGET_LL_ADDR); + } + + opt += len; + optlen -= len; + } +} + +void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, + struct in6_addr *target) +{ + struct sock *sk = (struct sock *) ndisc_socket.data; + int len = sizeof(struct icmpv6hdr) + 2 * sizeof(struct in6_addr); + struct sk_buff *buff; + struct inet6_ifaddr *ifp; + struct icmpv6hdr *icmph; + struct in6_addr *addrp; + struct rt6_info *rt; + int ta_len = 0; + u8 *opt; + int rd_len; + int err; + int hlen; + + rt = fibv6_lookup(&skb->ipv6_hdr->saddr, skb->dev, 0); + + if (rt->rt_flags & RTF_GATEWAY) + { + printk(KERN_DEBUG "ndisc_send_redirect: not a neighbour\n"); + return; + } + + if (neigh->nud_state == NUD_REACHABLE) + { + ta_len = ((neigh->dev->addr_len + 1) >> 3) + 1; + len += (ta_len << 3); + } + + rd_len = min(536 - len, ntohs(skb->ipv6_hdr->payload_len) + 8); + rd_len &= ~0x7; + len += rd_len; + + ifp = ipv6_get_lladdr(skb->dev); + + if (ifp == NULL) + { + printk(KERN_DEBUG "redirect: no link_local addr for dev\n"); + return; + } + + buff = sock_alloc_send_skb(sk, MAX_HEADER + len, 0, 0, &err); + + if (buff == NULL) + { + printk(KERN_DEBUG "ndisc_send_redirect: alloc_skb failed\n"); + return; 
+ } + + + hlen = 0; + if (skb->dev->hard_header_len) + { + hlen = (skb->dev->hard_header_len + 15) & ~15; + } + + skb_reserve(buff, hlen + sizeof(struct ipv6hdr)); + + icmph = (struct icmpv6hdr *) skb_put(buff, len); + + memset(icmph, 0, sizeof(struct icmpv6hdr)); + icmph->type = NDISC_REDIRECT; + + /* + * copy target and destination addresses + */ + + addrp = (struct in6_addr *)(icmph + 1); + ipv6_addr_copy(addrp, target); + addrp++; + ipv6_addr_copy(addrp, &skb->ipv6_hdr->daddr); + + opt = (u8*) (addrp + 1); + + /* + * include target_address option + */ + + if (ta_len) + { + int zb; + + *(opt++) = ND_OPT_TARGET_LL_ADDR; + *(opt++) = ta_len; + + memcpy(opt, neigh->h_dest, neigh->dev->addr_len); + opt += neigh->dev->addr_len; + + /* + * if link layer address doesn't end on an 8 byte + * boundary memset(0) the remainder + */ + + zb = (neigh->dev->addr_len + 2) & 0x7; + if (zb) + { + int comp; + + comp = 8 - zb; + memset(opt, 0, comp); + opt += comp; + } + } + + /* + * build redirect option and copy skb over to the new packet. + */ + + memset(opt, 0, 8); + *(opt++) = ND_OPT_REDIRECT_HDR; + *(opt++) = (rd_len >> 3); + opt += 6; + + memcpy(opt, skb->ipv6_hdr, rd_len - 8); + + icmph->checksum = csum_ipv6_magic(&ifp->addr, &skb->ipv6_hdr->saddr, + len, IPPROTO_ICMPV6, + csum_partial((u8 *) icmph, len, 0)); + + ipv6_xmit(sk, buff, &ifp->addr, &skb->ipv6_hdr->saddr, NULL, IPPROTO_ICMPV6); +} + +/* Called by upper layers to validate neighbour cache entries.
*/ + +void ndisc_validate(struct neighbour *neigh) +{ + if (neigh->nud_state == NUD_INCOMPLETE) + return; + + if (neigh->nud_state == NUD_DELAY) + { + ndisc_del_timer(neigh); + } + + nd_stats.rcv_upper_conf++; + neigh->nud_state = NUD_REACHABLE; + neigh->tstamp = jiffies; +} + +int ndisc_rcv(struct sk_buff *skb, struct device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + struct ipv6_options *opt, unsigned short len) +{ + struct nd_msg *msg = (struct nd_msg *) skb->h.raw; + struct neighbour *neigh; + struct inet6_ifaddr *ifp; + + switch (msg->icmph.type) { + case NDISC_NEIGHBOUR_SOLICITATION: + if ((ifp = ipv6_chk_addr(&msg->target))) + { + int addr_type; + + if (ifp->flags & DAD_INCOMPLETE) + { + /* + * DAD failed + */ + + printk(KERN_DEBUG "duplicate address\n"); + del_timer(&ifp->timer); + return 0; + } + + addr_type = ipv6_addr_type(saddr); + if (addr_type & IPV6_ADDR_UNICAST) + { + int inc; + + /* + * update / create cache entry + * for the source adddress + */ + + nd_stats.rcv_probes_ucast++; + ndisc_event_ns(saddr, skb); + + /* answer solicitation */ + neigh = ndisc_retrieve_neigh(dev, saddr); + + inc = ipv6_addr_type(daddr); + inc &= IPV6_ADDR_MULTICAST; + + ndisc_send_na(dev, neigh, saddr, &ifp->addr, + ifp->idev->router, 1, inc, inc); + } + else + { + /* FIXME */ + printk(KERN_DEBUG "ns: non unicast saddr\n"); + } + } + break; + + case NDISC_NEIGHBOUR_ADVERTISEMENT: + + neigh = ndisc_retrieve_neigh(skb->dev, &msg->target); + if (neigh) + { + if (neigh->flags & NCF_ROUTER) + { + if (msg->icmph.icmp6_router == 0) + { + /* + * Change: router to host + */ + + struct rt6_info *rt; + rt = ndisc_get_dflt_router(skb->dev, + saddr); + if (rt) + { + ndisc_del_dflt_router(rt); + } + } + } + else + { + if (msg->icmph.icmp6_router) + { + neigh->flags |= NCF_ROUTER; + } + } + ndisc_event_na(neigh, (unsigned char *) &msg->opt, + skb->tail - (u8 *)&msg->opt /*opt_len*/, + msg->icmph.icmp6_solicited, + msg->icmph.icmp6_override); + } + break; + + } + + if 
(ipv6_forwarding == 0) + { + switch (msg->icmph.type) { + case NDISC_ROUTER_ADVERTISEMENT: + ndisc_router_discovery(skb); + break; + + case NDISC_REDIRECT: + ndisc_redirect_rcv(skb); + break; + } + } + + return 0; +} + +int ndisc_get_info(char *buffer, char **start, off_t offset, int length, + int dummy) +{ + struct neighbour *neigh; + unsigned long now = jiffies; + int len = 0; + int i; + + atomic_inc(&ndisc_lock); + + for (i = 0; i < NCACHE_NUM_BUCKETS; i++) + { + for(neigh = neighbours[i]; neigh; neigh=neigh->next) + { + int j; + + for (j=0; j<16; j++) + { + sprintf(buffer + len, "%02x", + neigh->addr.s6_addr[j]); + len += 2; + } + + len += sprintf(buffer + len, + " %02x %02x %08lx %08lx %04x %04x ", + i, + neigh->nud_state, + neigh->expires - now, + now - neigh->tstamp, + neigh->refcnt, + neigh->flags); + + if (neigh->h_dest) + { + for (j=0; j< neigh->dev->addr_len; j++) + { + sprintf(buffer + len, "%02x", + neigh->h_dest[j]); + len += 2; + } + } + else + len += sprintf(buffer + len, "000000000000"); + len += sprintf(buffer + len, "\n"); + + } + } + + ndisc_release_lock(); + + *start = buffer + offset; + + len -= offset; + + if (len > length) + len = length; + return len; +} + +struct proc_dir_entry ndisc_proc_entry = +{ + 0, 11, "ndisc_cache", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, NULL, + &ndisc_get_info +}; + +void ndisc_init(struct proto_ops *ops) +{ + struct sock *sk; + int i = 0; + int err; + + /* + * Init ndisc_socket + */ + ndisc_socket.type=SOCK_RAW; + ndisc_socket.ops=ops; + + if((err=ops->create(&ndisc_socket, IPPROTO_ICMPV6))<0) + printk(KERN_DEBUG + "Failed to create the NDISC control socket.\n"); + + MOD_DEC_USE_COUNT; + + sk = ndisc_socket.data; + sk->allocation = GFP_ATOMIC; + sk->net_pinfo.af_inet6.hop_limit = 255; + sk->net_pinfo.af_inet6.priority = 15; + sk->num = 256; /* Don't receive any data */ + + /* + * Initialize the neighbours hash buckets. 
+ */ + + for (; i < NCACHE_NUM_BUCKETS; i++) + neighbours[i] = NULL; + + /* General ND state machine timer. */ + init_timer(&ndisc_timer); + ndisc_timer.function = ndisc_timer_handler; + ndisc_timer.data = 0L; + ndisc_timer.expires = 0L; + + /* ND GC timer */ + init_timer(&ndisc_gc_timer); + ndisc_gc_timer.function = ndisc_garbage_collect; + ndisc_gc_timer.data = 0L; + ndisc_gc_timer.expires = jiffies + nd_gc_interval; + + add_timer(&ndisc_gc_timer); + +#ifdef CONFIG_IPV6_MODULE + ndisc_eth_hook = ndisc_eth_resolv; + proc_register_dynamic(&proc_net, &ndisc_proc_entry); +#endif +} + +#ifdef CONFIG_IPV6_MODULE +void ndisc_cleanup(void) +{ + ndisc_eth_hook = NULL; + proc_unregister(&proc_net, ndisc_proc_entry.low_ino); + del_timer(&ndisc_gc_timer); + del_timer(&ndisc_timer); +} +#endif + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h -c -o ndisc.o ndisc.c" + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/protocol.c linux/net/ipv6/protocol.c --- v2.1.7/linux/net/ipv6/protocol.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/protocol.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +struct inet6_protocol *inet6_protocol_base = NULL; +struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] = +{ + NULL +}; + + +struct inet6_protocol *inet6_get_protocol(unsigned char prot) +{ + unsigned char hash; + struct inet6_protocol *p; + + hash = prot & (MAX_INET_PROTOS - 1); + for (p = inet6_protos[hash] ; p != NULL; p=p->next) + { + if (p->protocol == prot) + return((struct inet6_protocol *) p); + } + return(NULL); +} + +void inet6_add_protocol(struct inet6_protocol *prot) +{ + unsigned char hash; + struct 
inet6_protocol *p2; + + hash = prot->protocol & (MAX_INET_PROTOS - 1); + prot ->next = inet6_protos[hash]; + inet6_protos[hash] = prot; + prot->copy = 0; + + /* + * Set the copy bit if we need to. + */ + + p2 = (struct inet6_protocol *) prot->next; + while(p2 != NULL) + { + if (p2->protocol == prot->protocol) + { + prot->copy = 1; + break; + } + p2 = (struct inet6_protocol *) p2->next; + } +} + +/* + * Remove a protocol from the hash tables. + */ + +int inet6_del_protocol(struct inet6_protocol *prot) +{ + struct inet6_protocol *p; + struct inet6_protocol *lp = NULL; + unsigned char hash; + + hash = prot->protocol & (MAX_INET_PROTOS - 1); + if (prot == inet6_protos[hash]) + { + inet6_protos[hash] = (struct inet6_protocol *) inet6_protos[hash]->next; + return(0); + } + + p = (struct inet6_protocol *) inet6_protos[hash]; + while(p != NULL) + { + /* + * We have to worry if the protocol being deleted is + * the last one on the list, then we may need to reset + * someone's copied bit. + */ + if (p->next != NULL && p->next == prot) + { + /* + * if we are the last one with this protocol and + * there is a previous one, reset its copy bit. 
+ */ + if (p->copy == 0 && lp != NULL) + lp->copy = 0; + p->next = prot->next; + return(0); + } + if (p->next != NULL && p->next->protocol == prot->protocol) + lp = p; + + p = (struct inet6_protocol *) p->next; + } + return(-1); +} + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h -c -o protocol.o protocol.c" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/raw.c linux/net/ipv6/raw.c --- v2.1.7/linux/net/ipv6/raw.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/raw.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,458 @@ +/* + * RAW sockets for IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * Adapted from linux/net/ipv4/raw.c + * + * $Id: raw.c,v 1.5 1996/10/29 22:45:53 roque Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +void rawv6_err(struct sock *sk, int type, int code, unsigned char *buff, + struct in6_addr *saddr, struct in6_addr *daddr) +{ + if (sk == NULL) + return; + +} + +static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) +{ + /* Charge it to the socket. */ + + if (sock_queue_rcv_skb(sk,skb)<0) + { + /* ip_statistics.IpInDiscards++; */ + skb->sk=NULL; + kfree_skb(skb, FREE_READ); + return 0; + } + + /* ip_statistics.IpInDelivers++; */ + return 0; +} + +/* + * This is next to useless... 
+ * if we demultiplex in network layer we don't need the extra call + * just to queue the skb... + * maybe we could have the network decide uppon an hint if it + * should call raw_rcv for demultiplexing + */ +int rawv6_rcv(struct sk_buff *skb, struct device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + struct ipv6_options *opt, unsigned short len) +{ + struct sock *sk; + + sk = skb->sk; + + if (sk->ip_hdrincl) + { + skb->h.raw = (unsigned char *) skb->ipv6_hdr; + } + + if (sk->users) { + __skb_queue_tail(&sk->back_log, skb); + return 0; + } + + rawv6_rcv_skb(sk, skb); + return 0; +} + + +/* + * This should be easy, if there is something there + * we return it, otherwise we block. + */ + +int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, + int noblock, int flags,int *addr_len) +{ + struct sockaddr_in6 *sin6=(struct sockaddr_in6 *)msg->msg_name; + struct sk_buff *skb; + int copied=0; + int err; + + + if (flags & MSG_OOB) + return -EOPNOTSUPP; + + if (sk->shutdown & RCV_SHUTDOWN) + return(0); + + if (addr_len) + *addr_len=sizeof(*sin6); + + skb=skb_recv_datagram(sk, flags, noblock, &err); + if(skb==NULL) + return err; + + copied = min(len, skb->tail - skb->h.raw); + + skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + sk->stamp=skb->stamp; + + /* Copy the address. */ + if (sin6) + { + sin6->sin6_family = AF_INET6; + memcpy(&sin6->sin6_addr, &skb->ipv6_hdr->saddr, + sizeof(struct in6_addr)); + + *addr_len = sizeof(struct sockaddr_in6); + } + + if (msg->msg_control) + { + int err; + + err = datagram_recv_ctl(sk, msg, skb); + + if (err < 0) + { + copied = err; + } + } + + skb_free_datagram(sk, skb); + return (copied); +} + +/* + * Sending... 
+ */ + +struct rawv6_fakehdr { + struct iovec *iov; + struct sock *sk; + __u32 len; + __u32 cksum; + __u32 proto; + struct in6_addr *daddr; +}; + +static void rawv6_getfrag(const void *data, struct in6_addr *saddr, + char *buff, unsigned int offset, unsigned int len) +{ + struct iovec *iov = (struct iovec *) data; + + memcpy_fromiovecend(buff, iov, offset, len); +} + +static void rawv6_frag_cksum(const void *data, struct in6_addr *addr, + char *buff, unsigned int offset, + unsigned int len) +{ + struct rawv6_fakehdr *hdr = (struct rawv6_fakehdr *) data; + + hdr->cksum = csum_partial_copy_fromiovecend(buff, hdr->iov, offset, + len, hdr->cksum); + + if (offset == 0) + { + struct sock *sk; + struct raw6_opt *opt; + struct in6_addr *daddr; + + sk = hdr->sk; + opt = &sk->tp_pinfo.tp_raw; + + if (hdr->daddr) + { + daddr = hdr->daddr; + } + else + { + daddr = addr + 1; + } + + hdr->cksum = csum_ipv6_magic(addr, daddr, hdr->len, + hdr->proto, hdr->cksum); + + if (opt->offset < len) + { + __u16 *csum; + + csum = (__u16 *) (buff + opt->offset); + *csum = hdr->cksum; + } + else + { + /* + * FIXME + * signal an error to user via sk->err + */ + printk(KERN_DEBUG "icmp: cksum offset too big\n"); + } + } +} + + +static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len, + int noblock, int flags) +{ + struct ipv6_options opt_space; + struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct ipv6_options *opt = NULL; + struct device *dev = NULL; + struct in6_addr *saddr = NULL; + int addr_len = msg->msg_namelen; + struct in6_addr *daddr; + struct raw6_opt *raw_opt; + u16 proto; + int err; + + + /* Mirror BSD error message compatibility */ + if (flags & MSG_OOB) + return -EOPNOTSUPP; + + if (flags & ~MSG_DONTROUTE) + return(-EINVAL); + /* + * Get and verify the address. 
+ */ + + if (sin6) + { + if (addr_len < sizeof(struct sockaddr_in6)) + return(-EINVAL); + + if (sin6->sin6_family && sin6->sin6_family != AF_INET6) + return(-EINVAL); + + /* port is the proto value [0..255] carried in nexthdr */ + proto = ntohs(sin6->sin6_port); + + if (!proto) + proto = sk->num; + + if (proto > 255) + return(-EINVAL); + + daddr = &sin6->sin6_addr; + + if (np->dest && ipv6_addr_cmp(daddr, &np->daddr)) + { + ipv6_dst_unlock(np->dest); + np->dest = NULL; + } + } + else + { + if (sk->state != TCP_ESTABLISHED) + return(-EINVAL); + + proto = sk->num; + daddr = &(sk->net_pinfo.af_inet6.daddr); + } + + if (ipv6_addr_any(daddr)) + { + /* + * unspecified destination address + * treated as error... is this correct ? + */ + return(-EINVAL); + } + + /* + * We don't allow > 64K sends yet. + */ + if (len + (sk->ip_hdrincl ? 0 : sizeof(struct ipv6hdr)) > 65535) + return -EMSGSIZE; + + if (msg->msg_control) + { + opt = &opt_space; + memset(opt, 0, sizeof(struct ipv6_options)); + + err = datagram_send_ctl(msg, &dev, &saddr, opt); + if (err < 0) + { + printk(KERN_DEBUG "invalid msg_control\n"); + return err; + } + } + + raw_opt = &sk->tp_pinfo.tp_raw; + + + if (raw_opt->checksum) + { + struct rawv6_fakehdr hdr; + + hdr.iov = msg->msg_iov; + hdr.sk = sk; + hdr.len = len; + hdr.cksum = 0; + hdr.proto = proto; + + if (opt && opt->srcrt) + { + hdr.daddr = daddr; + } + else + { + hdr.daddr = NULL; + } + + err = ipv6_build_xmit(sk, rawv6_frag_cksum, &hdr, daddr, len, + saddr, dev, opt, proto, noblock); + } + else + { + err = ipv6_build_xmit(sk, rawv6_getfrag, msg->msg_iov, daddr, + len, saddr, dev, opt, proto, + noblock); + } + + return err<0?err:len; +} + +static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + struct raw6_opt *opt = &sk->tp_pinfo.tp_raw; + int err = 0; + + switch (optname) { + case ICMPV6_FILTER: + copy_from_user(&opt->filter, optval, + sizeof(struct icmp6_filter)); + break; + default: + err =
-ENOPROTOOPT; + }; + + return err; +} + +static int rawv6_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + struct raw6_opt *opt = &sk->tp_pinfo.tp_raw; + int val, err; + + switch(level) + { + case SOL_RAW: + break; + + case SOL_ICMPV6: + if (sk->num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + return rawv6_seticmpfilter(sk, level, optname, optval, + optlen); + default: + return ipv6_setsockopt(sk, level, optname, optval, + optlen); + } + + if (optval == NULL) + return(-EINVAL); + + err = get_user(val, (int *)optval); + if(err) + return err; + + switch (optname) + { + case RAW_CHECKSUM: + opt->checksum = 1; + opt->offset = val; + + return 0; + break; + + default: + return(-ENOPROTOOPT); + } +} + +static void rawv6_close(struct sock *sk, unsigned long timeout) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + + sk->state = TCP_CLOSE; + + if (np->dest) + { + ipv6_dst_unlock(np->dest); + } + + destroy_sock(sk); +} + +static int rawv6_init_sk(struct sock *sk) +{ + return(0); +} + +struct proto rawv6_prot = { + rawv6_close, + udpv6_connect, + NULL, + NULL, + NULL, + NULL, + datagram_select, + NULL, + rawv6_init_sk, + NULL, + NULL, + rawv6_setsockopt, + ipv6_getsockopt, /* FIXME */ + rawv6_sendmsg, + rawv6_recvmsg, + NULL, /* No special bind */ + rawv6_rcv_skb, + 128, + 0, + "RAW", + 0, 0, + NULL +}; + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h -c -o rawv6.o rawv6.c" + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/reassembly.c linux/net/ipv6/reassembly.c --- v2.1.7/linux/net/ipv6/reassembly.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/reassembly.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,354 @@ +/* + * IPv6 fragment reassembly + * Linux INET6 implementation + * + * Authors: + * Pedro 
Roque + * + * Based on: net/ipv4/ip_fragment.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +static struct frag_queue ipv6_frag_queue = { + &ipv6_frag_queue, &ipv6_frag_queue, + 0, {0}, NULL, NULL, + 0 +}; + +static void create_frag_entry(struct sk_buff *skb, + struct device *dev, + __u8 *nhptr, + struct frag_hdr *fhdr); +static int reasm_frag_1(struct frag_queue *fq, + struct sk_buff **skb_in); + +static void reasm_queue(struct frag_queue *fq, + struct sk_buff *skb, + struct frag_hdr *fhdr); + +static int reasm_frag(struct frag_queue *fq, struct sk_buff **skb, + __u8 *nhptr, + struct frag_hdr *fhdr) +{ + __u32 expires; + int nh; + + expires = del_timer(&fq->timer); + + /* + * We queue the packet even if it's the last. + * It's a trade off. This allows the reassembly + * code to be simpler (=faster) and of the + * steps we do for queueing the only unnecessary + * one it's the kmalloc for a struct ipv6_frag. + * Feel free to try other alternatives... 
+ */ + reasm_queue(fq, *skb, fhdr); + + if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) + { + fq->last_in = 1; + fq->nhptr = nhptr; + } + + if (fq->last_in) + { + if ((nh = reasm_frag_1(fq, skb))) + return nh; + } + + fq->timer.expires = expires; + add_timer(&fq->timer); + + return 0; +} + +int ipv6_reassembly(struct sk_buff **skb, struct device *dev, __u8 *nhptr, + struct ipv6_options *opt) +{ + struct frag_hdr *fhdr = (struct frag_hdr *) ((*skb)->h.raw); + struct frag_queue *fq; + + for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next) + { + if (fq->id == fhdr->identification) + { + return reasm_frag(fq, skb, nhptr,fhdr); + } + } + + create_frag_entry(*skb, dev, nhptr, fhdr); + + + return 0; +} + + +static void fq_free(struct frag_queue *fq) +{ + struct ipv6_frag *fp, *back; + + for(fp = fq->fragments; fp; ) + { + kfree_skb(fp->skb, FREE_READ); + back = fp; + fp=fp->next; + kfree(back); + } + + fq->prev->next = fq->next; + fq->next->prev = fq->prev; + + fq->prev = fq->next = NULL; + + kfree(fq); + +} + +static void frag_expire(unsigned long data) +{ + struct frag_queue *fq; + struct ipv6_frag *frag; + + fq = (struct frag_queue *) data; + + del_timer(&fq->timer); + + frag = fq->fragments; + + if (frag == NULL) + { + printk(KERN_DEBUG "invalid fragment queue\n"); + return; + } + + icmpv6_send(frag->skb, ICMPV6_TIME_EXCEEDED, ICMPV6_EXC_FRAGTIME, 0, + frag->skb->dev); + + fq_free(fq); +} + + +static void create_frag_entry(struct sk_buff *skb, struct device *dev, + __u8 *nhptr, + struct frag_hdr *fhdr) +{ + struct frag_queue *fq; + + fq = (struct frag_queue *) kmalloc(sizeof(struct frag_queue), + GFP_ATOMIC); + + if (fq == NULL) + { + kfree_skb(skb, FREE_READ); + return; + } + + memset(fq, 0, sizeof(struct frag_queue)); + + fq->id = fhdr->identification; + + fq->dev = dev; + + /* init_timer has been done by the memset */ + fq->timer.function = frag_expire; + fq->timer.data = (long) fq; + fq->timer.expires = jiffies + IPV6_FRAG_TIMEOUT; + + 
fq->nexthdr = fhdr->nexthdr; + + + if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) + { + fq->last_in = 1; + fq->nhptr = nhptr; + } + reasm_queue(fq, skb, fhdr); + + fq->prev = ipv6_frag_queue.prev; + fq->next = &ipv6_frag_queue; + fq->prev->next = fq; + ipv6_frag_queue.prev = fq; + + add_timer(&fq->timer); +} + + +static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, + struct frag_hdr *fhdr) +{ + struct ipv6_frag *nfp, *fp, **bptr; + + nfp = (struct ipv6_frag *) kmalloc(sizeof(struct ipv6_frag), + GFP_ATOMIC); + + if (nfp == NULL) + { + kfree_skb(skb, FREE_READ); + return; + } + + + nfp->offset = ntohs(fhdr->frag_off) & ~0x7; + nfp->len = (ntohs(skb->ipv6_hdr->payload_len) - + ((u8 *) (fhdr + 1) - (u8 *) (skb->ipv6_hdr + 1))); + + + nfp->skb = skb; + nfp->fhdr = fhdr; + + nfp->next = NULL; + + bptr = &fq->fragments; + + + for (fp = fq->fragments; fp; fp=fp->next) + { + if (nfp->offset <= fp->offset) + break; + bptr = &fp->next; + } + + if (fp && fp->offset == nfp->offset) + { + if (fp->len != nfp->len) + { + /* this cannot happen */ + printk(KERN_DEBUG "reasm_queue: dup with wrong len\n"); + } + + /* duplicate. discard it. */ + kfree_skb(skb, FREE_READ); + kfree(nfp); + return; + } + + + *bptr = nfp; + nfp->next = fp; +} + +/* + * check if this fragment completes the packet + * returns true on success + */ +static int reasm_frag_1(struct frag_queue *fq, struct sk_buff **skb_in) +{ + struct ipv6_frag *fp; + struct ipv6_frag *tail = NULL; + struct sk_buff *skb; + __u32 offset = 0; + __u32 payload_len; + __u16 unfrag_len; + __u16 copy; + int nh; + + + for(fp = fq->fragments; fp; fp=fp->next) + { + if (offset != fp->offset) + return 0; + + offset += fp->len; + tail = fp; + } + + /* + * we know the m_flag arrived and we have a queue, + * starting from 0, without gaps. + * this means we have all fragments. 
+ */ + + unfrag_len = (u8 *) (tail->fhdr) - (u8 *) (tail->skb->ipv6_hdr + 1); + + payload_len = (unfrag_len + tail->offset + + (tail->skb->tail - (__u8 *) (tail->fhdr + 1))); + + printk(KERN_DEBUG "reasm: payload len = %d\n", payload_len); + + if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL) + { + printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n"); + fq_free(fq); + return 1; + } + + copy = unfrag_len + sizeof(struct ipv6hdr); + + skb->ipv6_hdr = (struct ipv6hdr *) skb->data; + + skb->free = 1; + skb->dev = fq->dev; + + + nh = fq->nexthdr; + + *(fq->nhptr) = nh; + memcpy(skb_put(skb, copy), tail->skb->ipv6_hdr, copy); + + skb->h.raw = skb->tail; + + skb->ipv6_hdr->payload_len = ntohs(payload_len); + + *skb_in = skb; + + /* + * FIXME: If we don't have a checksum we ought to be able + * to defragment and checksum in this pass. [AC] + */ + for(fp = fq->fragments; fp; ) + { + struct ipv6_frag *back; + + memcpy(skb_put(skb, fp->len), (__u8*)(fp->fhdr + 1), fp->len); + kfree_skb(fp->skb, FREE_READ); + back = fp; + fp=fp->next; + kfree(back); + } + + fq->prev->next = fq->next; + fq->next->prev = fq->prev; + + fq->prev = fq->next = NULL; + + kfree(fq); + + return nh; +} + + + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h -c -o reassembly.o reassembly.c" + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/sit.c linux/net/ipv6/sit.c --- v2.1.7/linux/net/ipv6/sit.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/sit.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,578 @@ +/* + * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT) + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the 
GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static int sit_init_dev(struct device *dev); + +static struct device sit_device = { + "sit0", + 0, 0, 0, 0, + 0x0, 0, + 0, 0, 0, NULL, sit_init_dev +}; + +static unsigned long sit_gc_last_run; +static void sit_mtu_cache_gc(void); + +static int sit_xmit(struct sk_buff *skb, + struct device *dev); +static int sit_rcv(struct sk_buff *skb, + struct device *dev, + struct options *opt, + __u32 daddr, unsigned short len, + __u32 saddr, int redo, + struct inet_protocol * protocol); + +static int sit_open(struct device *dev); +static int sit_close(struct device *dev); + +static struct enet_statistics * sit_get_stats(struct device *dev); + +static void sit_err(int type, int code, + unsigned char *buff, __u32 info, + __u32 daddr, __u32 saddr, + struct inet_protocol *protocol); + +static struct inet_protocol sit_protocol = { + sit_rcv, + sit_err, + 0, + IPPROTO_IPV6, + 0, + NULL, + "IPv6" +}; + +#define SIT_NUM_BUCKETS 16 + +struct sit_mtu_info *sit_mtu_cache[SIT_NUM_BUCKETS]; + +static int vif_num = 0; +static struct sit_vif *vif_list = NULL; + +static __inline__ __u32 sit_addr_hash(__u32 addr) +{ + + __u32 hash_val; + + hash_val = addr; + + hash_val ^= hash_val >> 16; + hash_val ^= hash_val >> 8; + + return (hash_val & (SIT_NUM_BUCKETS - 1)); +} + +static void sit_cache_insert(__u32 addr, int mtu) +{ + struct sit_mtu_info *minfo; + int hash; + + minfo = kmalloc(sizeof(struct sit_mtu_info), GFP_ATOMIC); + + if (minfo == NULL) + return; + + minfo->addr = addr; + minfo->tstamp = jiffies; + minfo->mtu = mtu; + + hash = sit_addr_hash(addr); + + minfo->next = sit_mtu_cache[hash]; + sit_mtu_cache[hash] = 
minfo; +} + +static struct sit_mtu_info * sit_mtu_lookup(__u32 addr) +{ + struct sit_mtu_info *iter; + int hash; + + hash = sit_addr_hash(addr); + + for(iter = sit_mtu_cache[hash]; iter; iter=iter->next) + { + if (iter->addr == addr) + { + iter->tstamp = jiffies; + break; + } + } + + /* + * run garbage collector + */ + + if (jiffies - sit_gc_last_run > SIT_GC_FREQUENCY) + { + sit_mtu_cache_gc(); + sit_gc_last_run = jiffies; + } + + return iter; +} + +static void sit_mtu_cache_gc(void) +{ + struct sit_mtu_info *iter, *back; + unsigned long now = jiffies; + int i; + + for (i=0; i < SIT_NUM_BUCKETS; i++) + { + back = NULL; + for (iter = sit_mtu_cache[i]; iter;) + { + if (now - iter->tstamp > SIT_GC_TIMEOUT) + { + struct sit_mtu_info *old; + + old = iter; + iter = iter->next; + + if (back) + { + back->next = iter; + } + else + { + sit_mtu_cache[i] = iter; + } + + kfree(old); + continue; + } + back = iter; + iter = iter->next; + } + } +} + +static int sit_init_dev(struct device *dev) +{ + int i; + + dev->open = sit_open; + dev->stop = sit_close; + + dev->hard_start_xmit = sit_xmit; + dev->get_stats = sit_get_stats; + + dev->priv = kmalloc(sizeof(struct enet_statistics), GFP_KERNEL); + + if (dev->priv == NULL) + return -ENOMEM; + + memset(dev->priv, 0, sizeof(struct enet_statistics)); + + + for (i = 0; i < DEV_NUMBUFFS; i++) + skb_queue_head_init(&dev->buffs[i]); + + dev->hard_header = NULL; + dev->rebuild_header = NULL; + dev->set_mac_address = NULL; + dev->header_cache_bind = NULL; + dev->header_cache_update= NULL; + + dev->type = ARPHRD_SIT; + + dev->hard_header_len = MAX_HEADER; + dev->mtu = 1500 - sizeof(struct iphdr); + dev->addr_len = 0; + dev->tx_queue_len = 2; + + memset(dev->broadcast, 0, MAX_ADDR_LEN); + memset(dev->dev_addr, 0, MAX_ADDR_LEN); + + dev->flags = IFF_NOARP; + + dev->family = AF_INET6; + dev->pa_addr = 0; + dev->pa_brdaddr = 0; + dev->pa_dstaddr = 0; + dev->pa_mask = 0; + dev->pa_alen = 4; + + return 0; +} + +static int sit_init_vif(struct device 
*dev) +{ + int i; + + dev->flags = IFF_NOARP|IFF_POINTOPOINT|IFF_MULTICAST; + dev->priv = kmalloc(sizeof(struct enet_statistics), GFP_KERNEL); + + if (dev->priv == NULL) + return -ENOMEM; + + memset(dev->priv, 0, sizeof(struct enet_statistics)); + + for (i = 0; i < DEV_NUMBUFFS; i++) + skb_queue_head_init(&dev->buffs[i]); + + return 0; +} + +static int sit_open(struct device *dev) +{ + return 0; +} + +static int sit_close(struct device *dev) +{ + return 0; +} + + +int sit_init(void) +{ + int i; + + /* register device */ + + if (register_netdev(&sit_device) != 0) + { + return -EIO; + } + + inet_add_protocol(&sit_protocol); + + for (i=0; i < SIT_NUM_BUCKETS; i++) + sit_mtu_cache[i] = NULL; + + sit_gc_last_run = jiffies; + + return 0; +} + +struct device *sit_add_tunnel(__u32 dstaddr) +{ + struct sit_vif *vif; + struct device *dev; + + vif = kmalloc(sizeof(struct sit_vif), GFP_KERNEL); + if (vif == NULL) + return NULL; + + /* + * Create PtoP configured tunnel + */ + + dev = kmalloc(sizeof(struct device), GFP_KERNEL); + if (dev == NULL) + return NULL; + + memcpy(dev, &sit_device, sizeof(struct device)); + dev->init = sit_init_vif; + dev->pa_dstaddr = dstaddr; + + dev->name = vif->name; + sprintf(vif->name, "sit%d", ++vif_num); + + register_netdev(dev); + + vif->dev = dev; + vif->next = vif_list; + vif_list = vif; + + return dev; +} + +void sit_cleanup(void) +{ + struct sit_vif *vif; + + for (vif = vif_list; vif;) + { + struct device *dev = vif->dev; + struct sit_vif *cur; + + unregister_netdev(dev); + kfree(dev->priv); + kfree(dev); + + cur = vif; + vif = vif->next; + } + + vif_list = NULL; + + unregister_netdev(&sit_device); + inet_del_protocol(&sit_protocol); + +} + + + +/* + * receive IPv4 ICMP messages + */ + +static void sit_err(int type, int code, unsigned char *buff, __u32 info, + __u32 daddr, __u32 saddr, struct inet_protocol *protocol) + +{ + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + { + struct sit_mtu_info *minfo; + + info -= sizeof(struct 
iphdr); + + minfo = sit_mtu_lookup(daddr); + + printk(KERN_DEBUG "sit: %08lx pmtu = %ul\n", ntohl(saddr), + info); + if (minfo == NULL) + { + minfo = kmalloc(sizeof(struct sit_mtu_info), + GFP_ATOMIC); + + if (minfo == NULL) + return; + + start_bh_atomic(); + sit_cache_insert(daddr, info); + end_bh_atomic(); + } + else + { + minfo->mtu = info; + } + } +} + +static int sit_rcv(struct sk_buff *skb, struct device *idev, + struct options *opt, + __u32 daddr, unsigned short len, + __u32 saddr, int redo, struct inet_protocol * protocol) +{ + struct enet_statistics *stats; + struct device *dev = NULL; + struct sit_vif *vif; + + skb->h.raw = skb_pull(skb, skb->h.raw - skb->data); + skb->protocol = __constant_htons(ETH_P_IPV6); + + for (vif = vif_list; vif; vif = vif->next) + { + if (saddr == vif->dev->pa_dstaddr) + { + dev = vif->dev; + break; + } + } + + if (dev == NULL) + { + dev = &sit_device; + } + + skb->dev = dev; + skb->ip_summed = CHECKSUM_NONE; + + stats = (struct enet_statistics *)dev->priv; + stats->rx_packets++; + + ipv6_rcv(skb, dev, NULL); + return 0; +} + +static int sit_xmit(struct sk_buff *skb, struct device *dev) +{ + struct enet_statistics *stats; + struct sit_mtu_info *minfo; + struct in6_addr *addr6; + unsigned long flags; + struct rtable *rt; + struct iphdr *iph; + __u32 saddr; + __u32 daddr; + __u32 raddr; + int addr_type; + int mtu; + int len; + + /* + * Make sure we are not busy (check lock variable) + */ + + stats = (struct enet_statistics *)dev->priv; + save_flags(flags); + cli(); + if (dev->tbusy != 0) + { + restore_flags(flags); + printk(KERN_DEBUG "sit_xmit: busy\n"); + return(1); + } + dev->tbusy = 1; + restore_flags(flags); + + daddr = dev->pa_dstaddr; + if (daddr == 0) + { + addr6 = &skb->ipv6_hdr->daddr; + addr_type = ipv6_addr_type(addr6); + + if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + { + printk(KERN_DEBUG "sit_xmit: non v4 address\n"); + goto on_error; + } + } + + len = skb->tail - (skb->data + sizeof(struct ipv6hdr)); + + if 
(skb->sk) + { + atomic_sub(skb->truesize, &skb->sk->wmem_alloc); + } + + skb->sk = NULL; + + iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr)); + + skb->protocol = htons(ETH_P_IP); + + /* get route */ + + rt = ip_rt_route(daddr, skb->localroute); + + if (rt == NULL) + { + printk(KERN_DEBUG "sit: no route to host\n"); + goto on_error; + } + + minfo = sit_mtu_lookup(daddr); + + if (minfo) + mtu = minfo->mtu; + else + mtu = rt->rt_dev->mtu; + + if (mtu > 576 && len > mtu) + { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + goto on_error; + } + + saddr = rt->rt_src; + skb->dev = rt->rt_dev; + raddr = rt->rt_gateway; + + if (raddr == 0) + raddr = daddr; + + /* now for the device header */ + + skb->arp = 1; + + if (skb->dev->hard_header_len) + { + int mac; + + if (skb->data - skb->head < skb->dev->hard_header_len) + { + printk(KERN_DEBUG "sit: space at head < dev header\n"); + goto on_error; + } + + if (skb->dev->hard_header) + { + mac = skb->dev->hard_header(skb, skb->dev, ETH_P_IP, + NULL, NULL, len); + + if (mac < 0) + skb->arp = 0; + + skb->raddr = raddr; + } + + } + + ip_rt_put(rt); + + + iph->version = 4; + iph->ihl = 5; + iph->tos = 0; /* tos set to 0... 
*/ + + if (mtu > 576) + { + iph->frag_off = htons(IP_DF); + } + else + iph->frag_off = 0; + + iph->ttl = 64; + iph->saddr = saddr; + iph->daddr = daddr; + iph->protocol = IPPROTO_IPV6; + skb->ip_hdr = iph; + + ip_send_check(iph); + + ip_queue_xmit(NULL, skb->dev, skb, 1); + + stats->tx_packets++; + dev->tbusy=0; + + return 0; + + on_error: + kfree_skb(skb, FREE_WRITE); + dev->tbusy=0; + stats->tx_errors++; + return 0; +} + +static struct enet_statistics *sit_get_stats(struct device *dev) +{ + return((struct enet_statistics*) dev->priv); +} + + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h -c -o sit.o sit.c" + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/sysctl_net_ipv6.c linux/net/ipv6/sysctl_net_ipv6.c --- v2.1.7/linux/net/ipv6/sysctl_net_ipv6.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/sysctl_net_ipv6.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,78 @@ +/* + * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + + +int ipv6_hop_limit = IPV6_DEFAULT_HOPLIMIT; + +int ipv6_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp, + void *buffer, size_t *lenp) +{ + int val = ipv6_forwarding; + int retv; + + retv = proc_dointvec(ctl, write, filp, buffer, lenp); + + if (write) + { + if (ipv6_forwarding && val == 0) { + printk(KERN_DEBUG "sysctl: IPv6 forwarding enabled\n"); + ndisc_forwarding_on(); + addrconf_forwarding_on(); + } + + if (ipv6_forwarding == 0 && val) { + ndisc_forwarding_off(); + } + } + return retv; +} + +ctl_table ipv6_table[] = { + {NET_IPV6_FORWARDING, "ipv6_forwarding", + &ipv6_forwarding, sizeof(int), 0644, NULL, + &ipv6_sysctl_forwarding}, + + {NET_IPV6_HOPLIMIT, "ipv6_hop_limit", + &ipv6_hop_limit, sizeof(int), 0644, NULL, + &proc_dointvec}, + + {0} +}; + +#ifdef MODULE +static struct ctl_table_header *ipv6_sysctl_header; +static struct ctl_table ipv6_root_table[]; +static struct ctl_table ipv6_net_table[]; + + +ctl_table ipv6_root_table[] = { + {CTL_NET, "net", NULL, 0, 0555, ipv6_net_table}, + {0} +}; + +ctl_table ipv6_net_table[] = { + {NET_IPV6, "ipv6", NULL, 0, 0555, ipv6_table}, + {0} +}; + +void ipv6_sysctl_register(void) +{ + ipv6_sysctl_header = register_sysctl_table(ipv6_root_table, 0); +} + +void ipv6_sysctl_unregister(void) +{ + unregister_sysctl_table(ipv6_sysctl_header); +} + +#endif + diff -u --recursive --new-file v2.1.7/linux/net/ipv6/tcp_ipv6.c linux/net/ipv6/tcp_ipv6.c --- v2.1.7/linux/net/ipv6/tcp_ipv6.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/tcp_ipv6.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,1225 @@ +/* + * TCP over IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * $Id: tcp_ipv6.c,v 1.15 1996/10/29 22:45:53 roque Exp $ + * + * Based on: + * linux/net/ipv4/tcp.c + * linux/net/ipv4/tcp_input.c + * linux/net/ipv4/tcp_output.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of 
the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +static void tcp_v6_send_reset(struct in6_addr *saddr, struct in6_addr *daddr, + struct tcphdr *th, struct proto *prot, + struct ipv6_options *opt, + struct device *dev, int pri, int hop_limit); + +static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, + struct sk_buff *skb); + +static int tcp_v6_backlog_rcv(struct sock *sk, struct sk_buff *skb); +static int tcp_v6_build_header(struct sock *sk, struct sk_buff *skb); + +static struct tcp_func ipv6_mapped; +static struct tcp_func ipv6_specific; + +static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, + struct in6_addr *saddr, + struct in6_addr *daddr, + unsigned long base) +{ + return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); +} + +static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) +{ + __u32 si; + __u32 di; + + if (skb->protocol == __constant_htons(ETH_P_IPV6)) + { + si = skb->ipv6_hdr->saddr.s6_addr32[3]; + di = skb->ipv6_hdr->daddr.s6_addr32[3]; + } + else + { + si = skb->saddr; + di = skb->daddr; + } + + return secure_tcp_sequence_number(di, si, + skb->h.th->dest, + skb->h.th->source); +} + +static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct dest_entry *dc; + struct inet6_ifaddr *ifa; + struct tcphdr *th; + __u8 *ptr; + struct sk_buff *buff; + struct sk_buff *skb1; + int addr_type; + int tmp; + + if (sk->state != TCP_CLOSE) + return(-EISCONN); + + /* + * Don't allow a double connect. 
+ */ + + if(!ipv6_addr_any(&np->daddr)) + return -EINVAL; + + if (addr_len < sizeof(struct sockaddr_in6)) + return(-EINVAL); + + if (usin->sin6_family && usin->sin6_family != AF_INET6) + return(-EAFNOSUPPORT); + + /* + * connect() to INADDR_ANY means loopback (BSD'ism). + */ + + if(ipv6_addr_any(&usin->sin6_addr)) + usin->sin6_addr.s6_addr[15] = 0x1; + + addr_type = ipv6_addr_type(&usin->sin6_addr); + + if(addr_type & IPV6_ADDR_MULTICAST) + { + return -ENETUNREACH; + } + + /* + * connect to self not allowed + */ + + if (ipv6_addr_cmp(&usin->sin6_addr, &np->saddr) == 0 && + usin->sin6_port == sk->dummy_th.source) + { + return (-EINVAL); + } + + memcpy(&np->daddr, &usin->sin6_addr, sizeof(struct in6_addr)); + + /* + * TCP over IPv4 + */ + + if (addr_type == IPV6_ADDR_MAPPED) + { + struct sockaddr_in sin; + int err; + + printk(KERN_DEBUG "connect: ipv4 mapped\n"); + + sin.sin_family = AF_INET; + sin.sin_port = usin->sin6_port; + sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; + + sk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped; + sk->backlog_rcv = tcp_v4_backlog_rcv; + + err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); + + if (err) + { + sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific; + sk->backlog_rcv = tcp_v6_backlog_rcv; + } + + return err; + } + + dc = ipv6_dst_route(&np->daddr, NULL, (sk->localroute ? RTI_GATEWAY : 0)); + + if (dc == NULL) + { + return -ENETUNREACH; + } + + np->dest = dc; + np->dc_sernum = (dc->rt.fib_node ? 
dc->rt.fib_node->fn_sernum : 0); + + ifa = ipv6_get_saddr((struct rt6_info *)dc, &np->daddr); + + if (ifa == NULL) + { + return -ENETUNREACH; + } + + + /* + * Init variables + */ + + lock_sock(sk); + + sk->dummy_th.dest = usin->sin6_port; + sk->write_seq = secure_tcp_sequence_number(np->saddr.s6_addr32[3], + np->daddr.s6_addr32[3], + sk->dummy_th.source, + sk->dummy_th.dest); + + tp->snd_wnd = 0; + tp->snd_wl1 = 0; + tp->snd_wl2 = sk->write_seq; + tp->snd_una = sk->write_seq; + + tp->rcv_nxt = 0; + + sk->err = 0; + + release_sock(sk); + + buff = sock_wmalloc(sk, MAX_SYN_SIZE, 0, GFP_KERNEL); + + if (buff == NULL) + { + return(-ENOMEM); + } + lock_sock(sk); + buff->sk = sk; + buff->free = 0; + buff->localroute = sk->localroute; + + tmp = tcp_v6_build_header(sk, buff); + + /* set the source address */ + + memcpy(&np->saddr, &ifa->addr, sizeof(struct in6_addr)); + memcpy(&np->rcv_saddr, &ifa->addr, sizeof(struct in6_addr)); + + /* build the tcp header */ + th = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr)); + buff->h.th = th; + + memcpy(th, (void *) &(sk->dummy_th), sizeof(*th)); + buff->seq = sk->write_seq++; + th->seq = htonl(buff->seq); + tp->snd_nxt = sk->write_seq; + buff->end_seq = sk->write_seq; + th->ack = 0; + th->window = 2; + th->syn = 1; + th->doff = 6; + + sk->window_clamp=0; + + if ((dc->dc_flags & DCF_PMTU)) + sk->mtu = dc->dc_pmtu; + else + sk->mtu = dc->rt.rt_dev->mtu; + + sk->mss = sk->mtu - sizeof(struct ipv6hdr) - sizeof(struct tcphdr); + + /* + * Put in the TCP options to say MTU. 
+ */ + + ptr = skb_put(buff,4); + ptr[0] = 2; + ptr[1] = 4; + ptr[2] = (sk->mss) >> 8; + ptr[3] = (sk->mss) & 0xff; + buff->csum = csum_partial(ptr, 4, 0); + + tcp_v6_send_check(sk, th, sizeof(struct tcphdr) + 4, buff); + + tcp_set_state(sk, TCP_SYN_SENT); + + /* FIXME: should use dcache->rtt if availiable */ + tp->rto = TCP_TIMEOUT_INIT; + + tcp_init_xmit_timers(sk); + + sk->retransmits = 0; + + skb_queue_tail(&sk->write_queue, buff); + sk->packets_out++; + buff->when = jiffies; + skb1 = skb_clone(buff, GFP_KERNEL); + sk->wmem_alloc += skb1->truesize; + + tmp = ipv6_xmit(sk, skb1, &np->saddr, &np->daddr, NULL, IPPROTO_TCP); + + /* Timer for repeating the SYN until an answer */ + + tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); + tcp_statistics.TcpActiveOpens++; + tcp_statistics.TcpOutSegs++; + + release_sock(sk); + + return(tmp); +} + +static int tcp_v6_sendmsg(struct sock *sk, struct msghdr *msg, + int len, int nonblock, int flags) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + int retval = -EINVAL; + + /* + * Do sanity checking for sendmsg/sendto/send + */ + + if (flags & ~(MSG_OOB|MSG_DONTROUTE)) + goto out; + if (msg->msg_name) { + struct sockaddr_in6 *addr=(struct sockaddr_in6 *)msg->msg_name; + + if (msg->msg_namelen < sizeof(*addr)) + goto out; + + if (addr->sin6_family && addr->sin6_family != AF_INET6) + goto out; + retval = -ENOTCONN; + + if(sk->state == TCP_CLOSE) + goto out; + retval = -EISCONN; + if (addr->sin6_port != sk->dummy_th.dest) + goto out; + if (ipv6_addr_cmp(&addr->sin6_addr, &np->daddr)) + goto out; + } + + lock_sock(sk); + retval = tcp_do_sendmsg(sk, msg->msg_iovlen, msg->msg_iov, + len, nonblock, flags); + + release_sock(sk); + +out: + return retval; +} + +void tcp_v6_err(int type, int code, unsigned char *header, __u32 info, + struct in6_addr *saddr, struct in6_addr *daddr, + struct inet6_protocol *protocol) +{ + struct tcphdr *th = (struct tcphdr *)header; + struct ipv6_pinfo *np; + struct sock *sk; + int err; + int opening; 
+ + sk = inet6_get_sock(&tcpv6_prot, daddr, saddr, th->source, th->dest); + + if (sk == NULL) + { + return; + } + + np = &sk->net_pinfo.af_inet6; + + if (type == ICMPV6_PKT_TOOBIG) + { + /* icmp should have updated the destination cache entry */ + + np->dest = ipv6_dst_check(np->dest, &np->daddr, np->dc_sernum, + 0); + + np->dc_sernum = (np->dest->rt.fib_node ? + np->dest->rt.fib_node->fn_sernum : 0); + + if (np->dest->dc_flags & DCF_PMTU) + sk->mtu = np->dest->dc_pmtu; + + sk->mtu = (sk->mtu - sizeof(struct ipv6hdr) - + sizeof(struct tcphdr)); + + return; + } + + opening = (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV); + + if (icmpv6_err_convert(type, code, &err) || opening) + { + sk->err = err; + if (opening) + { + tcp_statistics.TcpAttemptFails++; + tcp_set_state(sk,TCP_CLOSE); + sk->error_report(sk); + } + } + else + sk->err_soft = err; +} + + +static void tcp_v6_send_synack(struct sock *sk, struct open_request *req) +{ + struct tcp_v6_open_req *af_req = (struct tcp_v6_open_req *) req; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct sk_buff * skb; + struct tcphdr *th; + unsigned char *ptr; + struct dest_entry *dc; + int mss; + + skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC); + + if (skb == NULL) + { + return; + } + + skb_reserve(skb, (MAX_HEADER + 15) & ~15); + skb->ipv6_hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + + dc = ipv6_dst_route(&af_req->rmt_addr, af_req->dev, 0); + + skb->dev = af_req->dev; + + if (dc) + { + if (dc->dc_flags & DCF_PMTU) + mss = dc->dc_pmtu; + else + mss = dc->dc_nexthop->dev->mtu; + mss -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + + ipv6_dst_unlock(dc); + } + else + mss = 516; + + th =(struct tcphdr *) skb_put(skb, sizeof(struct tcphdr)); + skb->h.th = th; + memset(th, 0, sizeof(struct tcphdr)); + + th->syn = 1; + th->ack = 1; + + th->source = sk->dummy_th.source; + th->dest = req->rmt_port; + + skb->seq = req->snt_isn; + skb->end_seq = skb->seq + 1; + + th->seq = ntohl(skb->seq); + 
th->ack_seq = htonl(req->rcv_isn + 1); + th->doff = sizeof(*th)/4 + 1; + + th->window = ntohs(tp->rcv_wnd); + + ptr = skb_put(skb, TCPOLEN_MSS); + ptr[0] = TCPOPT_MSS; + ptr[1] = TCPOLEN_MSS; + ptr[2] = (mss >> 8) & 0xff; + ptr[3] = mss & 0xff; + skb->csum = csum_partial(ptr, TCPOLEN_MSS, 0); + + th->check = tcp_v6_check(th, sizeof(*th) + TCPOLEN_MSS, &af_req->loc_addr, + &af_req->rmt_addr, + csum_partial((char *)th, sizeof(*th), skb->csum)); + + ipv6_xmit(sk, skb, &af_req->loc_addr, &af_req->rmt_addr, af_req->opt, + IPPROTO_TCP); + + tcp_statistics.TcpOutSegs++; + +} + +static void tcp_v6_or_free(struct open_request *req) +{ +} + +static struct or_calltable or_ipv6 = { + tcp_v6_send_synack, + tcp_v6_or_free +}; + +static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, + __u32 isn) +{ + struct tcp_v6_open_req *af_req; + struct open_request *req; + + /* If the socket is dead, don't accept the connection. */ + if (sk->dead) + { + if(sk->debug) + { + printk("Reset on %p: Connect on dead socket.\n",sk); + } + tcp_statistics.TcpAttemptFails++; + return -ENOTCONN; + } + + if (skb->protocol == __constant_htons(ETH_P_IP)) + { + return tcp_v4_conn_request(sk, skb, ptr, isn); + } + + /* + * There are no SYN attacks on IPv6, yet... 
+ */ + if (sk->ack_backlog >= sk->max_ack_backlog) + { + printk(KERN_DEBUG "droping syn ack:%d max:%d\n", + sk->ack_backlog, sk->max_ack_backlog); + tcp_statistics.TcpAttemptFails++; + goto exit; + } + + af_req = kmalloc(sizeof(struct tcp_v6_open_req), GFP_ATOMIC); + + if (af_req == NULL) + { + tcp_statistics.TcpAttemptFails++; + goto exit; + } + + sk->ack_backlog++; + req = (struct open_request *) af_req; + + memset(af_req, 0, sizeof(struct tcp_v6_open_req)); + + req->rcv_isn = skb->seq; + req->snt_isn = isn; + + /* mss */ + req->mss = tcp_parse_options(skb->h.th); + + if (!req->mss) + { + req->mss = 536; + } + + req->rmt_port = skb->h.th->source; + + ipv6_addr_copy(&af_req->rmt_addr, &skb->ipv6_hdr->saddr); + ipv6_addr_copy(&af_req->loc_addr, &skb->ipv6_hdr->daddr); + + /* FIXME: options */ + + /* keep incoming device so that link locals have meaning */ + af_req->dev = skb->dev; + + req->class = &or_ipv6; + + tcp_v6_send_synack(sk, req); + + req->expires = jiffies + TCP_TIMEOUT_INIT; + tcp_inc_slow_timer(TCP_SLT_SYNACK); + tcp_synq_queue(&sk->tp_pinfo.af_tcp, req); + + sk->data_ready(sk, 0); + + exit: + kfree_skb(skb, FREE_READ); + return 0; +} + +static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, + struct sk_buff *skb) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + th->check = 0; + + th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, + csum_partial((char *)th, sizeof(*th), + skb->csum)); +} + +static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct open_request *req) +{ + struct tcp_v6_open_req *af_req = (struct tcp_v6_open_req *) req; + struct ipv6_pinfo *np; + struct dest_entry *dc; + struct tcp_opt *newtp; + struct sock *newsk; + + + if (skb->protocol == __constant_htons(ETH_P_IP)) + { + /* + * v6 mapped + */ + + newsk = tcp_v4_syn_recv_sock(sk, skb, req); + + if (newsk == NULL) + return NULL; + + np = &newsk->net_pinfo.af_inet6; + + ipv6_addr_set(&np->daddr, 0, 0, 
__constant_htonl(0x0000FFFF), + newsk->daddr); + + ipv6_addr_set(&np->saddr, 0, 0, __constant_htonl(0x0000FFFF), + newsk->saddr); + + ipv6_addr_copy(&np->rcv_saddr, &np->saddr); + + newsk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped; + newsk->backlog_rcv = tcp_v4_backlog_rcv; + + return newsk; + } + + newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC); + if (newsk == NULL) + { + return NULL; + } + + memcpy(newsk, sk, sizeof(*newsk)); + newsk->opt = NULL; + newsk->ip_route_cache = NULL; + skb_queue_head_init(&newsk->write_queue); + skb_queue_head_init(&newsk->receive_queue); + skb_queue_head_init(&newsk->out_of_order_queue); + + /* + * Unused + */ + + newsk->send_head = NULL; + newsk->send_tail = NULL; + + newtp = &(newsk->tp_pinfo.af_tcp); + np = &newsk->net_pinfo.af_inet6; + + newtp->send_head = NULL; + newtp->retrans_head = NULL; + + newtp->pending = 0; + + skb_queue_head_init(&newsk->back_log); + + newsk->prot->init(newsk); + + newsk->cong_count = 0; + newsk->ssthresh = 0; + newtp->backoff = 0; + newsk->blog = 0; + newsk->intr = 0; + newsk->proc = 0; + newsk->done = 0; + newsk->partial = NULL; + newsk->pair = NULL; + newsk->wmem_alloc = 0; + newsk->rmem_alloc = 0; + newsk->localroute = sk->localroute; + + newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF; + + newsk->err = 0; + newsk->shutdown = 0; + newsk->ack_backlog = 0; + + newsk->fin_seq = req->rcv_isn; + newsk->syn_seq = req->rcv_isn; + newsk->state = TCP_SYN_RECV; + newsk->timeout = 0; + newsk->ip_xmit_timeout = 0; + + newsk->write_seq = req->snt_isn; + + newtp->snd_wnd = ntohs(skb->h.th->window); + newsk->max_window = newtp->snd_wnd; + newtp->snd_wl1 = req->rcv_isn; + newtp->snd_wl2 = newsk->write_seq; + newtp->snd_una = newsk->write_seq++; + newtp->snd_nxt = newsk->write_seq; + + newsk->urg_data = 0; + newsk->packets_out = 0; + newsk->retransmits = 0; + newsk->linger=0; + newsk->destroy = 0; + init_timer(&newsk->timer); + newsk->timer.data = (unsigned long) newsk; + newsk->timer.function = 
&net_timer; + + tcp_init_xmit_timers(newsk); + + newsk->dummy_th.source = sk->dummy_th.source; + newsk->dummy_th.dest = req->rmt_port; + + newtp->rcv_nxt = req->rcv_isn + 1; + newtp->rcv_wup = req->rcv_isn + 1; + newsk->copied_seq = req->rcv_isn + 1; + + newsk->socket = NULL; + + ipv6_addr_copy(&np->daddr, &af_req->rmt_addr); + ipv6_addr_copy(&np->saddr, &af_req->loc_addr); + ipv6_addr_copy(&np->rcv_saddr, &af_req->loc_addr); + + /* + * options / mss + */ + + dc = ipv6_dst_route(&af_req->rmt_addr, af_req->dev, 0); + np->dest = dc; + + if (np->dest && (np->dest->dc_flags & DCF_PMTU)) + newsk->mtu = np->dest->dc_pmtu; + else + newsk->mtu = af_req->dev->mtu; + + newsk->mss = min(req->mss, (newsk->mtu - sizeof(struct ipv6hdr) - + sizeof(struct tcphdr))); + + newsk->daddr = LOOPBACK4_IPV6; + newsk->saddr = LOOPBACK4_IPV6; + newsk->rcv_saddr= LOOPBACK4_IPV6; + + inet_put_sock(newsk->num, newsk); + + return newsk; + +} + +static void tcp_v6_send_reset(struct in6_addr *saddr, struct in6_addr *daddr, + struct tcphdr *th, struct proto *prot, + struct ipv6_options *opt, + struct device *dev, int pri, int hop_limit) +{ + struct sk_buff *buff; + struct tcphdr *t1; + + if(th->rst) + return; + + /* + * We need to grab some memory, and put together an RST, + * and then put it into the queue to be sent. + */ + + buff = alloc_skb(MAX_RESET_SIZE, GFP_ATOMIC); + if (buff == NULL) + return; + + buff->sk = NULL; + buff->dev = dev; + buff->localroute = 0; + + tcp_v6_build_header(NULL, buff); + + t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr)); + memset(t1, 0, sizeof(*t1)); + + /* + * Swap the send and the receive. 
+ */ + + t1->dest = th->source; + t1->source = th->dest; + t1->doff = sizeof(*t1)/4; + t1->rst = 1; + + if(th->ack) + { + t1->seq = th->ack_seq; + } + else + { + t1->ack = 1; + if(!th->syn) + t1->ack_seq = th->seq; + else + t1->ack_seq = htonl(ntohl(th->seq)+1); + } + + buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); + + t1->check = csum_ipv6_magic(saddr, daddr, sizeof(*t1), IPPROTO_TCP, + buff->csum); + + + ipv6_xmit(NULL, buff, saddr, daddr, NULL, IPPROTO_TCP); + + tcp_statistics.TcpOutSegs++; +} + +int tcp_v6_rcv(struct sk_buff *skb, struct device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + struct ipv6_options *opt, unsigned short len, + int redo, struct inet6_protocol *protocol) +{ + struct tcphdr *th; + struct sock *sk; + + /* + * "redo" is 1 if we have already seen this skb but couldn't + * use it at that time (the socket was locked). In that case + * we have already done a lot of the work (looked up the socket + * etc). + */ + + th = skb->h.th; + + sk = skb->sk; + + if (!redo) + { + + if (skb->pkt_type != PACKET_HOST) + goto discard_it; + + /* + * Pull up the IP header. + */ + + skb_pull(skb, skb->h.raw - skb->data); + + /* + * Try to use the device checksum if provided. + */ + + switch (skb->ip_summed) + { + case CHECKSUM_NONE: + skb->csum = csum_partial((char *)th, len, 0); + case CHECKSUM_HW: + if (tcp_v6_check(th,len,saddr,daddr,skb->csum)) + { + printk(KERN_DEBUG "tcp csum failed\n"); + goto discard_it; + } + default: + /* CHECKSUM_UNNECESSARY */ + } + + sk = inet6_get_sock(&tcpv6_prot, daddr, saddr, + th->dest, th->source); + + if (!sk) + { + printk(KERN_DEBUG "socket not found\n"); + goto no_tcp_socket; + } + + skb->sk = sk; + skb->seq = ntohl(th->seq); + skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4; + skb->ack_seq = ntohl(th->ack_seq); + + skb->acked = 0; + skb->used = 0; + skb->free = 1; + } + + /* + * We may need to add it to the backlog here. 
+ */ + + if (sk->users) + { + __skb_queue_tail(&sk->back_log, skb); + return(0); + } + + /* + * Signal NDISC that the connection is making + * "forward progress" + */ + if (sk->state != TCP_LISTEN) + { + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); + + if (after(skb->seq, tp->rcv_nxt) || + after(skb->ack_seq, tp->snd_una)) + { + if (np->dest) + ndisc_validate(np->dest->dc_nexthop); + } + } + + if (!sk->prot) + { + printk(KERN_DEBUG "tcp_rcv: sk->prot == NULL\n"); + return(0); + } + + atomic_add(skb->truesize, &sk->rmem_alloc); + + if (sk->state == TCP_ESTABLISHED) + { + tcp_rcv_established(sk, skb, th, len); + return 0; + } + + if (sk->state == TCP_LISTEN) + { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + struct open_request *req; + struct tcp_v6_open_req *af_req; + + req = tp->syn_wait_queue; + af_req = (struct tcp_v6_open_req *) req; + + if (req) + { + do { + if (!ipv6_addr_cmp(&af_req->rmt_addr, saddr) && + !ipv6_addr_cmp(&af_req->loc_addr, daddr) && + req->rmt_port == th->source) + { + /* match */ + + atomic_sub(skb->truesize, &sk->rmem_alloc); + sk = tp->af_specific->syn_recv_sock(sk, skb, + req); + tcp_dec_slow_timer(TCP_SLT_SYNACK); + + if (sk == NULL) + { + goto no_tcp_socket; + } + + atomic_add(skb->truesize, &sk->rmem_alloc); + req->sk = sk; + skb->sk = sk; + break; + } + + req = req->dl_next; + } while (req != tp->syn_wait_queue); + } + + } + + if (tcp_rcv_state_process(sk, skb, th, opt, len) == 0) + return 0; + +no_tcp_socket: + + /* + * No such TCB. 
If th->rst is 0 send a reset + * (checked in tcp_send_reset) + */ + + tcp_v6_send_reset(daddr, saddr, th, &tcpv6_prot, opt, dev, + skb->ipv6_hdr->priority, 255); + +discard_it: + + /* + * Discard frame + */ + + kfree_skb(skb, FREE_READ); + return 0; + +} + +static int tcp_v6_rebuild_header(struct sock *sk, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + + if (np->dest) + { + np->dest = ipv6_dst_check(np->dest, &np->daddr, + np->dc_sernum, 0); + + } + else + { + np->dest = ipv6_dst_route(&np->daddr, NULL, 0); + } + + if (!np->dest) + { + /* + * lost route to destination + */ + return -1; + } + + np->dc_sernum = (np->dest->rt.fib_node ? + np->dest->rt.fib_node->fn_sernum : 0); + + ipv6_redo_mac_hdr(skb, np->dest->dc_nexthop, + skb->tail - (u8*) skb->ipv6_hdr); + return 0; +} + +static int tcp_v6_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int res; + + res = tcp_v6_rcv(skb, skb->dev, + &skb->ipv6_hdr->saddr, &skb->ipv6_hdr->daddr, + (struct ipv6_options *) skb->proto_priv, + skb->len, 1, + (struct inet6_protocol *) sk->pair); + return res; +} + +static struct sock * tcp_v6_get_sock(struct sk_buff *skb, struct tcphdr *th) +{ + struct in6_addr *saddr; + struct in6_addr *daddr; + struct sock *sk; + + saddr = &skb->ipv6_hdr->saddr; + daddr = &skb->ipv6_hdr->daddr; + + sk = inet6_get_sock(&tcpv6_prot, daddr, saddr, th->source, th->dest); + + return sk; +} + +static int tcp_v6_build_header(struct sock *sk, struct sk_buff *skb) +{ + skb_reserve(skb, (MAX_HEADER + 15) & ~15); + skb->ipv6_hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + + /* + * FIXME: reserve space for option headers + * length member of np->opt + */ + + return 0; +} + +static void tcp_v6_xmit(struct sock *sk, struct device *dev, struct sk_buff *skb, + int free) +{ + struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6; + int err; + + err = ipv6_xmit(sk, skb, &np->saddr, &np->daddr, NULL, IPPROTO_TCP); + + /* + * FIXME: check error handling. 
+ */ + + sk->err_soft = err; +} + + + +static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) +{ + struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; + + sin6->sin6_family = AF_INET6; + memcpy(&sin6->sin6_addr, &np->daddr, sizeof(struct in6_addr)); + sin6->sin6_port = sk->dummy_th.dest; + +} + +static struct tcp_func ipv6_specific = { + tcp_v6_build_header, + tcp_v6_xmit, + tcp_v6_send_check, + tcp_v6_rebuild_header, + tcp_v6_conn_request, + tcp_v6_syn_recv_sock, + tcp_v6_init_sequence, + tcp_v6_get_sock, + ipv6_setsockopt, + ipv6_getsockopt, + v6_addr2sockaddr, + sizeof(struct sockaddr_in6) +}; + +/* + * TCP over IPv4 via INET6 API + */ + +static struct tcp_func ipv6_mapped = { + tcp_v4_build_header, + ip_queue_xmit, + tcp_v4_send_check, + tcp_v4_rebuild_header, + tcp_v6_conn_request, + tcp_v6_syn_recv_sock, + tcp_v6_init_sequence, + tcp_v6_get_sock, + ipv6_setsockopt, + ipv6_getsockopt, + v6_addr2sockaddr, + sizeof(struct sockaddr_in6) +}; + +static int tcp_v6_init_sock(struct sock *sk) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + skb_queue_head_init(&sk->out_of_order_queue); + tcp_init_xmit_timers(sk); + + tp->srtt = 0; + tp->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/ + tp->mdev = TCP_TIMEOUT_INIT; + + tp->ato = 0; + tp->iat = (HZ/5) << 3; + + tp->rcv_wnd = 8192; + + /* start with only sending one packet at a time. */ + sk->cong_window = 1; + sk->ssthresh = 0x7fffffff; + + sk->priority = 1; + sk->state = TCP_CLOSE; + + /* this is how many unacked bytes we will accept for this socket. */ + sk->max_unacked = 2048; /* needs to be at most 2 full packets. 
*/ + sk->max_ack_backlog = SOMAXCONN; + + sk->mtu = 576; + sk->mss = 516; + + sk->dummy_th.doff = sizeof(sk->dummy_th)/4; + + + /* + * Speed up by setting some standard state for the dummy_th + * if TCP uses it (maybe move to tcp_init later) + */ + + sk->dummy_th.ack=1; + sk->dummy_th.doff=sizeof(struct tcphdr)>>2; + + sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific; + + return 0; +} + +static int tcp_v6_destroy_sock(struct sock *sk) +{ + struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6; + struct sk_buff *skb; + + tcp_clear_xmit_timers(sk); + + if (sk->keepopen) + { + tcp_dec_slow_timer(TCP_SLT_KEEPALIVE); + } + + /* + * Cleanup up the write buffer. + */ + + while((skb = skb_dequeue(&sk->write_queue)) != NULL) { + IS_SKB(skb); + skb->free = 1; + kfree_skb(skb, FREE_WRITE); + } + + /* + * Cleans up our, hopefuly empty, out_of_order_queue + */ + + while((skb = skb_dequeue(&sk->out_of_order_queue)) != NULL) { + IS_SKB(skb); + kfree_skb(skb, FREE_READ); + } + + /* + * Release destination entry + */ + + if (np->dest) + { + ipv6_dst_unlock(np->dest); + } + + return 0; +} + + +struct proto tcpv6_prot = { + tcp_close, + tcp_v6_connect, + tcp_accept, + NULL, + tcp_write_wakeup, + tcp_read_wakeup, + tcp_select, + tcp_ioctl, + tcp_v6_init_sock, + tcp_v6_destroy_sock, + tcp_shutdown, + tcp_setsockopt, + tcp_getsockopt, + tcp_v6_sendmsg, + tcp_recvmsg, + NULL, /* No special bind() */ + tcp_v6_backlog_rcv, + 128, + 0, + "TCPv6", + 0, 0, + NULL +}; + +static struct inet6_protocol tcpv6_protocol = +{ + tcp_v6_rcv, /* TCP handler */ + tcp_v6_err, /* TCP error control */ + NULL, /* next */ + IPPROTO_TCP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "TCPv6" /* name */ +}; + + +void tcpv6_init(void) +{ + /* register inet6 protocol */ + inet6_add_protocol(&tcpv6_protocol); +} + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE 
-DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h -c -o tcp_ipv6.o tcp_ipv6.c" + * c-file-style: "Linux" + * End: + */ diff -u --recursive --new-file v2.1.7/linux/net/ipv6/udp.c linux/net/ipv6/udp.c --- v2.1.7/linux/net/ipv6/udp.c Thu Jan 1 02:00:00 1970 +++ linux/net/ipv6/udp.c Sun Nov 3 11:04:46 1996 @@ -0,0 +1,623 @@ +/* + * UDP over IPv6 + * Linux INET6 implementation + * + * Authors: + * Pedro Roque + * + * Based on linux/ipv4/udp.c + * + * $Id: udp.c,v 1.6 1996/10/16 18:34:16 roque Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +struct udp_mib udp_stats_in6; + +/* + * + */ + +int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct in6_addr *daddr; + struct dest_entry *dest; + struct ipv6_pinfo *np; + struct inet6_ifaddr *ifa; + int addr_type; + + if (addr_len < sizeof(*usin)) + return(-EINVAL); + + if (usin->sin6_family && usin->sin6_family != AF_INET6) + return(-EAFNOSUPPORT); + + addr_type = ipv6_addr_type(&usin->sin6_addr); + np = &sk->net_pinfo.af_inet6; + + if (addr_type == IPV6_ADDR_ANY) + { + /* + * connect to self + */ + usin->sin6_addr.s6_addr[15] = 0x01; + } + + daddr = &usin->sin6_addr; + + if (addr_type == IPV6_ADDR_MAPPED) + { + struct sockaddr_in sin; + int err; + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = daddr->s6_addr32[3]; + + err = udp_connect(sk, (struct sockaddr*) &sin, sizeof(sin)); + + if (err < 0) + { + return err; + } + + ipv6_addr_copy(&np->daddr, daddr); + + 
if(ipv6_addr_any(&np->saddr)) + { + ipv6_addr_set(&np->saddr, 0, 0, + __constant_htonl(0x0000ffff), + sk->saddr); + + } + + if(ipv6_addr_any(&np->rcv_saddr)) + { + ipv6_addr_set(&np->rcv_saddr, 0, 0, + __constant_htonl(0x0000ffff), + sk->rcv_saddr); + } + + } + + ipv6_addr_copy(&np->daddr, daddr); + + /* + * Check for a route to destination and obtain the + * destination cache for it. + */ + + dest = ipv6_dst_route(daddr, NULL, sk->localroute ? RTI_GATEWAY : 0); + + np->dest = dest; + + if (dest == NULL) + return -ENETUNREACH; + + /* get the source address used in the appropriate device */ + + ifa = ipv6_get_saddr((struct rt6_info *) dest, daddr); + + if(ipv6_addr_any(&np->saddr)) + { + ipv6_addr_copy(&np->saddr, &ifa->addr); + } + + if(ipv6_addr_any(&np->rcv_saddr)) + { + ipv6_addr_copy(&np->rcv_saddr, &ifa->addr); + sk->rcv_saddr = 0xffffffff; + } + + sk->dummy_th.dest = usin->sin6_port; + + sk->state = TCP_ESTABLISHED; + + return(0); +} + +static void udpv6_close(struct sock *sk, unsigned long timeout) +{ + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + + lock_sock(sk); + sk->state = TCP_CLOSE; + + if (np->dest) + { + ipv6_dst_unlock(np->dest); + } + + release_sock(sk); + destroy_sock(sk); +} + +/* + * This should be easy, if there is something there we + * return it, otherwise we block. + */ + +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len, + int noblock, int flags, int *addr_len) +{ + int copied = 0; + int truesize; + struct sk_buff *skb; + int er; + + + /* + * Check any passed addresses + */ + + if (addr_len) + *addr_len=sizeof(struct sockaddr_in6); + + /* + * From here the generic datagram does a lot of the work. Come + * the finished NET3, it will do _ALL_ the work! 
+ */ + + skb = skb_recv_datagram(sk, flags, noblock, &er); + if(skb==NULL) + return er; + + truesize = skb->tail - skb->h.raw - sizeof(struct udphdr); + copied = min(len, truesize); + + /* + * FIXME : should use udp header size info value + */ + + skb_copy_datagram_iovec(skb,sizeof(struct udphdr),msg->msg_iov,copied); + sk->stamp=skb->stamp; + + /* Copy the address. */ + if (msg->msg_name) + { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *) msg->msg_name; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = skb->h.uh->source; + + if (skb->protocol == __constant_htons(ETH_P_IP)) + { + ipv6_addr_set(&sin6->sin6_addr, 0, 0, + __constant_htonl(0xffff), skb->daddr); + } + else + { + memcpy(&sin6->sin6_addr, &skb->ipv6_hdr->saddr, + sizeof(struct in6_addr)); + + if (msg->msg_control) + { + int err; + + err = datagram_recv_ctl(sk, msg, skb); + + if (err < 0) + { + copied = err; + } + } + } + } + + skb_free_datagram(sk, skb); + return(copied); +} + +void udpv6_err(int type, int code, unsigned char *buff, __u32 info, + struct in6_addr *saddr, struct in6_addr *daddr, + struct inet6_protocol *protocol) +{ + struct sock *sk; + struct udphdr *uh; + int err; + + uh = (struct udphdr *) buff; + + sk = inet6_get_sock(&udpv6_prot, daddr, saddr, uh->source, uh->dest); + + if (sk == NULL) + { + printk(KERN_DEBUG "icmp for unkown sock\n"); + return; + } + + if (icmpv6_err_convert(type, code, &err)) + { + if(sk->bsdism && sk->state!=TCP_ESTABLISHED) + return; + + sk->err = err; + sk->error_report(sk); + } + else + sk->err_soft = err; +} + +static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +{ + + if (sock_queue_rcv_skb(sk,skb)<0) { + udp_stats_in6.UdpInErrors++; + ipv6_statistics.Ip6InDiscards++; + ipv6_statistics.Ip6InDelivers--; + skb->sk = NULL; + kfree_skb(skb, FREE_WRITE); + return 0; + } + udp_stats_in6.UdpInDatagrams++; + return 0; +} + +int udpv6_rcv(struct sk_buff *skb, struct device *dev, + struct in6_addr *saddr, struct in6_addr 
*daddr, + struct ipv6_options *opt, unsigned short len, + int redo, struct inet6_protocol *protocol) +{ + struct sock *sk; + struct udphdr *uh; + int ulen; + + /* + * check if the address is ours... + * I believe that this is being done in IP layer + */ + + uh = (struct udphdr *) skb->h.uh; + + ipv6_statistics.Ip6InDelivers++; + + ulen = ntohs(uh->len); + + if (ulen > len || len < sizeof(*uh)) + { + printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len); + udp_stats_in6.UdpInErrors++; + kfree_skb(skb, FREE_READ); + return(0); + } + + if (uh->check == 0) + { + printk(KERN_DEBUG "IPv6: udp checksum is 0\n"); + goto discard; + } + + switch (skb->ip_summed) { + case CHECKSUM_NONE: + skb->csum = csum_partial((char*)uh, len, 0); + case CHECKSUM_HW: + if (csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, skb->csum)) + { + printk(KERN_DEBUG "IPv6: udp checksum error\n"); + goto discard; + } + } + + len = ulen; + + /* + * Multicast receive code + */ + if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST) + { + struct sock *sk2; + int lport; + + lport = ntohs(uh->dest); + sk = udpv6_prot.sock_array[lport & (SOCK_ARRAY_SIZE-1)]; + + sk = inet6_get_sock_mcast(sk, lport, uh->source, + daddr, saddr); + + if (sk) + { + sk2 = sk; + + while ((sk2 = inet6_get_sock_mcast(sk2->next, lport, + uh->source, + daddr, saddr))) + { + struct sk_buff *buff; + + buff = skb_clone(skb, GFP_ATOMIC); + + if (sock_queue_rcv_skb(sk, buff) < 0) + { + buff->sk = NULL; + kfree_skb(buff, FREE_READ); + } + } + } + if (!sk || sock_queue_rcv_skb(sk, skb) < 0) + { + skb->sk = NULL; + kfree_skb(skb, FREE_READ); + } + return 0; + } + + /* Unicast */ + + /* + * check socket cache ... must talk to Alan about his plans + * for sock caches... i'll skip this for now. 
+ */ + + sk = inet6_get_sock(&udpv6_prot, daddr, saddr, uh->dest, uh->source); + + if (sk == NULL) + { + udp_stats_in6.UdpNoPorts++; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, + 0, dev); + + kfree_skb(skb, FREE_READ); + return(0); + } + + /* deliver */ + + if (sk->users) + { + __skb_queue_tail(&sk->back_log, skb); + } + else + { + udpv6_queue_rcv_skb(sk, skb); + } + + return(0); + + discard: + udp_stats_in6.UdpInErrors++; + kfree_skb(skb, FREE_READ); + return(0); +} + +/* + * Sending + */ + +struct udpv6fakehdr +{ + struct udphdr uh; + struct iovec *iov; + __u32 wcheck; + __u32 pl_len; + struct in6_addr *daddr; +}; + +/* + * with checksum + */ + +static void udpv6_getfrag(const void *data, struct in6_addr *addr, + char *buff, unsigned int offset, unsigned int len) +{ + struct udpv6fakehdr *udh = (struct udpv6fakehdr *) data; + char *dst; + int final = 0; + int clen = len; + + dst = buff; + + if (offset) + { + offset -= sizeof(struct udphdr); + } + else + { + dst += sizeof(struct udphdr); + final = 1; + clen -= sizeof(struct udphdr); + } + + udh->wcheck = csum_partial_copy_fromiovecend(dst, udh->iov, offset, + clen, udh->wcheck); + + if (final) + { + struct in6_addr *daddr; + + udh->wcheck = csum_partial((char *)udh, sizeof(struct udphdr), + udh->wcheck); + + if (udh->daddr) + { + daddr = udh->daddr; + } + else + { + /* + * use packet destination address + * this should improve cache locality + */ + daddr = addr + 1; + } + udh->uh.check = csum_ipv6_magic(addr, daddr, + udh->pl_len, IPPROTO_UDP, + udh->wcheck); + if (udh->uh.check == 0) + udh->uh.check = -1; + + memcpy(buff, udh, sizeof(struct udphdr)); + } +} + +static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen, + int noblock, int flags) +{ + + struct ipv6_options opt_space; + struct udpv6fakehdr udh; + struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; + struct ipv6_options *opt = NULL; + struct device *dev = 
NULL; + int addr_len = msg->msg_namelen; + struct in6_addr *daddr; + struct in6_addr *saddr = NULL; + int len = ulen + sizeof(struct udphdr); + int addr_type; + int err; + + + if (flags & ~MSG_DONTROUTE) + return(-EINVAL); + + if (sin6) + { + if (addr_len < sizeof(*sin6)) + return(-EINVAL); + + if (sin6->sin6_family && sin6->sin6_family != AF_INET6) + return(-EINVAL); + + if (sin6->sin6_port == 0) + return(-EINVAL); + + udh.uh.dest = sin6->sin6_port; + daddr = &sin6->sin6_addr; + + if (np->dest && ipv6_addr_cmp(daddr, &np->daddr)) + { + ipv6_dst_unlock(np->dest); + np->dest = NULL; + } + } + else + { + if (sk->state != TCP_ESTABLISHED) + return(-EINVAL); + + udh.uh.dest = sk->dummy_th.dest; + daddr = &sk->net_pinfo.af_inet6.daddr; + } + + addr_type = ipv6_addr_type(daddr); + + if (addr_type == IPV6_ADDR_MAPPED) + { + struct sockaddr_in sin; + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = daddr->s6_addr32[3]; + + return udp_sendmsg(sk, msg, len, noblock, flags); + } + + udh.daddr = NULL; + + if (msg->msg_control) + { + opt = &opt_space; + memset(opt, 0, sizeof(struct ipv6_options)); + + err = datagram_send_ctl(msg, &dev, &saddr, opt); + if (err < 0) + { + printk(KERN_DEBUG "invalid msg_control\n"); + return err; + } + + if (opt->srcrt) + { + udh.daddr = daddr; + } + } + + udh.uh.source = sk->dummy_th.source; + udh.uh.len = htons(ulen); + udh.uh.check = 0; + udh.iov = msg->msg_iov; + udh.wcheck = 0; + udh.pl_len = len; + + err = ipv6_build_xmit(sk, udpv6_getfrag, &udh, daddr, len, + saddr, dev, opt, IPPROTO_UDP, noblock); + + if (err < 0) + return err; + + udp_stats_in6.UdpOutDatagrams++; + return ulen; +} + +static struct inet6_protocol udpv6_protocol = +{ + udpv6_rcv, /* UDP handler */ + udpv6_err, /* UDP error control */ + NULL, /* next */ + IPPROTO_UDP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "UDPv6" /* name */ +}; + + +struct proto udpv6_prot = { + udpv6_close, + udpv6_connect, + NULL, + NULL, + NULL, + NULL, + datagram_select, + udp_ioctl, 
+ NULL, + NULL, + NULL, + ipv6_setsockopt, + ipv6_getsockopt, + udpv6_sendmsg, + udpv6_recvmsg, + NULL, /* No special bind function */ + udpv6_queue_rcv_skb, + 128, + 0, + "UDP", + 0, 0, + NULL +}; + +void udpv6_init(void) +{ + inet6_add_protocol(&udpv6_protocol); +} diff -u --recursive --new-file v2.1.7/linux/net/netlink.c linux/net/netlink.c --- v2.1.7/linux/net/netlink.c Tue Oct 29 19:58:50 1996 +++ linux/net/netlink.c Sun Nov 3 11:04:43 1996 @@ -82,7 +82,8 @@ * Write a message to the kernel side of a communication link */ -static int netlink_write(struct inode * inode, struct file * file, const char * buf, int count) +static long netlink_write(struct inode * inode, struct file * file, + const char * buf, unsigned long count) { unsigned int minor = MINOR(inode->i_rdev); struct sk_buff *skb; @@ -96,7 +97,8 @@ * Read a message from the kernel side of the communication link */ -static int netlink_read(struct inode * inode, struct file * file, char * buf, int count) +static long netlink_read(struct inode * inode, struct file * file, char * buf, + unsigned long count) { unsigned int minor = MINOR(inode->i_rdev); struct sk_buff *skb; @@ -124,8 +126,8 @@ return count; } -static int netlink_lseek(struct inode * inode, struct file * file, - off_t offset, int origin) +static loff_t netlink_lseek(struct inode * inode, struct file * file, + loff_t offset, int origin) { return -ESPIPE; } diff -u --recursive --new-file v2.1.7/linux/net/netsyms.c linux/net/netsyms.c --- v2.1.7/linux/net/netsyms.c Fri Jul 19 08:24:05 1996 +++ linux/net/netsyms.c Sun Nov 3 11:04:43 1996 @@ -28,7 +28,15 @@ #include #include #include +#include #include + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#include +#include +#include +#endif + #endif #ifdef CONFIG_NETLINK @@ -55,9 +63,6 @@ extern void destroy_8023_client(struct datalink_proto *); #endif -#ifdef CONFIG_DLCI_MODULE -extern int (*dlci_ioctl_hook)(unsigned int, void *); -#endif static struct symbol_table net_syms = { #include 
@@ -89,14 +94,10 @@ #ifdef CONFIG_INET /* Internet layer registration */ + X(get_new_socknum), X(inet_add_protocol), X(inet_del_protocol), X(rarp_ioctl_hook), - -#ifdef CONFIG_DLCI_MODULE - X(dlci_ioctl_hook), -#endif - X(init_etherdev), X(ip_rt_route), X(icmp_send), @@ -107,6 +108,79 @@ X(ip_send_check), #ifdef CONFIG_IP_FORWARD X(ip_forward), +#endif + +#ifdef CONFIG_IPV6_MODULE + /* inet functions common to v4 and v6 */ + X(inet_proto_ops), + X(inet_remove_sock), + X(inet_release), + X(inet_connect), + X(inet_accept), + X(inet_select), + X(inet_listen), + X(inet_shutdown), + X(inet_setsockopt), + X(inet_getsockopt), + X(inet_fcntl), + X(inet_sendmsg), + X(inet_recvmsg), + X(tcp_sock_array), + X(udp_sock_array), + X(destroy_sock), + X(ip_queue_xmit), + X(csum_partial), + X(ip_my_addr), + X(skb_copy), + X(dev_lockct), + X(ndisc_eth_hook), + X(memcpy_fromiovecend), + X(csum_partial_copy), + X(csum_partial_copy_fromiovecend), + X(__release_sock), + X(net_timer), + X(inet_put_sock), + /* UDP/TCP exported functions for TCPv6 */ + X(udp_ioctl), + X(udp_connect), + X(udp_sendmsg), + X(tcp_cache_zap), + X(tcp_close), + X(tcp_accept), + X(tcp_write_wakeup), + X(tcp_read_wakeup), + X(tcp_select), + X(tcp_ioctl), + X(tcp_shutdown), + X(tcp_setsockopt), + X(tcp_getsockopt), + X(tcp_recvmsg), + X(tcp_send_synack), + X(sock_wfree), + X(sock_wmalloc), + X(tcp_reset_xmit_timer), + X(tcp_parse_options), + X(tcp_rcv_established), + X(tcp_init_xmit_timers), + X(tcp_clear_xmit_timers), + X(tcp_slt_array), + X(tcp_slow_timer), + X(tcp_statistics), + X(tcp_rcv_state_process), + X(tcp_do_sendmsg), + X(tcp_v4_build_header), + X(tcp_v4_rebuild_header), + X(tcp_v4_send_check), + X(tcp_v4_conn_request), + X(tcp_v4_syn_recv_sock), + X(tcp_v4_backlog_rcv), + X(tcp_v4_connect), + X(ip_chk_addr), + X(net_reset_timer), + X(net_delete_timer), + X(udp_prot), + X(tcp_prot), + X(ipv4_specific), #endif #if defined(CONFIG_ULTRA) || defined(CONFIG_WD80x3) || \ diff -u --recursive --new-file 
v2.1.7/linux/net/protocols.c linux/net/protocols.c --- v2.1.7/linux/net/protocols.c Sat Mar 30 13:20:34 1996 +++ linux/net/protocols.c Sun Nov 3 11:04:43 1996 @@ -15,9 +15,14 @@ #ifdef CONFIG_UNIX #include #endif + #ifdef CONFIG_INET #include +#ifdef CONFIG_IPV6 +extern void inet6_proto_init(struct net_proto *pro); #endif +#endif /* INET */ + #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE) #include #include @@ -67,6 +72,9 @@ #endif #ifdef CONFIG_INET { "INET", inet_proto_init }, /* TCP/IP */ +#ifdef CONFIG_IPV6 + { "INET6", inet6_proto_init}, /* IPv6 */ +#endif #endif #ifdef CONFIG_IPX { "IPX", ipx_proto_init }, /* IPX */ diff -u --recursive --new-file v2.1.7/linux/net/socket.c linux/net/socket.c --- v2.1.7/linux/net/socket.c Tue Oct 29 19:58:51 1996 +++ linux/net/socket.c Sun Nov 3 11:04:43 1996 @@ -127,18 +127,19 @@ * divide and look after the messy bits. */ -#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 16 for IP, 16 for IPX, about 80 for AX.25 */ +#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - + 16 for IP, 16 for IPX, + 24 for IPv6, + about 80 for AX.25 */ int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr) { - int err; if(ulen<0||ulen>MAX_SOCK_ADDR) return -EINVAL; if(ulen==0) return 0; - if((err=verify_area(VERIFY_READ,uaddr,ulen))<0) - return err; - copy_from_user(kaddr,uaddr,ulen); + if(copy_from_user(kaddr,uaddr,ulen)) + return -EFAULT; return 0; } @@ -146,22 +147,19 @@ { int err; int len; - - if((err=verify_area(VERIFY_WRITE,ulen,sizeof(*ulen)))<0) + if((err=get_user(len, ulen))) return err; - get_user(len,ulen); if(len>klen) len=klen; if(len<0 || len> MAX_SOCK_ADDR) return -EINVAL; if(len) { - if((err=verify_area(VERIFY_WRITE,uaddr,len))<0) - return err; - copy_to_user(uaddr,kaddr,len); + if(copy_to_user(uaddr,kaddr,len)) + return -EFAULT; } - put_user(len,ulen); + put_user(len, ulen); return 0; } @@ -1118,6 +1116,7 @@ char address[MAX_SOCK_ADDR]; struct iovec iov[UIO_MAXIOV]; struct msghdr msg_sys; + void * krn_msg_ctl = NULL; int 
err; int total_len; @@ -1145,8 +1144,26 @@ if (err < 0) return err; total_len=err; + + if (msg_sys.msg_control) + { + krn_msg_ctl = kmalloc(msg_sys.msg_controllen, GFP_KERNEL); + err = copy_from_user(krn_msg_ctl, msg_sys.msg_control, + msg_sys.msg_controllen); + if (err) + return -EFAULT; + msg_sys.msg_control = krn_msg_ctl; + } + + err = sock->ops->sendmsg(sock, &msg_sys, total_len, + (file->f_flags&O_NONBLOCK), flags); + + if (msg_sys.msg_control) + { + kfree(krn_msg_ctl); + } - return sock->ops->sendmsg(sock, &msg_sys, total_len, (file->f_flags&O_NONBLOCK), flags); + return err; } /* @@ -1159,6 +1176,8 @@ struct file *file; struct iovec iov[UIO_MAXIOV]; struct msghdr msg_sys; + void *usr_msg_ctl = NULL; + void *krn_msg_ctl = NULL; int err; int total_len; int len; @@ -1179,7 +1198,9 @@ err=verify_area(VERIFY_READ, msg,sizeof(struct msghdr)); if(err) return err; + copy_from_user(&msg_sys,msg,sizeof(struct msghdr)); + if(msg_sys.msg_iovlen>UIO_MAXIOV) return -EINVAL; @@ -1194,6 +1215,19 @@ return err; total_len=err; + + + + if (msg_sys.msg_control) + { + usr_msg_ctl = msg_sys.msg_control; + krn_msg_ctl = kmalloc(msg_sys.msg_controllen, GFP_KERNEL); + err = copy_from_user(krn_msg_ctl, usr_msg_ctl, + msg_sys.msg_controllen); + if (err) + return -EFAULT; + msg_sys.msg_control = krn_msg_ctl; + } if(sock->ops->recvmsg==NULL) return -EOPNOTSUPP; @@ -1206,6 +1240,13 @@ if (err) return err; } + + if (msg_sys.msg_control) + { + copy_to_user(usr_msg_ctl, krn_msg_ctl, msg_sys.msg_controllen); + kfree(krn_msg_ctl); + } + return len; } diff -u --recursive --new-file v2.1.7/linux/net/sysctl_net.c linux/net/sysctl_net.c --- v2.1.7/linux/net/sysctl_net.c Mon Jun 3 12:42:42 1996 +++ linux/net/sysctl_net.c Sun Nov 3 11:04:43 1996 @@ -46,6 +46,10 @@ extern ctl_table bridge_table[]; #endif +#ifdef CONFIG_IPV6 +extern ctl_table ipv6_table[]; +#endif + ctl_table net_table[] = { {NET_CORE, "core", NULL, 0, 0555, core_table}, {NET_UNIX, "unix", NULL, 0, 0555, unix_table}, @@ -70,6 +74,9 
@@ #endif #ifdef CONFIG_BRIDGE {NET_BRIDGE, "bridge", NULL, 0, 0555, bridge_table}, +#endif +#ifdef CONFIG_IPV6 + {NET_IPV6, "ipv6", NULL, 0, 0555, ipv6_table}, #endif {0} }; diff -u --recursive --new-file v2.1.7/linux/net/unix/af_unix.c linux/net/unix/af_unix.c --- v2.1.7/linux/net/unix/af_unix.c Tue Oct 29 19:58:51 1996 +++ linux/net/unix/af_unix.c Sat Nov 9 10:52:22 1996 @@ -676,31 +676,6 @@ } /* - * Support routines for struct cmsghdr handling - */ - -static struct cmsghdr *unix_copyrights(void *userp, int len) -{ - struct cmsghdr *cm; - - if(len>256|| len <=0) - return NULL; - cm=kmalloc(len, GFP_KERNEL); - copy_from_user(cm, userp, len); - return cm; -} - -/* - * Return a header block - */ - -static void unix_returnrights(void *userp, int len, struct cmsghdr *cm) -{ - copy_to_user(userp, cm, len); - kfree(cm); -} - -/* * Copy file descriptors into system space. * Return number copied or negative error code */ @@ -724,9 +699,6 @@ int fd; fd = fdp[i]; -#if 0 - printk("testing fd %d\n", fd); -#endif if (fd < 0 || fd >= NR_OPEN) return -EBADF; if (current->files->fd[fd]==NULL) @@ -891,18 +863,18 @@ */ if(msg->msg_control) { - struct cmsghdr *cm=unix_copyrights(msg->msg_control, - msg->msg_controllen); + struct cmsghdr *cm = msg->msg_control; + if(cm==NULL || msg->msg_controllencmsg_type!=SCM_RIGHTS || cm->cmsg_level!=SOL_SOCKET || msg->msg_controllen!=cm->cmsg_len) { - kfree(cm); return -EINVAL; } - fpnum=unix_fd_copy(sk,cm,fp); - kfree(cm); + + fpnum = unix_fd_copy(sk, cm, fp); + if(fpnum<0) { return fpnum; } @@ -1064,8 +1036,8 @@ if(msg->msg_control) { - cm=unix_copyrights(msg->msg_control, - msg->msg_controllen); + cm=msg->msg_control; + if(msg->msg_controllensignal & ~current->blocked) return -ERESTARTSYS; - unix_data_wait(sk); down(&sk->protinfo.af_unix.readsem); continue; } @@ -1149,8 +1120,7 @@ } out: up(&sk->protinfo.af_unix.readsem); - if(cm) - unix_returnrights(msg->msg_control,msg->msg_controllen,cm); + return copied; } @@ -1305,7 +1275,7 @@ void 
unix_proto_init(struct net_proto *pro) { - printk(KERN_INFO "NET3: Unix domain sockets 0.12 for Linux NET3.035.\n"); + printk(KERN_INFO "NET3: Unix domain sockets 0.13 for Linux NET3.035.\n"); sock_register(unix_proto_ops.family, &unix_proto_ops); #ifdef CONFIG_PROC_FS proc_net_register(&proc_net_unix); diff -u --recursive --new-file v2.1.7/linux/scripts/MAKEDEV.ide linux/scripts/MAKEDEV.ide --- v2.1.7/linux/scripts/MAKEDEV.ide Sun May 12 21:21:04 1996 +++ linux/scripts/MAKEDEV.ide Wed Nov 6 14:49:33 1996 @@ -6,18 +6,18 @@ # makedev () { rm -f /dev/$1 - echo mknod /dev/$1 b $2 $3 - mknod /dev/$1 b $2 $3 + echo mknod /dev/$1 $2 $3 $4 + mknod /dev/$1 $2 $3 $4 chown root:disk /dev/$1 chmod 660 /dev/$1 } makedevs () { rm -f /dev/$1* - makedev $1 $2 $3 + makedev $1 b $2 $3 for part in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 do - makedev $1$part $2 `expr $3 + $part` + makedev $1$part b $2 `expr $3 + $part` done } @@ -30,18 +30,8 @@ makedevs hdg 34 0 makedevs hdh 34 64 -# Create the ide-tape rewinding character device. - -rm -f /dev/ht0 -echo mknod /dev/ht0 c 37 0 - mknod /dev/ht0 c 37 0 -chown root:disk /dev/ht0 -chmod 660 /dev/ht0 - -# Create the ide-tape non rewinding character device. - -rm -f /dev/nht0 -echo mknod /dev/nht0 c 37 128 - mknod /dev/nht0 c 37 128 -chown root:disk /dev/nht0 -chmod 660 /dev/nht0 +for tape in 0 1 2 3 4 5 6 7 +do + makedev ht$tape c 37 $tape + makedev nht$tape c 37 `expr $tape + 128` +done