diff -u --recursive --new-file v2.1.67/linux/CREDITS linux/CREDITS --- v2.1.67/linux/CREDITS Sat Nov 29 11:25:08 1997 +++ linux/CREDITS Sun Nov 30 13:10:55 1997 @@ -43,6 +43,15 @@ S: San Jose, California 95129 S: USA +N: Andrea Arcangeli +E: arcangeli@mbox.queen.it +W: http://www-linux.deis.unibo.it/~mirror/ +P: 1024/CB4660B9 CC A0 71 81 F4 A0 63 AC C0 4B 81 1D 8C 15 C8 E5 +D: parport sharing fix. Various other kernel hacks. +S: Via Ciaclini 26 +S: Imola 40026 +S: Italy + N: Derek Atkins E: warlord@MIT.EDU D: Linux-AFS Port, random kernel hacker, diff -u --recursive --new-file v2.1.67/linux/Documentation/Changes linux/Documentation/Changes --- v2.1.67/linux/Documentation/Changes Tue Sep 23 16:48:46 1997 +++ linux/Documentation/Changes Sun Nov 30 12:23:16 1997 @@ -190,6 +190,19 @@ /dev/lp0 with the new Plug-and-Play driver. If printing breaks with the new driver, try checking your lpd configuration. +pppd +==== +This kernel version needs a minor bugfix to pppd. See +Documentation/networking/ppp.txt for more information. + +Syncookies +========== +When you build your kernel with Syncookie support (CONFIG_SYN_COOKIES) +the syncookie code still defaults to off (unlike the 2.0.30+ behaviour). +You have to explicitely enable it by add a line like +echo 1 >/proc/sys/net/ipv4/tcp_syncookies +to one of your startup scripts (e.g. /etc/rc.d/rc.local on a redhat system) + Bash ==== diff -u --recursive --new-file v2.1.67/linux/Documentation/Configure.help linux/Documentation/Configure.help --- v2.1.67/linux/Documentation/Configure.help Sat Nov 29 11:25:08 1997 +++ linux/Documentation/Configure.help Sun Nov 30 13:48:41 1997 @@ -304,13 +304,12 @@ things will operate 100% reliably. If unsure, say Y. 
Intel 82371 PIIX (Triton I/II), VIA VP-1 DMA support -CONFIG_BLK_DEV_TRITON - If your PCI system uses an IDE harddrive (as opposed to SCSI, say) - and includes the Intel Triton I/II IDE interface chipset (i82371FB, - i82371SB or i82371AB), or the VIA VP-1 IDE interface chipset - (VT82C586), you will want to enable this option to allow use of +CONFIG_BLK_DEV_IDEDMA + If your PCI system uses IDE drive(s) (as opposed to SCSI, say) + and is capable of bus-master DMA operation (most Pentium PCI + systems), you will want to enable this option to allow use of bus-mastering DMA data transfers. Read the comments at the - beginning of drivers/block/triton.c and Documentation/ide.txt. + beginning of drivers/block/idedma.c and Documentation/ide.txt. You can get the latest version of the hdparm utility via ftp (user: anonymous) from sunsite.unc.edu/pub/Linux/kernel/patches/diskdrives/; it is @@ -343,18 +342,19 @@ See the Documentation/ide.txt and ht6560b.c files for more info. PROMISE DC4030 support (EXPERIMENTAL) -CONFIG_BLK_DEV_PROMISE +CONFIG_BLK_DEV_PDC4030 This driver provides support for the secondary IDE interface and cache of Promise IDE chipsets, e.g. DC4030 and DC5030. This driver is known to incur timeouts/retries during heavy I/O to drives attached to the secondary interface. CDROM and TAPE devices are not supported yet. This driver is enabled at runtime using the "ide0=dc4030" kernel boot parameter. See the Documentation/ide.txt - and drivers/block/promise.c files for more info. + and drivers/block/pdc4030.c files for more info. OPTi 82C621 support (EXPERIMENTAL) CONFIG_BLK_DEV_OPTI621 - This is a driver for the OPTi 82C621 EIDE controller. + This driver allows use of hdparm to change the PIO timings + for drives attached to an OPTi MIDE controller. Please read the comments at the top of drivers/block/opti621.c. QDI QD6580 support @@ -2781,18 +2781,51 @@ running kernel whenever you want), say M here and read Documentation/modules.txt. 
The module will be called scc.o. -BAYCOM ser12 and par96 driver for AX.25 -CONFIG_BAYCOM - This is an experimental driver for Baycom style simple amateur radio - modems that connect to either a serial interface or a parallel - interface. The driver supports the ser12 and par96 designs. To - configure the driver, use the sethdlc utility available in the - standard ax25 utilities package. For information on the modems, see +BAYCOM picpar and par96 driver for AX.25 +CONFIG_BAYCOM_PAR + This is a driver for Baycom style simple amateur radio + modems that connect to a parallel interface. The driver + supports the picpar and par96 designs. To configure the + driver, use the sethdlc utility available in the standard + ax25 utilities package. For information on the modems, see http://www.baycom.de and Documentation/networking/baycom.txt. If you want to compile this driver as a module ( = code which can be inserted in and removed from the running kernel whenever you want), say M here and read Documentation/modules.txt. This is - recommended. The module will be called baycom.o. + recommended. The module will be called baycom_par.o. + +BAYCOM ser12 full duplex driver for AX.25 +CONFIG_BAYCOM_SER_FDX + This is one of two drivers for Baycom style simple amateur radio + modems that connect to a serial interface. The driver supports + the ser12 design in full duplex mode. In addition, it allows the + baudrate to be set between 300 and 4800 baud (however not all modems + support all baudrates). This is the preferred driver. baycom_ser_hdx.o + is the old driver and still provided in case this driver does not work + with your serial interface chip. To configure the driver, use the + sethdlc utility available in the standard ax25 utilities package. + For information on the modems, see http://www.baycom.de and + Documentation/networking/baycom.txt. 
If you want to compile + this driver as a module ( = code which can be inserted in + and removed from the running kernel whenever you want), + say M here and read Documentation/modules.txt. This is + recommended. The module will be called baycom_ser_fdx.o. + +BAYCOM ser12 half duplex driver for AX.25 +CONFIG_BAYCOM_SER_HDX + This is one of two drivers for Baycom style simple amateur radio + modems that connect to a serial interface. The driver supports + the ser12 design in full duplex mode. This is the old driver. + It is still provided in case your serial interface chip does + not work with the full duplex driver. This driver is depreciated. + To configure the driver, use the sethdlc utility available + in the standard ax25 utilities package. For information + on the modems, see http://www.baycom.de and + Documentation/networking/baycom.txt. If you want to compile + this driver as a module ( = code which can be inserted in + and removed from the running kernel whenever you want), + say M here and read Documentation/modules.txt. This is + recommended. The module will be called baycom_ser_hdx.o. Soundcard modem driver for AX.25 CONFIG_SOUNDMODEM @@ -5917,9 +5950,9 @@ flush the disks, reboot the system immediately or dump some status information). This is accomplished by pressing various keys while holding SysRq (Alt+PrintScreen). As you are expected to be a kernel - hacker to use this, the simple rule about learning what do the keys + hacker to use this, the simple rule about learning what the keys mean is "Use the source, Luke!" -- read drivers/char/sysrq.c. - Don't say Y unless you really know what does this hack do. + Don't say Y unless you really know what this hack does. 
ISDN subsystem CONFIG_ISDN diff -u --recursive --new-file v2.1.67/linux/Documentation/ide.txt linux/Documentation/ide.txt --- v2.1.67/linux/Documentation/ide.txt Wed Apr 16 14:14:59 1997 +++ linux/Documentation/ide.txt Sun Nov 30 13:48:41 1997 @@ -1,4 +1,4 @@ -ide.txt -- Information regarding the Enhanced IDE drive in Linux 2.1.xx +ide.txt -- Information regarding the Enhanced IDE drive in Linux 2.1.68+ =============================================================================== Supported by: Mark Lord -- disks, interfaces, probing @@ -56,17 +56,19 @@ (courtesy of Juha Laiho ). - auto-detect of disk translations by examining partition table - ide-cd.c now compiles separate from ide.c - - Bus-Master DMA support for Intel PCI Triton chipset IDE interfaces - - for details, see comments at top of triton.c - ide-cd.c now supports door locking and auto-loading. - Also preliminary support for multisession and direct reads of audio data. - experimental support for Promise DC4030VL caching interface card - email thanks/problems to: peterd@pnd-pc.demon.co.uk - the hdparm-3.1 package can be used to set PIO modes for some chipsets. -NEW! - support for the OPTi 82C621 chipset, courtesy of Jaromir Koutek. +NEW! - support for setting PIO modes with the OPTi 82C621, courtesy of Jaromir Koutek. NEW! - support for loadable modules NEW! - optional SCSI host adapter emulation for ATAPI devices +NEW! - generic PCI Bus-Master DMA support +NEW! - works with most Pentium PCI systems, chipsets, add-on cards +NEW! - works with regular DMA as well as Ultra DMA +NEW! - automatically probes for all PCI IDE interfaces For work in progress, see the comments in ide.c, ide-cd.c, triton.c, ... 
diff -u --recursive --new-file v2.1.67/linux/Documentation/networking/ppp.txt linux/Documentation/networking/ppp.txt --- v2.1.67/linux/Documentation/networking/ppp.txt Tue Mar 5 00:01:26 1996 +++ linux/Documentation/networking/ppp.txt Sun Nov 30 12:23:16 1997 @@ -1,3 +1,33 @@ +*NEWSFLASH* +This kernel release needs a minor bug fix for pppd to run properly with +the new routing code. When your pppd doesn't work apply the following +patch to pppd-2.2.0f or install updated RPMs. + +Updated RPMs for libc5 machines (build on RedHat 4.0): +ftp://ftp.firstfloor.org/pub/ak/ppp-2.2.0f-4.src.rpm +ftp://ftp.firstfloor.org/pub/ak/ppp-2.2.0f-4.i386.rpm + +Patch: + +--- ppp-2.2.0f/pppd/sys-linux.c-o Wed Sep 17 00:23:01 1997 ++++ ppp-2.2.0f/pppd/sys-linux.c Wed Sep 17 00:23:11 1997 +@@ -927,8 +927,11 @@ + + if (ioctl(sockfd, SIOCADDRT, &rt) < 0) + { ++/* The new linux routing code doesn't like routes on down devices. */ ++#if 0 + syslog (LOG_ERR, "ioctl(SIOCADDRT) device route: %m"); + return (0); ++#endif + } + return 1; + } + + +-Andi Kleen +-------------------------------------------------------------------- + The PPP support for this kernel requires the 2.2.0 version of the pppd daemon. You will find the current version of the daemon on sunsite.unc.edu in the /pub/Linux/system/Network/serial directory. diff -u --recursive --new-file v2.1.67/linux/Documentation/networking/z8530drv.txt linux/Documentation/networking/z8530drv.txt --- v2.1.67/linux/Documentation/networking/z8530drv.txt Tue Oct 29 05:33:37 1996 +++ linux/Documentation/networking/z8530drv.txt Sat Nov 29 16:29:37 1997 @@ -4,14 +4,18 @@ Internet: ========= -1. db0bm.automation.fh-aachen.de/incoming/dl1bke/z8530drv-utils-3.0.tar.gz +1. ftp://db0bm.automation.fh-aachen.de/incoming/z8530drv/z8530drv-utils-3.0.tar.gz -2. ftp.ucsd.edu:/hamradio/packet/tcpip/incoming/z8530drv-utils-3.0.tar.gz - If you can't find it there, try .../tcpip/linux/z8530drv-utils-3.0.tar.gz +2. 
ftp://ftp.pspt.fi/pub/ham/linux/ax25/z8530drv-utils-3.0.tar.gz -and various mirrors (i.e. nic.switch.ch) +3. ftp://ftp.ucsd.edu/hamradio/packet/tcpip/incoming/z8530drv-utils-3.0.tar.gz + If you can't find it there, try .../tcpip/linux/z8530drv-utils-3.0.tar.gz Please note that the information in this document may be hopelessly outdated. +A new version of the documentation, along with links to other important +Linux Kernel AX.25 documentation and programs, is available on +http://www.rat.de/jr + ----------------------------------------------------------------------------- @@ -19,7 +23,7 @@ ******************************************************************** - (c) 1993,1996 by Joerg Reuter DL1BKE + (c) 1993,1997 by Joerg Reuter DL1BKE portions (c) 1993 Guido ten Dolle PE1NNZ @@ -134,7 +138,7 @@ to a higher value. -Example for the BayCom USCC: +Example for the BAYCOM USCC: ---------------------------- chip 1 @@ -228,7 +232,7 @@ gencfg 2 0x300 2 4 5 -4 0 7 4915200 0x10 -does the same for the BayCom USCC card. I my opinion it is much easier +does the same for the BAYCOM USCC card. I my opinion it is much easier to edit scc_config.h... @@ -332,7 +336,7 @@ and start your NOS and attach /dev/ptys0 there. The problem is that NOS is reachable only via digipeating through the kernel AX.25 -(disasterous on a DAMA controlled channel). To solve this problem, +(disastrous on a DAMA controlled channel). To solve this problem, configure "rxecho" to echo the incoming frames from "9k6" to "axlink" and outgoing frames from "axlink" to "9k6" and start: @@ -605,8 +609,9 @@ A very common problem is that the PTT locks until the maxkeyup timer expires, although interrupts and clock source are correct. In most -cases #define SCC_DELAY solves the problems. For more hints read -the (pseudo) FAQ and the documentation coming with z8530drv-utils. +cases compiling the driver with CONFIG_SCC_DELAY (set with +make config) solves the problems. 
For more hints read the (pseudo) FAQ +and the documentation coming with z8530drv-utils. I got reports that the driver has problems on some 386-based systems. (i.e. Amstrad) Those systems have a bogus AT bus timing which will @@ -624,7 +629,7 @@ - a high load of the machine --- running X, Xmorph, XV and Povray, while compiling the kernel... hmm ... even with 32 MB RAM ... ;-) - Or running a named for the whole .ampr.org. domain on an 8 MB + Or running a named for the whole .ampr.org domain on an 8 MB box... - using information from rxecho or kissbridge. @@ -651,4 +656,5 @@ Joerg Reuter ampr-net: dl1bke@db0pra.ampr.org AX-25 : DL1BKE @ DB0ACH.#NRW.DEU.EU - Internet: jreuter@lykos.oche.de + Internet: jreuter@poboxes.com + WWW : http://www.rat.de/jr/ diff -u --recursive --new-file v2.1.67/linux/Makefile linux/Makefile --- v2.1.67/linux/Makefile Sat Nov 29 11:25:09 1997 +++ linux/Makefile Sat Nov 29 13:01:24 1997 @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 1 -SUBLEVEL = 67 +SUBLEVEL = 68 ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/) diff -u --recursive --new-file v2.1.67/linux/arch/alpha/kernel/alpha_ksyms.c linux/arch/alpha/kernel/alpha_ksyms.c --- v2.1.67/linux/arch/alpha/kernel/alpha_ksyms.c Wed Apr 16 14:14:59 1997 +++ linux/arch/alpha/kernel/alpha_ksyms.c Sun Nov 30 10:59:02 1997 @@ -94,7 +94,7 @@ * The following are specially called from the uaccess assembly stubs. 
*/ EXPORT_SYMBOL_NOVERS(__copy_user); -EXPORT_SYMBOL_NOVERS(__clear_user); +EXPORT_SYMBOL_NOVERS(__do_clear_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strlen_user); diff -u --recursive --new-file v2.1.67/linux/arch/alpha/kernel/entry.S linux/arch/alpha/kernel/entry.S --- v2.1.67/linux/arch/alpha/kernel/entry.S Sat Oct 25 02:44:14 1997 +++ linux/arch/alpha/kernel/entry.S Sun Nov 30 10:59:02 1997 @@ -10,7 +10,7 @@ #define rti .long PAL_rti #define SIGCHLD 20 -#define NR_SYSCALLS 352 +#define NR_SYSCALLS 360 #define osf_vfork sys_fork /* @@ -32,13 +32,12 @@ #define TASK_STATE 0 #define TASK_COUNTER 8 #define TASK_PRIORITY 16 -#define TASK_SIGNAL 24 -#define TASK_BLOCKED 32 -#define TASK_FLAGS 40 +#define TASK_FLAGS 24 /* * task flags (must match include/linux/sched.h): */ +#define PF_SIGPENDING 0x00000008 #define PF_PTRACED 0x00000010 /* @@ -509,7 +508,7 @@ lda $4,NR_SYSCALLS($31) stq $16,SP_OFF+24($30) lda $5,sys_call_table - lda $27,do_entSys + lda $27,alpha_ni_syscall cmpult $0,$4,$4 ldq $3,TASK_FLAGS($8) stq $17,SP_OFF+32($30) @@ -519,7 +518,7 @@ bne $3,strace beq $4,1f ldq $27,0($5) -1: jsr $26,($27),do_entSys +1: jsr $26,($27),alpha_ni_syscall ldgp $29,0($26) blt $0,syscall_error /* the call failed */ stq $0,0($30) @@ -544,11 +543,10 @@ lda $4,init_task_union bne $2,reschedule xor $4,$8,$4 + ldq $5,TASK_FLAGS($8) beq $4,restore_all - ldq $4,TASK_SIGNAL($8) - ldq $16,TASK_BLOCKED($8) - bic $4,$16,$4 - bne $4,signal_return + and $5,PF_SIGPENDING,$5 + bne $5,signal_return restore_all: RESTORE_ALL rti @@ -574,12 +572,12 @@ /* get the system call pointer.. */ lda $1,NR_SYSCALLS($31) lda $2,sys_call_table - lda $27,do_entSys + lda $27,alpha_ni_syscall cmpult $0,$1,$1 s8addq $0,$2,$2 beq $1,1f ldq $27,0($2) -1: jsr $26,($27),do_entSys +1: jsr $26,($27),alpha_ni_syscall ldgp $29,0($26) /* check return.. 
*/ @@ -656,6 +654,7 @@ bis $30,$30,$17 br $1,do_switch_stack bis $30,$30,$18 + bis $31,$31,$16 jsr $26,do_signal lda $30,SWITCH_STACK_SIZE($30) br $31,restore_all @@ -686,6 +685,17 @@ .end sys_sigreturn .align 3 +.ent sys_rt_sigreturn +sys_rt_sigreturn: + bis $30,$30,$17 + lda $30,-SWITCH_STACK_SIZE($30) + bis $30,$30,$18 + jsr $26,do_rt_sigreturn + br $1,undo_switch_stack + br $31,ret_from_sys_call +.end sys_rt_sigreturn + +.align 3 .ent sys_sigsuspend sys_sigsuspend: bis $30,$30,$17 @@ -696,80 +706,383 @@ br $31,ret_from_sys_call .end sys_sigsuspend +.align 3 +.ent sys_rt_sigsuspend +sys_rt_sigsuspend: + bis $30,$30,$18 + br $1,do_switch_stack + bis $30,$30,$19 + jsr $26,do_rt_sigsuspend + lda $30,SWITCH_STACK_SIZE($30) + br $31,ret_from_sys_call +.end sys_rt_sigsuspend + + .data .align 3 .globl sys_call_table sys_call_table: -/*0*/ .quad do_entSys, sys_exit, sys_fork, sys_read, sys_write - .quad do_entSys, sys_close, sys_wait4, do_entSys, sys_link - .quad sys_unlink, do_entSys, sys_chdir, sys_fchdir, sys_mknod - .quad sys_chmod, sys_chown, osf_brk, do_entSys, sys_lseek - .quad sys_getxpid, osf_mount, osf_umount, sys_setuid, sys_getxuid - .quad do_entSys, sys_ptrace, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, sys_access, do_entSys - .quad do_entSys, sys_sync, sys_kill, do_entSys, sys_setpgid - .quad do_entSys, sys_dup, sys_pipe, osf_set_program_attributes, do_entSys - .quad sys_open, do_entSys, sys_getxgid, osf_sigprocmask, do_entSys -/*50*/ .quad do_entSys, sys_acct, sys_sigpending, do_entSys, sys_ioctl - .quad do_entSys, do_entSys, sys_symlink, sys_readlink, sys_execve - .quad sys_umask, sys_chroot, do_entSys, sys_getpgrp, sys_getpagesize - .quad do_entSys, osf_vfork, sys_newstat, sys_newlstat, do_entSys - .quad do_entSys, osf_mmap, do_entSys, sys_munmap, sys_mprotect - .quad sys_madvise, sys_vhangup, do_entSys, do_entSys, sys_getgroups + .quad alpha_ni_syscall /* 0 */ + .quad sys_exit + .quad sys_fork + .quad sys_read + .quad 
sys_write + .quad alpha_ni_syscall /* 5 */ + .quad sys_close + .quad sys_wait4 + .quad alpha_ni_syscall + .quad sys_link + .quad sys_unlink /* 10 */ + .quad alpha_ni_syscall + .quad sys_chdir + .quad sys_fchdir + .quad sys_mknod + .quad sys_chmod /* 15 */ + .quad sys_chown + .quad osf_brk + .quad alpha_ni_syscall + .quad sys_lseek + .quad sys_getxpid /* 20 */ + .quad osf_mount + .quad osf_umount + .quad sys_setuid + .quad sys_getxuid + .quad alpha_ni_syscall /* 25 */ + .quad sys_ptrace + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 30 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_access + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 35 */ + .quad sys_sync + .quad sys_kill + .quad alpha_ni_syscall + .quad sys_setpgid + .quad alpha_ni_syscall /* 40 */ + .quad sys_dup + .quad sys_pipe + .quad osf_set_program_attributes + .quad alpha_ni_syscall + .quad sys_open /* 45 */ + .quad alpha_ni_syscall + .quad sys_getxgid + .quad osf_sigprocmask + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 50 */ + .quad sys_acct + .quad osf_sigpending + .quad alpha_ni_syscall + .quad sys_ioctl + .quad alpha_ni_syscall /* 55 */ + .quad alpha_ni_syscall + .quad sys_symlink + .quad sys_readlink + .quad sys_execve + .quad sys_umask /* 60 */ + .quad sys_chroot + .quad alpha_ni_syscall + .quad sys_getpgrp + .quad sys_getpagesize + .quad alpha_ni_syscall /* 65 */ + .quad osf_vfork + .quad sys_newstat + .quad sys_newlstat + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 70 */ + .quad osf_mmap + .quad alpha_ni_syscall + .quad sys_munmap + .quad sys_mprotect + .quad sys_madvise /* 75 */ + .quad sys_vhangup + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_getgroups /* map BSD's setpgrp to sys_setpgid for binary compatibility: */ - .quad sys_setgroups, do_entSys, sys_setpgid, sys_setitimer, do_entSys - .quad do_entSys, sys_getitimer, sys_gethostname, sys_sethostname, sys_getdtablesize - .quad 
sys_dup2, sys_newfstat, sys_fcntl, sys_select, sys_poll - .quad sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept -/*100*/ .quad osf_getpriority, sys_send, sys_recv, sys_sigreturn, sys_bind - .quad sys_setsockopt, sys_listen, do_entSys, do_entSys, do_entSys - .quad do_entSys, sys_sigsuspend, do_entSys, sys_recvmsg, sys_sendmsg - .quad do_entSys, sys_gettimeofday, sys_getrusage, sys_getsockopt, do_entSys - .quad sys_readv, sys_writev, sys_settimeofday, sys_fchown, sys_fchmod - .quad sys_recvfrom, sys_setreuid, sys_setregid, sys_rename, sys_truncate - .quad sys_ftruncate, sys_flock, sys_setgid, sys_sendto, sys_shutdown - .quad sys_socketpair, sys_mkdir, sys_rmdir, sys_utimes, do_entSys - .quad do_entSys, sys_getpeername, do_entSys, do_entSys, sys_getrlimit - .quad sys_setrlimit, do_entSys, sys_setsid, sys_quotactl, do_entSys -/*150*/ .quad sys_getsockname, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, sys_sigaction, do_entSys, do_entSys, osf_getdirentries - .quad osf_statfs, osf_fstatfs, do_entSys, do_entSys, do_entSys - .quad osf_getdomainname, sys_setdomainname, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, osf_swapon -/*200*/ .quad sys_msgctl, sys_msgget, sys_msgrcv, sys_msgsnd, sys_semctl - .quad sys_semget, sys_semop, osf_utsname, do_entSys, osf_shmat - .quad sys_shmctl, sys_shmdt, sys_shmget, do_entSys, do_entSys - .quad do_entSys, do_entSys, sys_msync, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, sys_getpgid, sys_getsid - .quad do_entSys, do_entSys, 
do_entSys, do_entSys, do_entSys - .quad do_entSys, osf_sysinfo, do_entSys, do_entSys, osf_proplist_syscall - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys -/*250*/ .quad do_entSys, osf_usleep_thread, do_entSys, do_entSys, sys_sysfs - .quad do_entSys, osf_getsysinfo, osf_setsysinfo, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys - .quad do_entSys, do_entSys, do_entSys, do_entSys, do_entSys + .quad sys_setgroups /* 80 */ + .quad alpha_ni_syscall + .quad sys_setpgid + .quad sys_setitimer + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 85 */ + .quad sys_getitimer + .quad sys_gethostname + .quad sys_sethostname + .quad sys_getdtablesize + .quad sys_dup2 /* 90 */ + .quad sys_newfstat + .quad sys_fcntl + .quad sys_select + .quad sys_poll + .quad sys_fsync /* 95 */ + .quad sys_setpriority + .quad sys_socket + .quad sys_connect + .quad sys_accept + .quad osf_getpriority /* 100 */ + .quad sys_send + .quad sys_recv + .quad sys_sigreturn + .quad sys_bind + .quad sys_setsockopt /* 105 */ + .quad sys_listen + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 110 */ + .quad sys_sigsuspend + .quad alpha_ni_syscall + .quad sys_recvmsg + .quad sys_sendmsg + .quad alpha_ni_syscall /* 115 */ + .quad sys_gettimeofday + .quad sys_getrusage + .quad sys_getsockopt + .quad alpha_ni_syscall + .quad sys_readv /* 120 */ + .quad sys_writev + .quad sys_settimeofday + .quad sys_fchown + .quad sys_fchmod + .quad sys_recvfrom /* 125 */ + .quad sys_setreuid + .quad sys_setregid + .quad sys_rename + .quad sys_truncate + .quad 
sys_ftruncate /* 130 */ + .quad sys_flock + .quad sys_setgid + .quad sys_sendto + .quad sys_shutdown + .quad sys_socketpair /* 135 */ + .quad sys_mkdir + .quad sys_rmdir + .quad sys_utimes + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 140 */ + .quad sys_getpeername + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_getrlimit + .quad sys_setrlimit /* 145 */ + .quad alpha_ni_syscall + .quad sys_setsid + .quad sys_quotactl + .quad alpha_ni_syscall + .quad sys_getsockname /* 150 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 155 */ + .quad osf_sigaction + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad osf_getdirentries + .quad osf_statfs /* 160 */ + .quad osf_fstatfs + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad osf_getdomainname /* 165 */ + .quad sys_setdomainname + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 170 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 175 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 180 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 185 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 190 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 195 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad osf_swapon + .quad sys_msgctl /* 200 */ + .quad sys_msgget + .quad sys_msgrcv + .quad sys_msgsnd + .quad sys_semctl + .quad sys_semget /* 205 */ + .quad sys_semop + .quad osf_utsname + .quad alpha_ni_syscall + 
.quad osf_shmat + .quad sys_shmctl /* 210 */ + .quad sys_shmdt + .quad sys_shmget + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 215 */ + .quad alpha_ni_syscall + .quad sys_msync + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 220 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 225 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 230 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_getpgid + .quad sys_getsid + .quad alpha_ni_syscall /* 235 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 240 */ + .quad osf_sysinfo + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad osf_proplist_syscall + .quad alpha_ni_syscall /* 245 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 250 */ + .quad osf_usleep_thread + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad sys_sysfs + .quad alpha_ni_syscall /* 255 */ + .quad osf_getsysinfo + .quad osf_setsysinfo + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 260 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 265 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 270 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 275 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 280 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + 
.quad alpha_ni_syscall /* 285 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 290 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 295 */ + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* linux-specific system calls start at 300 */ -/*300*/ .quad sys_bdflush, sys_sethae, sys_mount, sys_adjtimex, sys_swapoff - .quad sys_getdents, alpha_create_module, sys_init_module, sys_delete_module, sys_get_kernel_syms - .quad sys_syslog, sys_reboot, sys_clone, sys_uselib, sys_mlock - .quad sys_munlock, sys_mlockall, sys_munlockall, sys_sysinfo, sys_sysctl - .quad sys_idle, sys_umount, sys_swapon, sys_times, sys_personality - .quad sys_setfsuid, sys_setfsgid, sys_ustat, sys_statfs, sys_fstatfs - .quad sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler, sys_sched_yield - .quad sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, do_entSys /* sys_afs_syscall */, sys_newuname - .quad sys_nanosleep, sys_mremap, sys_nfsservctl, sys_setresuid, sys_getresuid - .quad sys_pciconfig_read, sys_pciconfig_write, sys_query_module - .quad sys_prctl, sys_pread, sys_pwrite - .quad do_entSys, do_entSys + .quad sys_bdflush /* 300 */ + .quad sys_sethae + .quad sys_mount + .quad sys_adjtimex + .quad sys_swapoff + .quad sys_getdents /* 305 */ + .quad alpha_create_module + .quad sys_init_module + .quad sys_delete_module + .quad sys_get_kernel_syms + .quad sys_syslog /* 310 */ + .quad sys_reboot + .quad sys_clone + .quad sys_uselib + .quad sys_mlock + .quad sys_munlock /* 315 */ + .quad sys_mlockall + .quad sys_munlockall + .quad sys_sysinfo + .quad sys_sysctl + .quad sys_idle /* 320 */ + .quad sys_umount + .quad sys_swapon + .quad sys_times + .quad sys_personality + .quad sys_setfsuid /* 325 */ + .quad sys_setfsgid + .quad 
sys_ustat + .quad sys_statfs + .quad sys_fstatfs + .quad sys_sched_setparam /* 330 */ + .quad sys_sched_getparam + .quad sys_sched_setscheduler + .quad sys_sched_getscheduler + .quad sys_sched_yield + .quad sys_sched_get_priority_max /* 335 */ + .quad sys_sched_get_priority_min + .quad sys_sched_rr_get_interval + .quad alpha_ni_syscall /* sys_afs_syscall */ + .quad sys_newuname + .quad sys_nanosleep /* 340 */ + .quad sys_mremap + .quad sys_nfsservctl + .quad sys_setresuid + .quad sys_getresuid + .quad sys_pciconfig_read /* 345 */ + .quad sys_pciconfig_write + .quad sys_query_module + .quad sys_prctl + .quad sys_pread + .quad sys_pwrite /* 350 */ + .quad sys_rt_sigreturn + .quad sys_rt_sigaction + .quad sys_rt_sigprocmask + .quad sys_rt_sigpending + .quad sys_rt_sigtimedwait /* 355 */ + .quad sys_rt_sigqueueinfo + .quad sys_rt_sigsuspend + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 360 */ diff -u --recursive --new-file v2.1.67/linux/arch/alpha/kernel/ptrace.c linux/arch/alpha/kernel/ptrace.c --- v2.1.67/linux/arch/alpha/kernel/ptrace.c Mon Aug 4 16:25:35 1997 +++ linux/arch/alpha/kernel/ptrace.c Sun Nov 30 10:59:02 1997 @@ -574,7 +574,7 @@ (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; - if ((unsigned long) data > NSIG) + if ((unsigned long) data > _NSIG) goto out; if (request == PTRACE_SYSCALL) child->flags |= PF_TRACESYS; @@ -606,7 +606,7 @@ case PTRACE_SINGLESTEP: { /* execute single instruction. */ ret = -EIO; - if ((unsigned long) data > NSIG) + if ((unsigned long) data > _NSIG) goto out; child->debugreg[4] = -1; /* mark single-stepping */ child->flags &= ~PF_TRACESYS; @@ -619,7 +619,7 @@ case PTRACE_DETACH: { /* detach a process that was attached. 
*/ ret = -EIO; - if ((unsigned long) data > NSIG) + if ((unsigned long) data > _NSIG) goto out; child->flags &= ~(PF_PTRACED|PF_TRACESYS); wake_up_process(child); @@ -627,7 +627,7 @@ REMOVE_LINKS(child); child->p_pptr = child->p_opptr; SET_LINKS(child); - /* make sure single-step breakpoint is gone. */ + /* make sure single-step breakpoint is gone. */ ptrace_cancel_bpt(child); ret = 0; goto out; @@ -644,22 +644,20 @@ asmlinkage void syscall_trace(void) { - lock_kernel(); if ((current->flags & (PF_PTRACED|PF_TRACESYS)) != (PF_PTRACED|PF_TRACESYS)) - goto out; + return; current->exit_code = SIGTRAP; current->state = TASK_STOPPED; notify_parent(current, SIGCHLD); schedule(); /* - * this isn't the same as continuing with a signal, but it will do + * This isn't the same as continuing with a signal, but it will do * for normal use. strace only continues with a signal if the * stopping signal is not SIGTRAP. -brl */ - if (current->exit_code) - current->signal |= (1 << (current->exit_code - 1)); - current->exit_code = 0; -out: - unlock_kernel(); + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } } diff -u --recursive --new-file v2.1.67/linux/arch/alpha/kernel/signal.c linux/arch/alpha/kernel/signal.c --- v2.1.67/linux/arch/alpha/kernel/signal.c Mon Aug 4 16:25:35 1997 +++ linux/arch/alpha/kernel/signal.c Sun Nov 30 10:59:02 1997 @@ -2,6 +2,8 @@ * linux/arch/alpha/kernel/signal.c * * Copyright (C) 1995 Linus Torvalds + * + * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson */ #include @@ -14,21 +16,26 @@ #include #include #include +#include +#include #include #include +#include + +#define DEBUG_SIG 0 -#define _S(nr) (1<<((nr)-1)) -#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) asmlinkage int sys_wait4(int, int *, int, struct rusage *); asmlinkage void ret_from_sys_call(void); -asmlinkage int do_signal(unsigned long, struct pt_regs *, struct 
switch_stack *, - unsigned long, unsigned long); +asmlinkage int do_signal(sigset_t *, struct pt_regs *, + struct switch_stack *, unsigned long, unsigned long); extern int ptrace_set_bpt (struct task_struct *child); extern int ptrace_cancel_bpt (struct task_struct *child); + /* * The OSF/1 sigprocmask calling sequence is different from the * C sigprocmask() sequence.. @@ -44,51 +51,152 @@ * Note that we don't need to acquire the kernel lock for SMP * operation, as all of this is local to this thread. */ -asmlinkage unsigned long osf_sigprocmask(int how, unsigned long newmask, - long a2, long a3, long a4, long a5, struct pt_regs regs) +asmlinkage unsigned long +osf_sigprocmask(int how, unsigned long newmask, long a2, long a3, + long a4, long a5, struct pt_regs regs) { - unsigned long ok, oldmask; - struct task_struct * tsk; + unsigned long oldmask = -EINVAL; - ok = how-1; /* 0 .. 2 */ - tsk = current; - ok = ok <= 2; - oldmask = -EINVAL; - if (ok) { - long sign; /* -1 .. 1 */ + if ((unsigned long)how-1 <= 2) { + long sign = how-2; /* -1 .. 
1 */ unsigned long block, unblock; - oldmask = tsk->blocked; newmask &= _BLOCKABLE; - sign = how-2; + spin_lock_irq(¤t->sigmask_lock); + oldmask = current->blocked.sig[0]; + unblock = oldmask & ~newmask; block = oldmask | newmask; if (!sign) block = unblock; - regs.r0 = 0; /* special no error return */ if (sign <= 0) newmask = block; - tsk->blocked = newmask; + if (_NSIG_WORDS > 1 && sign > 0) + sigemptyset(¤t->blocked); + current->blocked.sig[0] = newmask; + spin_unlock_irq(¤t->sigmask_lock); + + (®s)->r0 = 0; /* special no error return */ } return oldmask; } +asmlinkage int +osf_sigaction(int sig, const struct osf_sigaction *act, + struct osf_sigaction *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags)) + return -EFAULT; + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + new_ka.ka_restorer = NULL; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags)) + return -EFAULT; + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +asmlinkage int +sys_rt_sigaction(int sig, const struct sigaction *act, struct sigaction *oact, + void *restorer, size_t sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (act) { + new_ka.ka_restorer = restorer; + if (copy_from_user(&new_ka.sa, act, sizeof(*act))) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); + + if (!ret && oact) { + if (copy_to_user(oact, &old_ka.sa, sizeof(*oact))) + return -EFAULT; + } + + return ret; +} + +asmlinkage int +osf_sigpending(old_sigset_t *set) +{ + sigset_t pending; + + spin_lock_irq(¤t->sigmask_lock); + sigandsets(&pending, ¤t->blocked, ¤t->signal); + spin_unlock_irq(¤t->sigmask_lock); + + return copy_to_user(set, &pending, sizeof(*set)); +} + /* - * atomically swap in the new signal mask, and wait for a signal. + * Atomically swap in the new signal mask, and wait for a signal. */ -asmlinkage int do_sigsuspend(unsigned long mask, struct pt_regs * regs, struct switch_stack * sw) +asmlinkage int +do_sigsuspend(old_sigset_t mask, struct pt_regs *reg, struct switch_stack *sw) +{ + sigset_t oldset; + + mask &= _BLOCKABLE; + spin_lock_irq(¤t->sigmask_lock); + oldset = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(&oldset, reg, sw, 0, 0)) + return -EINTR; + } +} + +asmlinkage int +do_rt_sigsuspend(sigset_t *uset, size_t sigsetsize, + struct pt_regs *reg, struct switch_stack *sw) { - unsigned long oldmask; + sigset_t oldset, set; + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (copy_from_user(&set, uset, sizeof(set))) + return -EFAULT; + + sigdelsetmask(&set, ~_BLOCKABLE); spin_lock_irq(¤t->sigmask_lock); - oldmask = current->blocked; - current->blocked = mask & _BLOCKABLE; + oldset = current->blocked; + current->blocked = set; + recalc_sigpending(current); spin_unlock_irq(¤t->sigmask_lock); while (1) { current->state = TASK_INTERRUPTIBLE; schedule(); - if (do_signal(oldmask, regs, sw, 0, 0)) + if (do_signal(&oldset, reg, sw, 0, 0)) return -EINTR; } } @@ -96,26 +204,35 @@ /* * Do a signal return; undo the signal stack. 
*/ -asmlinkage void do_sigreturn(struct sigcontext * sc, - struct pt_regs * regs, struct switch_stack * sw) + +struct sigframe { - unsigned long mask, ps, usp; - int i; + struct sigcontext sc; + unsigned long extramask[_NSIG_WORDS-1]; + unsigned int retcode[3]; +}; - /* verify that it's a good sigcontext before using it */ - if (verify_area(VERIFY_READ, sc, sizeof(*sc))) - goto give_sigsegv; - if (__get_user(ps, &sc->sc_ps) || ps != 8) - goto give_sigsegv; - if (__get_user(mask, &sc->sc_mask) || (mask & ~_BLOCKABLE)) - goto give_sigsegv; +struct rt_sigframe +{ + struct siginfo info; + struct ucontext uc; + unsigned int retcode[3]; +}; + +#define INSN_MOV_R30_R16 0x47fe0410 +#define INSN_LDI_R0 0x201f0000 +#define INSN_CALLSYS 0x00000083 + + +static void +restore_sigcontext(struct sigcontext *sc, struct pt_regs *regs, + struct switch_stack *sw) +{ + unsigned long usp; + int i; - /* ok, looks fine, start restoring */ - __get_user(usp, sc->sc_regs+30); - wrusp(usp); __get_user(regs->pc, &sc->sc_pc); sw->r26 = (unsigned long) ret_from_sys_call; - current->blocked = mask; __get_user(regs->r0, sc->sc_regs+0); __get_user(regs->r1, sc->sc_regs+1); @@ -147,47 +264,98 @@ __get_user(regs->r27, sc->sc_regs+27); __get_user(regs->r28, sc->sc_regs+28); __get_user(regs->gp, sc->sc_regs+29); + __get_user(usp, sc->sc_regs+30); + wrusp(usp); + for (i = 0; i < 31; i++) __get_user(sw->fp[i], sc->sc_fpregs+i); + __get_user(sw->fp[31], &sc->sc_fpcr); +} - /* send SIGTRAP if we're single-stepping: */ - lock_kernel(); +asmlinkage void +do_sigreturn(struct sigframe *frame, struct pt_regs *regs, + struct switch_stack *sw) +{ + unsigned long ps; + sigset_t set; + + /* Verify that it's a good sigcontext before using it */ + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto give_sigsegv; + if (__get_user(ps, &frame->sc.sc_ps) || ps != 8) + goto give_sigsegv; + if (__get_user(set.sig[0], &frame->sc.sc_mask) + || (_NSIG_WORDS > 1 + && __copy_from_user(&set.sig[1], &frame->extramask, + 
sizeof(frame->extramask)))) + goto give_sigsegv; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + restore_sigcontext(&frame->sc, regs, sw); + + /* Send SIGTRAP if we're single-stepping: */ if (ptrace_cancel_bpt (current)) send_sig(SIGTRAP, current, 1); - unlock_kernel(); return; give_sigsegv: lock_kernel(); do_exit(SIGSEGV); - unlock_kernel(); } -/* - * Set up a signal frame... - */ -static void setup_frame(struct sigaction * sa, - struct pt_regs * regs, - struct switch_stack * sw, int signr, - unsigned long oldmask) +asmlinkage void +do_rt_sigreturn(struct rt_sigframe *frame, struct pt_regs *regs, + struct switch_stack *sw) { - int i; - unsigned long oldsp; - struct sigcontext * sc; + unsigned long ps; + sigset_t set; - oldsp = rdusp(); - sc = ((struct sigcontext *) oldsp) - 1; + /* Verify that it's a good sigcontext before using it */ + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto give_sigsegv; + if (__get_user(ps, &frame->uc.uc_mcontext.sc_ps) || ps != 8) + goto give_sigsegv; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto give_sigsegv; - /* check here if we would need to switch stacks.. */ - if (verify_area(VERIFY_WRITE, sc, sizeof(*sc))) - do_exit(SIGSEGV); + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); - wrusp((unsigned long) sc); + restore_sigcontext(&frame->uc.uc_mcontext, regs, sw); - __put_user(oldmask, &sc->sc_mask); - __put_user(8, &sc->sc_ps); + /* Send SIGTRAP if we're single-stepping: */ + if (ptrace_cancel_bpt (current)) + send_sig(SIGTRAP, current, 1); + return; + +give_sigsegv: + lock_kernel(); + do_exit(SIGSEGV); +} + + +/* + * Set up a signal frame. 
+ */ + +static void +setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, + struct switch_stack *sw, unsigned long mask, unsigned long sp) +{ + long i; + + __put_user(0, &sc->sc_onstack); + __put_user(mask, &sc->sc_mask); __put_user(regs->pc, &sc->sc_pc); - __put_user(oldsp, sc->sc_regs+30); + __put_user(8, &sc->sc_ps); __put_user(regs->r0 , sc->sc_regs+0); __put_user(regs->r1 , sc->sc_regs+1); @@ -219,63 +387,167 @@ __put_user(regs->r27, sc->sc_regs+27); __put_user(regs->r28, sc->sc_regs+28); __put_user(regs->gp , sc->sc_regs+29); + __put_user(sp, sc->sc_regs+30); + __put_user(0, sc->sc_regs+31); + for (i = 0; i < 31; i++) __put_user(sw->fp[i], sc->sc_fpregs+i); + __put_user(0, sc->sc_fpregs+31); + __put_user(sw->fp[31], &sc->sc_fpcr); + __put_user(regs->trap_a0, &sc->sc_traparg_a0); __put_user(regs->trap_a1, &sc->sc_traparg_a1); __put_user(regs->trap_a2, &sc->sc_traparg_a2); +} + +static void +setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, + struct pt_regs *regs, struct switch_stack * sw) +{ + unsigned long oldsp; + struct sigframe *frame; + + oldsp = rdusp(); + frame = (struct sigframe *)((oldsp - sizeof(*frame)) & -32); + + /* XXX: Check here if we would need to switch stacks.. */ + if (verify_area(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + setup_sigcontext(&frame->sc, regs, sw, set->sig[0], oldsp); + if (_NSIG_WORDS > 1) { + __copy_to_user(frame->extramask, &set->sig[1], + sizeof(frame->extramask)); + } + + /* Set up to return from userspace. If provided, use a stub + already in userspace. 
*/ + if (ka->ka_restorer) { + regs->r26 = (unsigned long) ka->ka_restorer; + } else { + __put_user(INSN_MOV_R30_R16, frame->retcode+0); + __put_user(INSN_LDI_R0+__NR_sigreturn, frame->retcode+1); + __put_user(INSN_CALLSYS, frame->retcode+2); + imb(); + regs->r26 = (unsigned long) frame->retcode; + } + + /* "Return" to the handler */ + regs->r27 = regs->pc = (unsigned long) ka->sa.sa_handler; + regs->r16 = sig; /* a0: signal number */ + regs->r17 = 0; /* a1: exception code */ + regs->r18 = (unsigned long) &frame->sc; /* a2: sigcontext pointer */ + wrusp((unsigned long) frame); + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->pc, regs->r26); +#endif + + return; + +give_sigsegv: + lock_kernel(); + do_exit(SIGSEGV); +} + +static void +setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs, struct switch_stack * sw) +{ + unsigned long oldsp; + struct rt_sigframe *frame; + + oldsp = rdusp(); + frame = (struct rt_sigframe *)((oldsp - sizeof(*frame)) & -32); + + /* XXX: Check here if we would need to switch stacks.. */ + if (verify_area(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; - /* - * The following is: - * - * bis $30,$30,$16 - * addq $31,0x67,$0 - * call_pal callsys - * - * ie, "sigreturn(stack-pointer)" - */ - __put_user(0x43ecf40047de0410, sc->sc_retcode+0); - __put_user(0x0000000000000083, sc->sc_retcode+1); - imb(); - - /* "return" to the handler */ - regs->r27 = regs->pc = (unsigned long) sa->sa_handler; - regs->r26 = (unsigned long) sc->sc_retcode; - regs->r16 = signr; /* a0: signal number */ - regs->r17 = 0; /* a1: exception code; see gentrap.h */ - regs->r18 = (unsigned long) sc; /* a2: sigcontext pointer */ + __copy_to_user(&frame->info, info, sizeof(siginfo_t)); + + /* Zero all bits of the ucontext besides the sigcontext. */ + __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext)); + + /* Copy in the bits we actually use. 
*/ + __put_user(set->sig[0], &frame->uc.uc_osf_sigmask); + setup_sigcontext(&frame->uc.uc_mcontext, regs, sw, set->sig[0], oldsp); + __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->ka_restorer) { + regs->r26 = (unsigned long) ka->ka_restorer; + } else { + __put_user(INSN_MOV_R30_R16, frame->retcode+0); + __put_user(INSN_LDI_R0+__NR_rt_sigreturn, frame->retcode+1); + __put_user(INSN_CALLSYS, frame->retcode+2); + imb(); + regs->r26 = (unsigned long) frame->retcode; + } + + /* "Return" to the handler */ + regs->r27 = regs->pc = (unsigned long) ka->sa.sa_handler; + regs->r16 = sig; /* a0: signal number */ + regs->r17 = (unsigned long) &frame->info; /* a1: siginfo pointer */ + regs->r18 = (unsigned long) &frame->uc; /* a2: ucontext pointer */ + wrusp((unsigned long) frame); + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->pc, regs->r26); +#endif + + return; + +give_sigsegv: + lock_kernel(); + do_exit(SIGSEGV); } + /* - * OK, we're invoking a handler + * OK, we're invoking a handler. 
*/ -static inline void handle_signal(unsigned long signr, struct sigaction *sa, - unsigned long oldmask, struct pt_regs * regs, struct switch_stack *sw) +static inline void +handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *oldset, struct pt_regs * regs, struct switch_stack *sw) { - setup_frame(sa,regs,sw,signr,oldmask); - - if (sa->sa_flags & SA_ONESHOT) - sa->sa_handler = NULL; - if (!(sa->sa_flags & SA_NOMASK)) - current->blocked |= (sa->sa_mask | _S(signr)) & _BLOCKABLE; + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame(sig, ka, info, oldset, regs, sw); + else + setup_frame(sig, ka, oldset, regs, sw); + + if (ka->sa.sa_flags & SA_RESETHAND) + ka->sa.sa_handler = SIG_DFL; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sigmask_lock); + sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + sigaddset(¤t->blocked,sig); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + } } -static inline void syscall_restart(unsigned long r0, unsigned long r19, - struct pt_regs * regs, struct sigaction * sa) +static inline void +syscall_restart(unsigned long r0, unsigned long r19, + struct pt_regs *regs, struct k_sigaction *ka) { switch (regs->r0) { + case ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { case ERESTARTNOHAND: - no_system_call_restart: regs->r0 = EINTR; break; - case ERESTARTSYS: - if (!(sa->sa_flags & SA_RESTART)) - goto no_system_call_restart; + } /* fallthrough */ - case ERESTARTNOINTR: - regs->r0 = r0; /* reset v0 and a3 and replay syscall */ - regs->r19 = r19; - regs->pc -= 4; + case ERESTARTNOINTR: + regs->r0 = r0; /* reset v0 and a3 and replay syscall */ + regs->r19 = r19; + regs->pc -= 4; + break; } } @@ -293,94 +565,118 @@ * restart. 
"r0" is also used as an indicator whether we can restart at * all (if we get here from anything but a syscall return, it will be 0) */ -asmlinkage int do_signal(unsigned long oldmask, - struct pt_regs * regs, - struct switch_stack * sw, - unsigned long r0, unsigned long r19) -{ - unsigned long mask; - unsigned long signr, single_stepping; - struct sigaction * sa; - int ret; +asmlinkage int +do_signal(sigset_t *oldset, struct pt_regs * regs, struct switch_stack * sw, + unsigned long r0, unsigned long r19) +{ + sigset_t _oldset; + siginfo_t info; + unsigned long signr, single_stepping, core = 0; + struct k_sigaction *ka; - lock_kernel(); - mask = ~current->blocked; single_stepping = ptrace_cancel_bpt(current); - while ((signr = current->signal & mask) != 0) { - signr = ffz(~signr); - clear_bit(signr, ¤t->signal); - sa = current->sig->action + signr; - signr++; + spin_lock_irq(current->sigmask_lock); + if (!oldset) { + _oldset = current->blocked; + oldset = &_oldset; + } + while ((signr = dequeue_signal(¤t->blocked, &info)) != 0) { + spin_unlock_irq(¤t->sigmask_lock); + if ((current->flags & PF_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ current->exit_code = signr; current->state = TASK_STOPPED; notify_parent(current, SIGCHLD); schedule(); single_stepping |= ptrace_cancel_bpt(current); + + /* We're back. Did the debugger cancel the sig? */ if (!(signr = current->exit_code)) - continue; + goto skip_signal; current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ if (signr == SIGSTOP) - continue; - if (_S(signr) & current->blocked) { - current->signal |= _S(signr); - continue; + goto skip_signal; + + /* Update the siginfo structure. Is this good? */ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. 
*/ + if (sigismember(¤t->blocked, signr)) { + send_sig_info(signr, &info, current); + goto skip_signal; } - sa = current->sig->action + signr - 1; - } - if (sa->sa_handler == SIG_IGN) { - if (signr != SIGCHLD) - continue; - /* check for SIGCHLD: it's special */ - while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) - /* nothing */; - continue; } - if (sa->sa_handler == SIG_DFL) { + + ka = ¤t->sig->action[signr-1]; + if (ka->sa.sa_handler == SIG_DFL) { + /* Init gets no signals it doesn't want. */ if (current->pid == 1) - continue; + goto skip_signal; + switch (signr) { case SIGCONT: case SIGCHLD: case SIGWINCH: - continue; + goto skip_signal; case SIGTSTP: case SIGTTIN: case SIGTTOU: if (is_orphaned_pgrp(current->pgrp)) - continue; + goto skip_signal; + /* FALLTHRU */ + case SIGSTOP: - if (current->flags & PF_PTRACED) - continue; current->state = TASK_STOPPED; current->exit_code = signr; - if (!(current->p_pptr->sig->action[SIGCHLD-1].sa_flags & - SA_NOCLDSTOP)) + if (!(current->p_pptr->sig->action[SIGCHLD-1] + .sa.sa_flags & SA_NOCLDSTOP)) notify_parent(current, SIGCHLD); schedule(); single_stepping |= ptrace_cancel_bpt(current); - continue; + break; case SIGQUIT: case SIGILL: case SIGTRAP: case SIGABRT: case SIGFPE: case SIGSEGV: - if (current->binfmt && current->binfmt->core_dump) { - if (current->binfmt->core_dump(signr, regs)) - signr |= 0x80; - } - /* fall through */ + lock_kernel(); + if (current->binfmt + && current->binfmt->core_dump + &¤t->binfmt->core_dump(signr, regs)) + core = 0x80; + unlock_kernel(); + /* FALLTHRU */ + default: - current->signal |= _S(signr & 0x7f); + lock_kernel(); + sigaddset(¤t->signal, signr); current->flags |= PF_SIGNALED; - do_exit(signr); + do_exit((signr & 0x7f) | core); } + } else if (ka->sa.sa_handler == SIG_IGN) { + if (signr == SIGCHLD) { + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; + } + } else { + /* Whee! Actually deliver the signal. 
*/ + if (r0) syscall_restart(r0, r19, regs, ka); + handle_signal(signr, ka, &info, oldset, regs, sw); + if (single_stepping) + ptrace_set_bpt(current); /* re-set bpt */ + return 1; } - if (r0) - syscall_restart(r0, r19, regs, sa); - handle_signal(signr, sa, oldmask, regs, sw); - if (single_stepping) { - ptrace_set_bpt(current); /* re-set breakpoint */ - } - ret = 1; - goto out; + skip_signal: + spin_lock_irq(¤t->sigmask_lock); } + spin_unlock_irq(¤t->sigmask_lock); + if (r0 && (regs->r0 == ERESTARTNOHAND || regs->r0 == ERESTARTSYS || @@ -389,11 +685,8 @@ regs->r19 = r19; regs->pc -= 4; } - if (single_stepping) { + if (single_stepping) ptrace_set_bpt(current); /* re-set breakpoint */ - } - ret = 0; -out: - unlock_kernel(); - return ret; + + return 0; } diff -u --recursive --new-file v2.1.67/linux/arch/alpha/kernel/time.c linux/arch/alpha/kernel/time.c --- v2.1.67/linux/arch/alpha/kernel/time.c Wed Nov 12 13:34:25 1997 +++ linux/arch/alpha/kernel/time.c Sun Nov 30 10:59:02 1997 @@ -17,17 +17,18 @@ * (round system clock to nearest tick instead of truncating) * fixed algorithm in time_init for getting time from CMOS clock */ +#include #include #include #include #include #include #include +#include #include #include #include -#include #include #include @@ -52,16 +53,18 @@ /* lump static variables together for more efficient access: */ static struct { - __u32 last_time; /* cycle counter last time it got invoked */ - unsigned long scaled_ticks_per_cycle; /* ticks/cycle * 2^48 */ - long last_rtc_update; /* last time the cmos clock got updated */ + /* cycle counter last time it got invoked */ + __u32 last_time; + /* ticks/cycle * 2^48 */ + unsigned long scaled_ticks_per_cycle; + /* last time the cmos clock got updated */ + time_t last_rtc_update; } state; static inline __u32 rpcc(void) { __u32 result; - asm volatile ("rpcc %0" : "r="(result)); return result; } @@ -73,37 +76,46 @@ */ void timer_interrupt(int irq, void *dev, struct pt_regs * regs) { - __u32 delta, now; - int 
i, nticks; + const unsigned long half = 1UL << (FIX_SHIFT - 1); + const unsigned long mask = (1UL << (FIX_SHIFT + 1)) - 1; + unsigned long delta; + __u32 now; + long nticks; + /* + * Estimate how many ticks have passed since the last update. + * Round the result, .5 to even. When we loose ticks due to + * say using IDE, the clock has been seen to run up to 15% slow + * if we truncate. + */ now = rpcc(); delta = now - state.last_time; state.last_time = now; - if(hwrpb->cycle_freq) { - nticks = (delta * state.scaled_ticks_per_cycle) >> (FIX_SHIFT-1); - nticks = (nticks+1) >> 1; - } - else nticks=1; /* No way to estimate lost ticks if we don't know - the cycle frequency. */ - for (i = 0; i < nticks; ++i) { + delta = delta * state.scaled_ticks_per_cycle; + if ((delta & mask) != half) + delta += half; + nticks = delta >> FIX_SHIFT; + + do { do_timer(regs); - } + } while (--nticks > 0); /* * If we have an externally synchronized Linux clock, then update * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. */ - if (time_state != TIME_BAD && xtime.tv_sec > state.last_rtc_update + 660 && - xtime.tv_usec > 500000 - (tick >> 1) && - xtime.tv_usec < 500000 + (tick >> 1)) - if (set_rtc_mmss(xtime.tv_sec) == 0) - state.last_rtc_update = xtime.tv_sec; - else - state.last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */ + if (time_state != TIME_BAD + && xtime.tv_sec > state.last_rtc_update + 660 + && xtime.tv_usec >= 500000 - (tick >> 1) + && xtime.tv_usec <= 500000 + (tick >> 1)) { + int tmp = set_rtc_mmss(xtime.tv_sec); + state.last_rtc_update = xtime.tv_sec - (tmp ? 600 : 0); + } } -/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. +/* + * Converts Gregorian date to seconds since 1970-01-01 00:00:00. * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. 
* @@ -140,25 +152,40 @@ unsigned char save_control; #endif void (*irq_handler)(int, void *, struct pt_regs *); - unsigned int year, mon, day, hour, min, sec; + unsigned int year, mon, day, hour, min, sec, cc1, cc2; - /* The Linux interpretation of the CMOS clock register contents: + /* + * The Linux interpretation of the CMOS clock register contents: * When the Update-In-Progress (UIP) flag goes from 1 to 0, the * RTC registers show the second which has precisely just started. * Let's hope other operating systems interpret the RTC the same way. */ - /* read RTC exactly on falling edge of update flag */ - /* Wait for rise.... (may take up to 1 second) */ - - do {} while(!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)); - -/* Jay Estabook : - * Wait for the Update Done Interrupt bit (0x10) in reg C (12) to be set, - * which (hopefully) indicates that the update is really done. - */ - - do {} while(!CMOS_READ(RTC_REG_C) & RTC_UIP); - + do { } while (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)); + do { } while (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); + + /* Read cycle counter exactly on falling edge of update flag */ + cc1 = rpcc(); + + /* If our cycle frequency isn't valid, go another round and give + a guess at what it should be. */ + if (hwrpb->cycle_freq == 0) { + printk("HWPRB cycle frequency bogus. Estimating... "); + + do { } while (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)); + do { } while (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); + cc2 = rpcc(); + hwrpb->cycle_freq = cc2 - cc1; + cc1 = cc2; + + printk("%lu Hz\n", hwrpb->cycle_freq); + } + + /* From John Bowman : allow the values + to settle, as the Update-In-Progress bit going low isn't good + enough on some hardware. 2ms is our guess; we havn't found + bogomips yet, but this is close on a 500Mhz box. 
*/ + __delay(1000000); + sec = CMOS_READ(RTC_SECONDS); min = CMOS_READ(RTC_MINUTES); hour = CMOS_READ(RTC_HOURS); @@ -167,14 +194,14 @@ year = CMOS_READ(RTC_YEAR); if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) - { - BCD_TO_BIN(sec); - BCD_TO_BIN(min); - BCD_TO_BIN(hour); - BCD_TO_BIN(day); - BCD_TO_BIN(mon); - BCD_TO_BIN(year); - } + { + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + } #ifdef ALPHA_PRE_V1_2_SRM_CONSOLE /* * The meaning of life, the universe, and everything. Plus @@ -192,9 +219,10 @@ extern void __you_loose (void); __you_loose(); } - state.last_time = rpcc(); - if(hwrpb->cycle_freq) - state.scaled_ticks_per_cycle = ((unsigned long) HZ << FIX_SHIFT) / hwrpb->cycle_freq; + + state.last_time = cc1; + state.scaled_ticks_per_cycle + = ((unsigned long) HZ << FIX_SHIFT) / hwrpb->cycle_freq; state.last_rtc_update = 0; #ifdef CONFIG_RTC @@ -210,22 +238,52 @@ /* setup timer */ irq_handler = timer_interrupt; if (request_irq(TIMER_IRQ, irq_handler, 0, "timer", NULL)) - panic("Could not allocate timer IRQ!"); + panic("Could not allocate timer IRQ!"); } /* - * We could get better timer accuracy by using the alpha - * time counters or something. Now this is limited to - * the HZ clock frequency. + * Use the cycle counter to estimate an displacement from the last time + * tick. Unfortunately the Alpha designers made only the low 32-bits of + * the cycle counter active, so we overflow on 8.2 seconds on a 500MHz + * part. So we can't do the "find absolute time in terms of cycles" thing + * that the other ports do. 
*/ void do_gettimeofday(struct timeval *tv) { - unsigned long flags; + unsigned long flags, now, delta_cycles, delta_usec; + unsigned long sec, usec; - save_flags(flags); - cli(); - *tv = xtime; + now = rpcc(); + save_and_cli(flags); + sec = xtime.tv_sec; + usec = xtime.tv_usec; + delta_cycles = now - state.last_time; restore_flags(flags); + + /* + * usec = cycles * ticks_per_cycle * 2**48 * 1e6 / (2**48 * ticks) + * = cycles * (s_t_p_c) * 1e6 / (2**48 * ticks) + * = cycles * (s_t_p_c) * 15625 / (2**42 * ticks) + * + * which, given a 600MHz cycle and a 1024Hz tick, has a + * dynamic range of about 1.7e17, which is less than the + * 1.8e19 in an unsigned long, so we are safe from overflow. + * + * Round, but with .5 up always, since .5 to even is harder + * with no clear gain. + */ + + delta_usec = delta_cycles * state.scaled_ticks_per_cycle * 15625; + delta_usec = ((delta_usec / ((1UL << (FIX_SHIFT-6)) * HZ)) + 1) / 2; + + usec += delta_usec; + if (usec >= 1000000) { + sec += 1; + usec -= 1000000; + } + + tv->tv_sec = sec; + tv->tv_usec = usec; } void do_settimeofday(struct timeval *tv) @@ -252,10 +310,12 @@ int real_seconds, real_minutes, cmos_minutes; unsigned char save_control, save_freq_select; - save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */ + /* Tell the clock it's being set */ + save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */ + /* Stop and reset prescaler */ + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); cmos_minutes = CMOS_READ(RTC_MINUTES); @@ -270,8 +330,10 @@ */ real_seconds = nowtime % 60; real_minutes = nowtime / 60; - if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) - real_minutes += 30; /* correct for half hour time zone */ + if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) { + /* correct for half hour time zone */ + real_minutes 
+= 30; + } real_minutes %= 60; if (abs(real_minutes - cmos_minutes) < 30) { diff -u --recursive --new-file v2.1.67/linux/arch/alpha/kernel/traps.c linux/arch/alpha/kernel/traps.c --- v2.1.67/linux/arch/alpha/kernel/traps.c Wed Sep 3 20:52:41 1997 +++ linux/arch/alpha/kernel/traps.c Sun Nov 30 10:59:02 1997 @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -20,45 +21,41 @@ #include -void die_if_kernel(char * str, struct pt_regs * regs, long err, - unsigned long *r9_15) +static void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15) { - long i; - unsigned long ra; - unsigned int * pc; - unsigned long * sp; - - if (regs->ps & 8) - return; - printk("%s(%d): %s %ld\n", current->comm, current->pid, str, err); - sp = (unsigned long *) (regs+1); - __get_user(ra, (unsigned long *)sp); - printk("pc = [<%016lx>] ps = %04lx\n", regs->pc, regs->ps); - printk("rp = [<%016lx>] ra = [<%016lx>]\n", regs->r26, ra); - printk("r0 = %016lx r1 = %016lx\n", regs->r0, regs->r1); - printk("r2 = %016lx r3 = %016lx\n", regs->r2, regs->r3); - printk("r4 = %016lx r5 = %016lx\n", regs->r4, regs->r5); - printk("r6 = %016lx r7 = %016lx\n", regs->r6, regs->r7); + printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx\n", + regs->pc, regs->r26, regs->ps); + printk("r0 = %016lx r1 = %016lx r2 = %016lx\n", + regs->r0, regs->r1, regs->r2); + printk("r3 = %016lx r4 = %016lx r5 = %016lx\n", + regs->r3, regs->r4, regs->r5); + printk("r6 = %016lx r7 = %016lx r8 = %016lx\n", + regs->r6, regs->r7, regs->r8); if (r9_15) { - printk("r8 = %016lx r9 = %016lx\n", regs->r8, r9_15[9]); - printk("r10= %016lx r11= %016lx\n", r9_15[10], r9_15[11]); - printk("r12= %016lx r13= %016lx\n", r9_15[12], r9_15[13]); - printk("r14= %016lx r15= %016lx\n", r9_15[14], r9_15[15]); - } else { - printk("r8 = %016lx\n", regs->r8); - } + printk("r9 = %016lx r10= %016lx r11= %016lx\n", + r9_15[9], r9_15[10], r9_15[11]); + printk("r12= %016lx r13= %016lx r14= %016lx\n", + r9_15[12], r9_15[13], r9_15[14]); + 
printk("r15= %016lx\n", r9_15[15]); + } + + printk("r16= %016lx r17= %016lx r18= %016lx\n", + regs->r16, regs->r17, regs->r18); + printk("r19= %016lx r20= %016lx r21= %016lx\n", + regs->r19, regs->r20, regs->r21); + printk("r22= %016lx r23= %016lx r24= %016lx\n", + regs->r22, regs->r23, regs->r24); + printk("r25= %016lx r27= %016lx r28= %016lx\n", + regs->r25, regs->r27, regs->r28); + printk("gp = %016lx sp = %p\n", regs->gp, regs+1); +} - printk("r16= %016lx r17= %016lx\n", regs->r16, regs->r17); - printk("r18= %016lx r19= %016lx\n", regs->r18, regs->r19); - printk("r20= %016lx r21= %016lx\n", regs->r20, regs->r21); - printk("r22= %016lx r23= %016lx\n", regs->r22, regs->r23); - printk("r24= %016lx r25= %016lx\n", regs->r24, regs->r25); - printk("r27= %016lx r28= %016lx\n", regs->r27, regs->r28); - printk("gp = %016lx sp = %p\n", regs->gp, sp); +static void dik_show_code(unsigned int *pc) +{ + long i; printk("Code:"); - pc = (unsigned int *) regs->pc; for (i = -3; i < 6; i++) { unsigned int insn; if (__get_user(insn, pc+i)) @@ -66,6 +63,11 @@ printk("%c%08x%c",i?' ':'<',insn,i?' 
':'>'); } printk("\n"); +} + +static void dik_show_trace(unsigned long *sp) +{ + long i = 0; printk("Trace:"); while (0x1ff8 & (unsigned long) sp) { extern unsigned long _stext, _etext; @@ -76,9 +78,30 @@ if (tmp >= (unsigned long) &_etext) continue; printk(" [<%lx>]", tmp); + if (++i > 40) { + printk(" ..."); + break; + } } printk("\n"); - +} + +void die_if_kernel(char * str, struct pt_regs *regs, long err, + unsigned long *r9_15) +{ + if (regs->ps & 8) + return; + printk("%s(%d): %s %ld\n", current->comm, current->pid, str, err); + dik_show_regs(regs, r9_15); + dik_show_code((unsigned int *)regs->pc); + dik_show_trace((unsigned long *)(regs+1)); + + if (current->tss.flags & (1UL << 63)) { + printk("die_if_kernel recursion detected.\n"); + sti(); + while (1); + } + current->tss.flags |= (1UL << 63); do_exit(SIGSEGV); } @@ -397,8 +420,6 @@ printk("Bad unaligned kernel access at %016lx: %p %lx %ld\n", pc, va, opcode, reg); do_exit(SIGSEGV); - unlock_kernel(); - return; got_exception: /* Ok, we caught the exception, but we don't want it. Is there @@ -416,13 +437,48 @@ return; } - /* Yikes! No one to forward the exception to. */ + /* + * Yikes! No one to forward the exception to. + * Since the registers are in a weird format, dump them ourselves. 
+ */ lock_kernel(); - printk("%s: unhandled unaligned exception at pc=%lx ra=%lx" - " (bad address = %p)\n", current->comm, - pc, una_reg(26), va); + + printk("%s(%d): unhandled unaligned exception\n", + current->comm, current->pid); + + printk("pc = [<%016lx>] ra = [<%016lx>] ps = %04lx\n", + pc, una_reg(26), regs.ps); + printk("r0 = %016lx r1 = %016lx r2 = %016lx\n", + una_reg(0), una_reg(1), una_reg(2)); + printk("r3 = %016lx r4 = %016lx r5 = %016lx\n", + una_reg(3), una_reg(4), una_reg(5)); + printk("r6 = %016lx r7 = %016lx r8 = %016lx\n", + una_reg(6), una_reg(7), una_reg(8)); + printk("r9 = %016lx r10= %016lx r11= %016lx\n", + una_reg(9), una_reg(10), una_reg(11)); + printk("r12= %016lx r13= %016lx r14= %016lx\n", + una_reg(12), una_reg(13), una_reg(14)); + printk("r15= %016lx\n", una_reg(15)); + printk("r16= %016lx r17= %016lx r18= %016lx\n", + una_reg(16), una_reg(17), una_reg(18)); + printk("r19= %016lx r20= %016lx r21= %016lx\n", + una_reg(19), una_reg(20), una_reg(21)); + printk("r22= %016lx r23= %016lx r24= %016lx\n", + una_reg(22), una_reg(23), una_reg(24)); + printk("r25= %016lx r27= %016lx r28= %016lx\n", + una_reg(25), una_reg(27), una_reg(28)); + printk("gp = %016lx sp = %p\n", regs.gp, ®s+1); + + dik_show_code((unsigned int *)pc); + dik_show_trace((unsigned long *)(®s+1)); + + if (current->tss.flags & (1UL << 63)) { + printk("die_if_kernel recursion detected.\n"); + sti(); + while (1); + } + current->tss.flags |= (1UL << 63); do_exit(SIGSEGV); - unlock_kernel(); } /* @@ -800,26 +856,17 @@ } /* - * DEC means people to use the "retsys" instruction for return from - * a system call, but they are clearly misguided about this. We use - * "rti" in all cases, and fill in the stack with the return values. - * That should make signal handling etc much cleaner. - * - * Even more horribly, DEC doesn't allow system calls from kernel mode. - * "Security" features letting the user do something the kernel can't - * are a thinko. DEC palcode is strange. 
The PAL-code designers probably - * got terminally tainted by VMS at some point. + * Unimplemented system calls. */ -asmlinkage long do_entSys(unsigned long a0, unsigned long a1, unsigned long a2, - unsigned long a3, unsigned long a4, unsigned long a5, - struct pt_regs regs) +asmlinkage long alpha_ni_syscall(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, + unsigned long a4, unsigned long a5, + struct pt_regs regs) { - lock_kernel(); /* Only report OSF system calls. */ if (regs.r0 != 112 && regs.r0 < 300) printk("", regs.r0, a0, a1, a2); - unlock_kernel(); - return -1; + return -ENOSYS; } extern asmlinkage void entMM(void); diff -u --recursive --new-file v2.1.67/linux/arch/alpha/lib/Makefile linux/arch/alpha/lib/Makefile --- v2.1.67/linux/arch/alpha/lib/Makefile Sun Dec 1 11:27:21 1996 +++ linux/arch/alpha/lib/Makefile Sun Nov 30 10:59:02 1997 @@ -19,9 +19,6 @@ lib.a: $(OBJS) $(AR) rcs lib.a $(OBJS) -memset.o: memset.S - $(CC) -c -o memset.o memset.S - __divqu.o: divide.S $(CC) -DDIV -c -o __divqu.o divide.S diff -u --recursive --new-file v2.1.67/linux/arch/alpha/lib/clear_user.S linux/arch/alpha/lib/clear_user.S --- v2.1.67/linux/arch/alpha/lib/clear_user.S Fri Jan 3 08:48:37 1997 +++ linux/arch/alpha/lib/clear_user.S Sun Nov 30 10:59:02 1997 @@ -37,8 +37,8 @@ .set noreorder .align 4 - .globl __clear_user - .ent __clear_user + .globl __do_clear_user + .ent __do_clear_user .frame $30, 0, $28 .prologue 0 @@ -79,7 +79,7 @@ EX( stq_u $5, 0($6) ) # e0 : ret $31, ($28), 1 # .. e1 : -__clear_user: +__do_clear_user: and $6, 7, $4 # e0 : find dest misalignment beq $0, $zerolength # .. e1 : addq $0, $4, $1 # e0 : bias counter @@ -110,4 +110,4 @@ $exception: ret $31, ($28), 1 # .. 
e1 : - .end __clear_user + .end __do_clear_user diff -u --recursive --new-file v2.1.67/linux/arch/alpha/lib/csum_partial_copy.c linux/arch/alpha/lib/csum_partial_copy.c --- v2.1.67/linux/arch/alpha/lib/csum_partial_copy.c Mon Apr 14 16:28:05 1997 +++ linux/arch/alpha/lib/csum_partial_copy.c Sun Nov 30 10:59:02 1997 @@ -8,7 +8,7 @@ */ #include -#include +#include #include diff -u --recursive --new-file v2.1.67/linux/arch/i386/defconfig linux/arch/i386/defconfig --- v2.1.67/linux/arch/i386/defconfig Sat Nov 29 11:25:09 1997 +++ linux/arch/i386/defconfig Sun Nov 30 14:05:58 1997 @@ -58,7 +58,7 @@ CONFIG_BLK_DEV_CMD640=y # CONFIG_BLK_DEV_CMD640_ENHANCED is not set CONFIG_BLK_DEV_RZ1000=y -CONFIG_BLK_DEV_TRITON=y +CONFIG_BLK_DEV_IDEDMA=y # CONFIG_IDE_CHIPSETS is not set # @@ -74,22 +74,27 @@ # # Networking options # +# CONFIG_PACKET is not set # CONFIG_NETLINK is not set # CONFIG_FIREWALL is not set # CONFIG_NET_ALIAS is not set +CONFIG_UNIX=y CONFIG_INET=y # CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set # CONFIG_IP_ACCT is not set +# CONFIG_IP_MASQUERADE is not set # CONFIG_IP_ROUTER is not set # CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_ALIAS is not set # CONFIG_SYN_COOKIES is not set # # (it is safe to leave these untouched) # -# CONFIG_INET_PCTCP is not set # CONFIG_INET_RARP is not set -CONFIG_PATH_MTU_DISCOVERY=y CONFIG_IP_NOSR=y CONFIG_SKB_LARGE=y diff -u --recursive --new-file v2.1.67/linux/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S --- v2.1.67/linux/arch/i386/kernel/entry.S Wed Nov 26 16:24:01 1997 +++ linux/arch/i386/kernel/entry.S Sun Nov 30 10:59:02 1997 @@ -73,15 +73,14 @@ state = 0 counter = 4 priority = 8 -signal = 12 -blocked = 16 -flags = 20 -dbgreg6 = 52 -dbgreg7 = 56 -exec_domain = 60 +flags = 12 +dbgreg6 = 44 +dbgreg7 = 48 +exec_domain = 52 ENOSYS = 38 + #define SAVE_ALL \ cld; \ push %es; \ @@ -170,10 +169,7 @@ ret_with_reschedule: cmpl 
$0,SYMBOL_NAME(need_resched) jne reschedule - movl blocked(%ebx),%eax - movl %eax,%esi # save blocked in %esi for signal handling - notl %eax - andl signal(%ebx),%eax + testb $0x8,flags(%ebx) # PF_SIGPENDING jne signal_return RESTORE_ALL ALIGN @@ -181,7 +177,7 @@ testl $(VM_MASK),EFLAGS(%esp) pushl %esp jne v86_signal_return - pushl %esi + pushl $0 call SYMBOL_NAME(do_signal) addl $8,%esp RESTORE_ALL @@ -190,7 +186,7 @@ call SYMBOL_NAME(save_v86_state) movl %eax,%esp pushl %eax - pushl %esi + pushl $0 call SYMBOL_NAME(do_signal) addl $8,%esp RESTORE_ALL @@ -529,6 +525,13 @@ .long SYMBOL_NAME(sys_setresgid) /* 170 */ .long SYMBOL_NAME(sys_getresgid) .long SYMBOL_NAME(sys_prctl) - .rept NR_syscalls-172 + .long SYMBOL_NAME(sys_rt_sigreturn) + .long SYMBOL_NAME(sys_rt_sigaction) + .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */ + .long SYMBOL_NAME(sys_rt_sigpending) + .long SYMBOL_NAME(sys_rt_sigtimedwait) + .long SYMBOL_NAME(sys_rt_sigqueueinfo) + .long SYMBOL_NAME(sys_rt_sigsuspend) + .rept NR_syscalls-179 .long SYMBOL_NAME(sys_ni_syscall) .endr diff -u --recursive --new-file v2.1.67/linux/arch/i386/kernel/ptrace.c linux/arch/i386/kernel/ptrace.c --- v2.1.67/linux/arch/i386/kernel/ptrace.c Mon Aug 4 16:25:35 1997 +++ linux/arch/i386/kernel/ptrace.c Sun Nov 30 10:59:02 1997 @@ -492,7 +492,7 @@ long tmp; ret = -EIO; - if ((unsigned long) data > NSIG) + if ((unsigned long) data > _NSIG) goto out; if (request == PTRACE_SYSCALL) child->flags |= PF_TRACESYS; @@ -530,7 +530,7 @@ long tmp; ret = -EIO; - if ((unsigned long) data > NSIG) + if ((unsigned long) data > _NSIG) goto out; child->flags &= ~PF_TRACESYS; tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG; @@ -546,7 +546,7 @@ long tmp; ret = -EIO; - if ((unsigned long) data > NSIG) + if ((unsigned long) data > _NSIG) goto out; child->flags &= ~(PF_PTRACED|PF_TRACESYS); wake_up_process(child); @@ -585,9 +585,7 @@ * stopping signal is not SIGTRAP. 
-brl */ if (current->exit_code) { - spin_lock_irq(¤t->sigmask_lock); - current->signal |= (1 << (current->exit_code - 1)); - spin_unlock_irq(¤t->sigmask_lock); + send_sig(current->exit_code, current, 1); + current->exit_code = 0; } - current->exit_code = 0; } diff -u --recursive --new-file v2.1.67/linux/arch/i386/kernel/signal.c linux/arch/i386/kernel/signal.c --- v2.1.67/linux/arch/i386/kernel/signal.c Mon Aug 4 16:25:35 1997 +++ linux/arch/i386/kernel/signal.c Sun Nov 30 12:49:13 1997 @@ -2,6 +2,8 @@ * linux/arch/i386/kernel/signal.c * * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson */ #include @@ -16,40 +18,132 @@ #include #include #include +#include #include -#define _S(nr) (1<<((nr)-1)) +#define DEBUG_SIG 0 -#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) asmlinkage int sys_wait4(pid_t pid, unsigned long *stat_addr, int options, unsigned long *ru); - -asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs); +asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs); /* - * atomically swap in the new signal mask, and wait for a signal. + * Atomically swap in the new signal mask, and wait for a signal. 
*/ -asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, unsigned long set) +asmlinkage int +sys_sigsuspend(int history0, int history1, old_sigset_t mask) { - struct pt_regs * regs = (struct pt_regs *) &restart; - unsigned long mask; + struct pt_regs * regs = (struct pt_regs *) &history0; + sigset_t saveset; + mask &= _BLOCKABLE; spin_lock_irq(¤t->sigmask_lock); - mask = current->blocked; - current->blocked = set & _BLOCKABLE; + saveset = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(current); spin_unlock_irq(¤t->sigmask_lock); regs->eax = -EINTR; while (1) { current->state = TASK_INTERRUPTIBLE; schedule(); - if (do_signal(mask, regs)) + if (do_signal(&saveset, regs)) return -EINTR; } } +asmlinkage int +sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) +{ + struct pt_regs * regs = (struct pt_regs *) &unewset; + sigset_t saveset, newset; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&newset, unewset, sizeof(newset))) + return -EFAULT; + sigdelsetmask(&newset, ~_BLOCKABLE); + + spin_lock_irq(¤t->sigmask_lock); + saveset = current->blocked; + current->blocked = newset; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(&saveset, regs)) + return -EINTR; + } +} + +asmlinkage int +sys_sigaction(int sig, const struct old_sigaction *act, + struct old_sigaction *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? 
&new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + + +/* + * Do a signal return; undo the signal stack. + */ + +struct sigframe +{ + char *pretcode; + int sig; + struct sigcontext sc; + struct _fpstate fpstate; + unsigned long extramask[_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe +{ + char *pretcode; + int sig; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + struct ucontext uc; + struct _fpstate fpstate; + char retcode[8]; +}; + + static inline void restore_i387_hard(struct _fpstate *buf) { #ifdef __SMP__ @@ -64,94 +158,150 @@ #endif current->used_math = 1; current->flags &= ~PF_USEDFPU; - copy_from_user(¤t->tss.i387.hard, buf, sizeof(*buf)); + __copy_from_user(¤t->tss.i387.hard, buf, sizeof(*buf)); } -static void restore_i387(struct _fpstate *buf) +static inline void restore_i387(struct _fpstate *buf) { #ifndef CONFIG_MATH_EMULATION restore_i387_hard(buf); #else - if (hard_math) { + if (hard_math) restore_i387_hard(buf); - return; - } - restore_i387_soft(buf); -#endif + else + restore_i387_soft(buf); +#endif } - -/* - * This sets regs->esp even though we don't actually use sigstacks yet.. 
- */ -asmlinkage int sys_sigreturn(unsigned long __unused) +static int +restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) { -#define COPY(x) regs->x = context->x -#define COPY_SEG(seg) \ -{ unsigned int tmp = context->seg; \ -if ( (tmp & 0xfffc) /* not a NULL selectors */ \ - && (tmp & 0x4) != 0x4 /* not a LDT selector */ \ - && (tmp & 3) != 3 /* not a RPL3 GDT selector */ \ - ) goto badframe; \ -regs->x##seg = tmp; } -#define COPY_SEG_STRICT(seg) \ -{ unsigned int tmp = context->seg; \ -if ((tmp & 0xfffc) && (tmp & 3) != 3) goto badframe; \ -regs->x##seg = tmp; } -#define GET_SEG(seg) \ -{ unsigned int tmp = context->seg; \ -if ( (tmp & 0xfffc) /* not a NULL selectors */ \ - && (tmp & 0x4) != 0x4 /* not a LDT selector */ \ - && (tmp & 3) != 3 /* not a RPL3 GDT selector */ \ - ) goto badframe; \ -__asm__("mov %w0,%%" #seg: :"r" (tmp)); } - struct sigcontext * context; - struct pt_regs * regs; - - regs = (struct pt_regs *) &__unused; - context = (struct sigcontext *) regs->esp; - if (verify_area(VERIFY_READ, context, sizeof(*context))) - goto badframe; - current->blocked = context->oldmask & _BLOCKABLE; - COPY_SEG(ds); - COPY_SEG(es); - GET_SEG(fs); + unsigned int tmp; + +#define COPY(x) __get_user(regs->x, &sc->x) + +#define COPY_SEG(seg) \ + { __get_user(tmp, &sc->seg); \ + if ((tmp & 0xfffc) /* not a NULL selectors */ \ + && (tmp & 0x4) != 0x4 /* not a LDT selector */ \ + && (tmp & 3) != 3) /* not a RPL3 GDT selector */ \ + goto badframe; \ + regs->x##seg = tmp; } + +#define COPY_SEG_STRICT(seg) \ + { __get_user(tmp, &sc->seg); \ + if ((tmp & 0xfffc) && (tmp & 3) != 3) goto badframe; \ + regs->x##seg = tmp; } + +#define GET_SEG(seg) \ + { __get_user(tmp, &sc->seg); \ + if ((tmp & 0xfffc) /* not a NULL selectors */ \ + && (tmp & 0x4) != 0x4 /* not a LDT selector */ \ + && (tmp & 3) != 3) /* not a RPL3 GDT selector */ \ + goto badframe; \ + __asm__ __volatile__("mov %w0,%%" #seg : : "r"(tmp)); } + GET_SEG(gs); - COPY_SEG_STRICT(ss); - 
COPY_SEG_STRICT(cs); - COPY(eip); - COPY(ecx); COPY(edx); + GET_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); + COPY(edi); + COPY(esi); + COPY(ebp); + COPY(esp); COPY(ebx); - COPY(esp); COPY(ebp); - COPY(edi); COPY(esi); - regs->eflags &= ~0x40DD5; - regs->eflags |= context->eflags & 0x40DD5; + COPY(edx); + COPY(ecx); + COPY(eip); + COPY_SEG_STRICT(cs); + COPY_SEG_STRICT(ss); + + __get_user(tmp, &sc->eflags); + regs->eflags = (regs->eflags & ~0x40DD5) | (tmp & 0x40DD5); regs->orig_eax = -1; /* disable syscall checks */ - if (context->fpstate) { - struct _fpstate * buf = context->fpstate; + + __get_user(tmp, (unsigned long *)&sc->fpstate); + if (tmp) { + struct _fpstate * buf = (struct _fpstate *) tmp; if (verify_area(VERIFY_READ, buf, sizeof(*buf))) goto badframe; restore_i387(buf); } - return context->eax; + + __get_user(tmp, &sc->eax); + return tmp; badframe: lock_kernel(); do_exit(SIGSEGV); - unlock_kernel(); } +asmlinkage int sys_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *) &__unused; + struct sigframe *frame = (struct sigframe *)(regs->esp - 8); + sigset_t set; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__get_user(set.sig[0], &frame->sc.oldmask) + || (_NSIG_WORDS > 1 + && __copy_from_user(&set.sig[1], &frame->extramask, + sizeof(frame->extramask)))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + return restore_sigcontext(regs, &frame->sc); + +badframe: + lock_kernel(); + do_exit(SIGSEGV); +} + +asmlinkage int sys_rt_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *) &__unused; + struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4); + sigset_t set; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + 
sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + return restore_sigcontext(regs, &frame->uc.uc_mcontext); + +badframe: + lock_kernel(); + do_exit(SIGSEGV); +} + +/* + * Set up a signal frame. + */ + static inline struct _fpstate * save_i387_hard(struct _fpstate * buf) { #ifdef __SMP__ if (current->flags & PF_USEDFPU) { - __asm__ __volatile__("fnsave %0":"=m" (current->tss.i387.hard)); + __asm__ __volatile__("fnsave %0":"=m"(current->tss.i387.hard)); stts(); current->flags &= ~PF_USEDFPU; } #else if (current == last_task_used_math) { - __asm__ __volatile__("fnsave %0":"=m" (current->tss.i387.hard)); + __asm__ __volatile__("fnsave %0":"=m"(current->tss.i387.hard)); last_task_used_math = NULL; __asm__ __volatile__("fwait"); /* not needed on 486+ */ stts(); @@ -163,7 +313,7 @@ return buf; } -static struct _fpstate * save_i387(struct _fpstate * buf) +static struct _fpstate * save_i387(struct _fpstate *buf) { if (!current->used_math) return NULL; @@ -171,85 +321,168 @@ #ifndef CONFIG_MATH_EMULATION return save_i387_hard(buf); #else - if (hard_math) - return save_i387_hard(buf); - return save_i387_soft(buf); + return hard_math ? save_i387_hard(buf) : save_i387_soft(buf); #endif } -/* - * Set up a signal frame... Make the stack look the way iBCS2 expects - * it to look. 
- */ -static void setup_frame(struct sigaction * sa, - struct pt_regs * regs, int signr, - unsigned long oldmask) -{ - unsigned long * frame; - - frame = (unsigned long *) regs->esp; - if ((regs->xss & 0xffff) != USER_DS && sa->sa_restorer) - frame = (unsigned long *) sa->sa_restorer; - frame -= 64; - if (!access_ok(VERIFY_WRITE,frame,64*4)) - goto segv_and_exit; +static void +setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate, + struct pt_regs *regs, unsigned long mask) +{ + unsigned int tmp; -/* set up the "normal" stack seen by the signal handler (iBCS2) */ -#define __CODE ((unsigned long)(frame+24)) -#define CODE(x) ((unsigned long *) ((x)+__CODE)) - - /* XXX Can possible miss a SIGSEGV when frame crosses a page border - and a thread unmaps it while we are accessing it. - So either check all put_user() calls or don't do it at all. - We use __put_user() here because the access_ok() call was already - done earlier. */ - if (__put_user(__CODE,frame)) + tmp = 0; + __asm__("mov %%gs,%w0" : "=r"(tmp): "0"(tmp)); + __put_user(tmp, (unsigned int *)&sc->gs); + __asm__("mov %%fs,%w0" : "=r"(tmp): "0"(tmp)); + __put_user(tmp, (unsigned int *)&sc->fs); + + __put_user(regs->xes, (unsigned int *)&sc->es); + __put_user(regs->xds, (unsigned int *)&sc->ds); + __put_user(regs->edi, &sc->edi); + __put_user(regs->esi, &sc->esi); + __put_user(regs->ebp, &sc->ebp); + __put_user(regs->esp, &sc->esp); + __put_user(regs->ebx, &sc->ebx); + __put_user(regs->edx, &sc->edx); + __put_user(regs->ecx, &sc->ecx); + __put_user(regs->eax, &sc->eax); + __put_user(current->tss.trap_no, &sc->trapno); + __put_user(current->tss.error_code, &sc->err); + __put_user(regs->eip, &sc->eip); + __put_user(regs->xcs, (unsigned int *)&sc->cs); + __put_user(regs->eflags, &sc->eflags); + __put_user(regs->esp, &sc->esp_at_signal); + __put_user(regs->xss, (unsigned int *)&sc->ss); + + __put_user(save_i387(fpstate), &sc->fpstate); + + /* non-iBCS2 extensions.. 
*/ + __put_user(mask, &sc->oldmask); + __put_user(current->tss.cr2, &sc->cr2); +} + +static void setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) +{ + struct sigframe *frame; + + frame = (struct sigframe *)((regs->esp - sizeof(*frame)) & -8); + + /* XXX: Check here if we need to switch stacks.. */ + + /* This is legacy signal stack switching. */ + if ((regs->xss & 0xffff) != USER_DS + && !(ka->sa.sa_flags & SA_RESTORER) && ka->sa.sa_restorer) + frame = (struct sigframe *) ka->sa.sa_restorer; + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto segv_and_exit; - if (current->exec_domain && current->exec_domain->signal_invmap) - __put_user(current->exec_domain->signal_invmap[signr], frame+1); - else - __put_user(signr, frame+1); + + __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + + setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + + if (_NSIG_WORDS > 1) { + __copy_to_user(frame->extramask, &set->sig[1], + sizeof(frame->extramask)); + } + + /* Set up to return from userspace. If provided, use a stub + already in userspace. 
*/ + if (ka->sa.sa_flags & SA_RESTORER) { + __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { + __put_user(frame->retcode, &frame->pretcode); + /* This is popl %eax ; movl $,%eax ; int $0x80 */ + __put_user(0xb858, (short *)(frame->retcode+0)); + __put_user(__NR_sigreturn, (int *)(frame->retcode+2)); + __put_user(0x80cd, (short *)(frame->retcode+6)); + } + + /* Set up registers for signal handler */ + regs->esp = (unsigned long) frame; + regs->eip = (unsigned long) ka->sa.sa_handler; { - unsigned int tmp = 0; -#define PUT_SEG(seg, mem) \ -__asm__("mov %%" #seg",%w0":"=r" (tmp):"0" (tmp)); __put_user(tmp,mem); - PUT_SEG(gs, frame+2); - PUT_SEG(fs, frame+3); - } - __put_user(regs->xes, frame+4); - __put_user(regs->xds, frame+5); - __put_user(regs->edi, frame+6); - __put_user(regs->esi, frame+7); - __put_user(regs->ebp, frame+8); - __put_user(regs->esp, frame+9); - __put_user(regs->ebx, frame+10); - __put_user(regs->edx, frame+11); - __put_user(regs->ecx, frame+12); - __put_user(regs->eax, frame+13); - __put_user(current->tss.trap_no, frame+14); - __put_user(current->tss.error_code, frame+15); - __put_user(regs->eip, frame+16); - __put_user(regs->xcs, frame+17); - __put_user(regs->eflags, frame+18); - __put_user(regs->esp, frame+19); - __put_user(regs->xss, frame+20); - __put_user((unsigned long) save_i387((struct _fpstate *)(frame+32)),frame+21); -/* non-iBCS2 extensions.. */ - __put_user(oldmask, frame+22); - __put_user(current->tss.cr2, frame+23); -/* set up the return code... 
*/ - __put_user(0x0000b858, CODE(0)); /* popl %eax ; movl $,%eax */ - __put_user(0x80cd0000, CODE(4)); /* int $0x80 */ - __put_user(__NR_sigreturn, CODE(2)); -#undef __CODE -#undef CODE + unsigned long seg = USER_DS; + __asm__("mov %w0,%%fs ; mov %w0,%%gs": "=r"(seg) : "0"(seg)); + set_fs(seg); + regs->xds = seg; + regs->xes = seg; + regs->xss = seg; + regs->xcs = USER_CS; + } + regs->eflags &= ~TF_MASK; + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->eip, frame->pretcode); +#endif + + return; + +segv_and_exit: + lock_kernel(); + do_exit(SIGSEGV); +} + +static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs * regs) +{ + struct rt_sigframe *frame; + + frame = (struct rt_sigframe *)((regs->esp - sizeof(*frame)) & -8); + + /* XXX: Check here if we need to switch stacks.. */ + + /* This is legacy signal stack switching. */ + if ((regs->xss & 0xffff) != USER_DS + && !(ka->sa.sa_flags & SA_RESTORER) && ka->sa.sa_restorer) + frame = (struct rt_sigframe *) ka->sa.sa_restorer; + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto segv_and_exit; + + __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + __put_user(&frame->info, &frame->pinfo); + __put_user(&frame->uc, &frame->puc); + __copy_to_user(&frame->info, info, sizeof(*info)); + + /* Clear all the bits of the ucontext we don't use. */ + __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext)); + + setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + regs, set->sig[0]); + __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + /* Set up to return from userspace. If provided, use a stub + already in userspace. 
*/ + if (ka->sa.sa_flags & SA_RESTORER) { + __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { + __put_user(frame->retcode, &frame->pretcode); + /* This is movl $,%eax ; int $0x80 */ + __put_user(0xb8, (char *)(frame->retcode+0)); + __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1)); + __put_user(0x80cd, (short *)(frame->retcode+5)); + } /* Set up registers for signal handler */ regs->esp = (unsigned long) frame; - regs->eip = (unsigned long) sa->sa_handler; + regs->eip = (unsigned long) ka->sa.sa_handler; { unsigned long seg = USER_DS; - __asm__("mov %w0,%%fs ; mov %w0,%%gs":"=r" (seg) :"0" (seg)); + __asm__("mov %w0,%%fs ; mov %w0,%%gs": "=r"(seg) : "0"(seg)); set_fs(seg); regs->xds = seg; regs->xes = seg; @@ -257,21 +490,28 @@ regs->xcs = USER_CS; } regs->eflags &= ~TF_MASK; + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->eip, frame->pretcode); +#endif + return; segv_and_exit: lock_kernel(); do_exit(SIGSEGV); - unlock_kernel(); } /* * OK, we're invoking a handler */ -static void handle_signal(unsigned long signr, struct sigaction *sa, - unsigned long oldmask, struct pt_regs * regs) + +static void +handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) { - /* are we from a system call? */ + /* Are we from a system call? */ if (regs->orig_eax >= 0) { /* If so, check system call restarting.. 
*/ switch (regs->eax) { @@ -280,7 +520,7 @@ break; case -ERESTARTSYS: - if (!(sa->sa_flags & SA_RESTART)) { + if (!(ka->sa.sa_flags & SA_RESTART)) { regs->eax = -EINTR; break; } @@ -291,14 +531,20 @@ } } - /* set up the stack frame */ - setup_frame(sa, regs, signr, oldmask); + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame(sig, ka, info, oldset, regs); + else + setup_frame(sig, ka, oldset, regs); - if (sa->sa_flags & SA_ONESHOT) - sa->sa_handler = NULL; - if (!(sa->sa_flags & SA_NOMASK)) { + if (ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(¤t->sigmask_lock); - current->blocked |= (sa->sa_mask | _S(signr)) & _BLOCKABLE; + sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + sigaddset(¤t->blocked,sig); + recalc_sigpending(current); spin_unlock_irq(¤t->sigmask_lock); } } @@ -312,107 +558,115 @@ * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ -asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs) +asmlinkage int do_signal(sigset_t *oldset, struct pt_regs *regs) { - unsigned long mask; - unsigned long signr; - struct sigaction * sa; + sigset_t _oldset; + siginfo_t info; + unsigned long signr, core = 0; + struct k_sigaction *ka; /* - * We want the common case to go fast, which + * We want the common case to go fast, which * is why we may in certain cases get here from * kernel mode. Just return without doing anything * if so. */ if ((regs->xcs & 3) != 3) return 1; - mask = ~current->blocked; - while ((signr = current->signal & mask)) { - /* - * This stops gcc flipping out. Otherwise the assembler - * including volatiles for the inline function to get - * current combined with this gets it confused. 
- */ - struct task_struct *t=current; - __asm__("bsf %3,%1\n\t" -#ifdef __SMP__ - "lock ; " -#endif - "btrl %1,%0" - :"=m" (t->signal),"=r" (signr) - :"0" (t->signal), "1" (signr)); - sa = current->sig->action + signr; - signr++; + + spin_lock_irq(¤t->sigmask_lock); + if (!oldset) { + _oldset = current->blocked; + oldset = &_oldset; + } + while ((signr = dequeue_signal(¤t->blocked, &info)) != 0) { + spin_unlock_irq(¤t->sigmask_lock); + if ((current->flags & PF_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ current->exit_code = signr; current->state = TASK_STOPPED; notify_parent(current, SIGCHLD); schedule(); + + /* We're back. Did the debugger cancel the sig? */ if (!(signr = current->exit_code)) - continue; + goto skip_signal; current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ if (signr == SIGSTOP) - continue; - if (_S(signr) & current->blocked) { - spin_lock_irq(¤t->sigmask_lock); - current->signal |= _S(signr); - spin_unlock_irq(¤t->sigmask_lock); - continue; + goto skip_signal; + + /* Update the siginfo structure. Is this good? */ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. */ + if (sigismember(¤t->blocked, signr)) { + send_sig_info(signr, &info, current); + goto skip_signal; } - sa = current->sig->action + signr - 1; - } - if (sa->sa_handler == SIG_IGN) { - if (signr != SIGCHLD) - continue; - /* check for SIGCHLD: it's special */ - while (sys_wait4(-1,NULL,WNOHANG, NULL) > 0) - /* nothing */; - continue; } - if (sa->sa_handler == SIG_DFL) { + + ka = ¤t->sig->action[signr-1]; + if (ka->sa.sa_handler == SIG_DFL) { + /* Init gets no signals it doesn't want. 
*/ if (current->pid == 1) - continue; + goto skip_signal; + switch (signr) { case SIGCONT: case SIGCHLD: case SIGWINCH: - continue; + goto skip_signal; case SIGTSTP: case SIGTTIN: case SIGTTOU: if (is_orphaned_pgrp(current->pgrp)) - continue; + goto skip_signal; + /* FALLTHRU */ + case SIGSTOP: - if (current->flags & PF_PTRACED) - continue; current->state = TASK_STOPPED; current->exit_code = signr; - if (!(current->p_pptr->sig->action[SIGCHLD-1].sa_flags & - SA_NOCLDSTOP)) + if (!(current->p_pptr->sig->action[SIGCHLD-1] + .sa.sa_flags & SA_NOCLDSTOP)) notify_parent(current, SIGCHLD); schedule(); - continue; + break; case SIGQUIT: case SIGILL: case SIGTRAP: case SIGABRT: case SIGFPE: case SIGSEGV: lock_kernel(); - if (current->binfmt && current->binfmt->core_dump) { - if (current->binfmt->core_dump(signr, regs)) - signr |= 0x80; - } + if (current->binfmt + && current->binfmt->core_dump + &¤t->binfmt->core_dump(signr, regs)) + core = 0x80; unlock_kernel(); - /* fall through */ - default: - spin_lock_irq(¤t->sigmask_lock); - current->signal |= _S(signr & 0x7f); - spin_unlock_irq(¤t->sigmask_lock); + /* FALLTHRU */ + default: + lock_kernel(); + sigaddset(¤t->signal, signr); current->flags |= PF_SIGNALED; - - lock_kernel(); /* 8-( */ - do_exit(signr); - unlock_kernel(); + do_exit((signr & 0x7f) | core); + } + } else if (ka->sa.sa_handler == SIG_IGN) { + if (signr == SIGCHLD) { + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; } + } else { + /* Whee! Actually deliver the signal. */ + handle_signal(signr, ka, &info, oldset, regs); + return 1; } - handle_signal(signr, sa, oldmask, regs); - return 1; + skip_signal: + spin_lock_irq(¤t->sigmask_lock); } /* Did we come from a system call? 
*/ diff -u --recursive --new-file v2.1.67/linux/arch/i386/kernel/vm86.c linux/arch/i386/kernel/vm86.c --- v2.1.67/linux/arch/i386/kernel/vm86.c Tue May 13 22:41:01 1997 +++ linux/arch/i386/kernel/vm86.c Sun Nov 30 10:59:02 1997 @@ -438,8 +438,13 @@ } if (trapno !=1) return 1; /* we let this handle by the calling routine */ - if (current->flags & PF_PTRACED) - current->blocked &= ~(1 << (SIGTRAP-1)); + if (current->flags & PF_PTRACED) { + unsigned long flags; + spin_lock_irqsave(¤t->sigmask_lock, flags); + sigdelset(¤t->blocked, SIGTRAP); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + } send_sig(SIGTRAP, current, 1); current->tss.trap_no = trapno; current->tss.error_code = error_code; diff -u --recursive --new-file v2.1.67/linux/arch/i386/lib/delay.c linux/arch/i386/lib/delay.c --- v2.1.67/linux/arch/i386/lib/delay.c Sat Nov 29 11:25:09 1997 +++ linux/arch/i386/lib/delay.c Sun Nov 30 13:48:47 1997 @@ -9,7 +9,7 @@ */ #include -#include +#include #ifdef __SMP__ #include diff -u --recursive --new-file v2.1.67/linux/drivers/block/Config.in linux/drivers/block/Config.in --- v2.1.67/linux/drivers/block/Config.in Sat Nov 29 11:25:09 1997 +++ linux/drivers/block/Config.in Sun Nov 30 13:48:47 1997 @@ -23,7 +23,12 @@ fi if [ "$CONFIG_PCI" = "y" ]; then bool ' RZ1000 chipset bugfix/support' CONFIG_BLK_DEV_RZ1000 - bool ' Intel PIIX/PIIX3/PIIX4 (Triton 430FX/HX/VX/TX, 440FX) DMA support' CONFIG_BLK_DEV_TRITON + bool ' PCI bus-master DMA support' CONFIG_BLK_DEV_IDEDMA + if [ "$CONFIG_BLK_DEV_IDEDMA" = "y" ]; then + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' OPTi 82C621 enhanced support (EXPERIMENTAL)' CONFIG_BLK_DEV_OPTI621 + fi + fi fi bool ' Other IDE chipset support' CONFIG_IDE_CHIPSETS if [ "$CONFIG_IDE_CHIPSETS" = "y" ]; then @@ -32,10 +37,7 @@ bool ' DTC-2278 support' CONFIG_BLK_DEV_DTC2278 bool ' Holtek HT6560B support' CONFIG_BLK_DEV_HT6560B if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' PROMISE DC4030 support 
(EXPERIMENTAL)' CONFIG_BLK_DEV_PROMISE - if [ "$CONFIG_PCI" = "y" ]; then - bool ' OPTi 82C621 support (EXPERIMENTAL)' CONFIG_BLK_DEV_OPTI621 - fi + bool ' PROMISE DC4030 support (EXPERIMENTAL)' CONFIG_BLK_DEV_PDC4030 fi bool ' QDI QD6580 support' CONFIG_BLK_DEV_QD6580 bool ' UMC 8672 support' CONFIG_BLK_DEV_UMC8672 diff -u --recursive --new-file v2.1.67/linux/drivers/block/Makefile linux/drivers/block/Makefile --- v2.1.67/linux/drivers/block/Makefile Sat Nov 29 11:25:09 1997 +++ linux/drivers/block/Makefile Sun Nov 30 13:48:47 1997 @@ -96,8 +96,8 @@ L_OBJS += cmd640.o endif -ifeq ($(CONFIG_BLK_DEV_TRITON),y) -L_OBJS += triton.o +ifeq ($(CONFIG_BLK_DEV_IDEDMA),y) +L_OBJS += ide-dma.o endif ifeq ($(CONFIG_BLK_DEV_PS2),y) @@ -125,8 +125,8 @@ L_OBJS += ali14xx.o endif -ifeq ($(CONFIG_BLK_DEV_PROMISE),y) -L_OBJS += promise.o +ifeq ($(CONFIG_BLK_DEV_PDC4030),y) +L_OBJS += pdc4030.o endif ifeq ($(CONFIG_BLK_DEV_OPTI621),y) diff -u --recursive --new-file v2.1.67/linux/drivers/block/floppy.c linux/drivers/block/floppy.c --- v2.1.67/linux/drivers/block/floppy.c Wed Nov 26 16:24:01 1997 +++ linux/drivers/block/floppy.c Sun Nov 30 13:48:47 1997 @@ -4065,7 +4065,7 @@ if (FDCS->address != -1){ if (check_region(FDCS->address, 6) < 0 || check_region(FDCS->address+7, 1) < 0) { - DPRINT("Floppy io-port 0x%04x in use\n", FDCS->address); + DPRINT("Floppy io-port 0x%04lx in use\n", FDCS->address); fd_free_irq(); fd_free_dma(); while(--fdc >= 0) { diff -u --recursive --new-file v2.1.67/linux/drivers/block/ide-disk.c linux/drivers/block/ide-disk.c --- v2.1.67/linux/drivers/block/ide-disk.c Tue May 13 22:41:04 1997 +++ linux/drivers/block/ide-disk.c Sun Nov 30 13:48:47 1997 @@ -1,7 +1,7 @@ /* - * linux/drivers/block/ide-disk.c Version 1.01 Nov 25, 1996 + * linux/drivers/block/ide-disk.c Version 1.02 Nov 29, 1997 * - * Copyright (C) 1994-1996 Linus Torvalds & authors (see below) + * Copyright (C) 1994-1998 Linus Torvalds & authors (see below) */ /* @@ -39,6 +39,7 @@ * Version 1.00 move 
disk only code from ide.c to ide-disk.c * support optional byte-swapping of all data * Version 1.01 fix previous byte-swapping code + * Verions 1.02 remove ", LBA" from drive identification msgs */ #undef REALLY_SLOW_IO /* most systems can safely undef this */ @@ -308,23 +309,23 @@ */ static void do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) { -#ifdef CONFIG_BLK_DEV_PROMISE +#ifdef CONFIG_BLK_DEV_PDC4030 ide_hwif_t *hwif = HWIF(drive); - int use_promise_io = 0; -#endif /* CONFIG_BLK_DEV_PROMISE */ + int use_pdc4030_io = 0; +#endif /* CONFIG_BLK_DEV_PDC4030 */ OUT_BYTE(drive->ctl,IDE_CONTROL_REG); OUT_BYTE(rq->nr_sectors,IDE_NSECTOR_REG); -#ifdef CONFIG_BLK_DEV_PROMISE - if (IS_PROMISE_DRIVE) { - if (hwif->is_promise2 || rq->cmd == READ) { - use_promise_io = 1; +#ifdef CONFIG_BLK_DEV_PDC4030 + if (IS_PDC4030_DRIVE) { + if (hwif->is_pdc4030_2 || rq->cmd == READ) { + use_pdc4030_io = 1; } } - if (drive->select.b.lba || use_promise_io) { -#else /* !CONFIG_BLK_DEV_PROMISE */ + if (drive->select.b.lba || use_pdc4030_io) { +#else /* !CONFIG_BLK_DEV_PDC4030 */ if (drive->select.b.lba) { -#endif /* CONFIG_BLK_DEV_PROMISE */ +#endif /* CONFIG_BLK_DEV_PDC4030 */ #ifdef DEBUG printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n", drive->name, (rq->cmd==READ)?"read":"writ", @@ -350,26 +351,27 @@ head, sect, rq->nr_sectors, (unsigned long) rq->buffer); #endif } -#ifdef CONFIG_BLK_DEV_PROMISE - if (use_promise_io) { - do_promise_io (drive, rq); +#ifdef CONFIG_BLK_DEV_PDC4030 + if (use_pdc4030_io) { + extern void do_pdc4030_io(ide_drive_t *, struct request *); + do_pdc4030_io (drive, rq); return; } -#endif /* CONFIG_BLK_DEV_PROMISE */ +#endif /* CONFIG_BLK_DEV_PDC4030 */ if (rq->cmd == READ) { -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive))) return; -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ ide_set_handler(drive, &read_intr, WAIT_CMD); 
OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG); return; } if (rq->cmd == WRITE) { -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive))) return; -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG); if (ide_wait_stat(drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name, @@ -459,17 +461,17 @@ OUT_BYTE(drive->cyl,IDE_LCYL_REG); OUT_BYTE(drive->cyl>>8,IDE_HCYL_REG); OUT_BYTE(((drive->head-1)|drive->select.all)&0xBF,IDE_SELECT_REG); - if (!IS_PROMISE_DRIVE) + if (!IS_PDC4030_DRIVE) ide_cmd(drive, WIN_SPECIFY, drive->sect, &set_geometry_intr); } else if (s->b.recalibrate) { s->b.recalibrate = 0; - if (!IS_PROMISE_DRIVE) + if (!IS_PDC4030_DRIVE) ide_cmd(drive, WIN_RESTORE, drive->sect, &recal_intr); } else if (s->b.set_multmode) { s->b.set_multmode = 0; if (drive->id && drive->mult_req > drive->id->max_multsect) drive->mult_req = drive->id->max_multsect; - if (!IS_PROMISE_DRIVE) + if (!IS_PDC4030_DRIVE) ide_cmd(drive, WIN_SETMULT, drive->mult_req, &set_multmode_intr); } else if (s->all) { int special = s->all; @@ -602,12 +604,16 @@ (void) idedisk_capacity (drive); /* initialize LBA selection */ - printk (KERN_INFO "%s: %.40s, %ldMB w/%dkB Cache, %sCHS=%d/%d/%d%s\n", + printk (KERN_INFO "%s: %.40s, %ldMB w/%dkB Cache, CHS=%d/%d/%d", drive->name, id->model, idedisk_capacity(drive)/2048L, id->buf_size/2, - drive->select.b.lba ? "LBA, " : "", - drive->bios_cyl, drive->bios_head, drive->bios_sect, - drive->using_dma ? 
", DMA" : ""); - + drive->bios_cyl, drive->bios_head, drive->bios_sect); + if (drive->using_dma) { + if ((id->field_valid & 4) && (id->dma_ultra & (id->dma_ultra >> 8) & 7)) + printk(", UDMA"); + else + printk(", DMA"); + } + printk("\n"); drive->mult_count = 0; if (id->max_multsect) { drive->mult_req = INITIAL_MULT_COUNT; diff -u --recursive --new-file v2.1.67/linux/drivers/block/ide-dma.c linux/drivers/block/ide-dma.c --- v2.1.67/linux/drivers/block/ide-dma.c Wed Dec 31 16:00:00 1969 +++ linux/drivers/block/ide-dma.c Sun Nov 30 14:55:59 1997 @@ -0,0 +1,626 @@ +/* + * linux/drivers/block/ide-dma.c Version 4.01 November 30, 1997 + * + * Copyright (c) 1995-1998 Mark Lord + * May be copied or modified under the terms of the GNU General Public License + */ + +/* + * This module provides support for the bus-master IDE DMA functions + * of various PCI chipsets, including the Intel PIIX (i82371FB for + * the 430 FX chipset), the PIIX3 (i82371SB for the 430 HX/VX and + * 440 chipsets), and the PIIX4 (i82371AB for the 430 TX chipset) + * ("PIIX" stands for "PCI ISA IDE Xcellerator"). + * + * Pretty much the same code works for other IDE PCI bus-mastering chipsets. + * + * DMA is supported for all IDE devices (disk drives, cdroms, tapes, floppies). + * + * By default, DMA support is prepared for use, but is currently enabled only + * for drives which already have DMA enabled (UltraDMA or mode 2 multi/single), + * or which are recognized as "good" (see table below). Drives with only mode0 + * or mode1 (multi/single) DMA should also work with this chipset/driver + * (eg. MC2112A) but are not enabled by default. + * + * Use "hdparm -i" to view modes supported by a given drive. + * + * The hdparm-2.4 (or later) utility can be used for manually enabling/disabling + * DMA support, but must be (re-)compiled against this kernel version or later. + * + * To enable DMA, use "hdparm -d1 /dev/hd?" on a per-drive basis after booting. 
+ * If problems arise, ide.c will disable DMA operation after a few retries. + * This error recovery mechanism works and has been extremely well exercised. + * + * IDE drives, depending on their vintage, may support several different modes + * of DMA operation. The boot-time modes are indicated with a "*" in + * the "hdparm -i" listing, and can be changed with *knowledgeable* use of + * the "hdparm -X" feature. There is seldom a need to do this, as drives + * normally power-up with their "best" PIO/DMA modes enabled. + * + * Testing has been done with a rather extensive number of drives, + * with Quantum & Western Digital models generally outperforming the pack, + * and Fujitsu & Conner (and some Seagate which are really Conner) drives + * showing more lackluster throughput. + * + * Keep an eye on /var/adm/messages for "DMA disabled" messages. + * + * Some people have reported trouble with Intel Zappa motherboards. + * This can be fixed by upgrading the AMI BIOS to version 1.00.04.BS0, + * available from ftp://ftp.intel.com/pub/bios/10004bs0.exe + * (thanks to Glen Morrell for researching this). + * + * Thanks to "Christopher J. Reimer" for fixing the + * problem with some (all?) ACER motherboards/BIOSs. Hopefully the fix + * still works here (?). + * + * Thanks to "Benoit Poulot-Cazajous" for testing + * "TX" chipset compatibility and for providing patches for the "TX" chipset. + * + * Thanks to Christian Brunner for taking a good first crack + * at generic DMA -- his patches were referred to when preparing this code. + * + * Most importantly, thanks to Robert Bringman + * for supplying a Promise UDMA board & WD UDMA drive for this work! + * + * And, yes, Intel Zappa boards really *do* use both PIIX IDE ports. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ide.h" + +/* + * good_dma_drives() lists the model names (from "hdparm -i") + * of drives which do not support mode2 DMA but which are + * known to work fine with this interface under Linux. + */ +const char *good_dma_drives[] = {"Micropolis 2112A", + "CONNER CTMA 4000", + NULL}; + +/* + * Our Physical Region Descriptor (PRD) table should be large enough + * to handle the biggest I/O request we are likely to see. Since requests + * can have no more than 256 sectors, and since the typical blocksize is + * two or more sectors, we could get by with a limit of 128 entries here for + * the usual worst case. Most requests seem to include some contiguous blocks, + * further reducing the number of table entries required. + * + * The driver reverts to PIO mode for individual requests that exceed + * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling + * 100% of all crazy scenarios here is not necessary. + * + * As it turns out though, we must allocate a full 4KB page for this, + * so the two PRD tables (ide0 & ide1) will each get half of that, + * allowing each to have about 256 entries (8 bytes each) from this. 
+ */ +#define PRD_BYTES 8 +#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) + +static int config_drive_for_dma (ide_drive_t *); + +/* + * dma_intr() is the handler for disk read/write DMA interrupts + */ +static void dma_intr (ide_drive_t *drive) +{ + byte stat, dma_stat; + int i; + struct request *rq = HWGROUP(drive)->rq; + unsigned short dma_base = HWIF(drive)->dma_base; + + dma_stat = inb(dma_base+2); /* get DMA status */ + outb(inb(dma_base)&~1, dma_base); /* stop DMA operation */ + stat = GET_STAT(); /* get drive status */ + if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) { + if ((dma_stat & 7) == 4) { /* verify good DMA status */ + rq = HWGROUP(drive)->rq; + for (i = rq->nr_sectors; i > 0;) { + i -= rq->current_nr_sectors; + ide_end_request(1, HWGROUP(drive)); + } + return; + } + printk("%s: bad DMA status: 0x%02x\n", drive->name, dma_stat); + } + sti(); + ide_error(drive, "dma_intr", stat); +} + +/* + * build_dmatable() prepares a dma request. + * Returns 0 if all went okay, returns 1 otherwise. + */ +static int build_dmatable (ide_drive_t *drive) +{ + struct request *rq = HWGROUP(drive)->rq; + struct buffer_head *bh = rq->bh; + unsigned long size, addr, *table = HWIF(drive)->dmatable; + unsigned int count = 0; + + do { + /* + * Determine addr and size of next buffer area. We assume that + * individual virtual buffers are always composed linearly in + * physical memory. For example, we assume that any 8kB buffer + * is always composed of two adjacent physical 4kB pages rather + * than two possibly non-adjacent physical 4kB pages. 
+ */ + if (bh == NULL) { /* paging requests have (rq->bh == NULL) */ + addr = virt_to_bus (rq->buffer); + size = rq->nr_sectors << 9; + } else { + /* group sequential buffers into one large buffer */ + addr = virt_to_bus (bh->b_data); + size = bh->b_size; + while ((bh = bh->b_reqnext) != NULL) { + if ((addr + size) != virt_to_bus (bh->b_data)) + break; + size += bh->b_size; + } + } + + /* + * Fill in the dma table, without crossing any 64kB boundaries. + * We assume 16-bit alignment of all blocks. + */ + while (size) { + if (++count >= PRD_ENTRIES) { + printk("%s: DMA table too small\n", drive->name); + return 1; /* revert to PIO for this request */ + } else { + unsigned long bcount = 0x10000 - (addr & 0xffff); + if (bcount > size) + bcount = size; + *table++ = addr; + *table++ = bcount & 0xffff; + addr += bcount; + size -= bcount; + } + } + } while (bh != NULL); + if (count) { + *--table |= 0x80000000; /* set End-Of-Table (EOT) bit */ + return 0; + } + printk("%s: empty DMA table?\n", drive->name); + return 1; /* let the PIO routines handle this weirdness */ +} + +/* + * ide_dmaproc() initiates/aborts DMA read/write operations on a drive. + * + * The caller is assumed to have selected the drive and programmed the drive's + * sector address using CHS or LBA. All that remains is to prepare for DMA + * and then issue the actual read/write DMA/PIO command to the drive. + * + * For ATAPI devices, we just prepare for DMA and return. The caller should + * then issue the packet command to the drive and call us again with + * ide_dma_begin afterwards. + * + * Returns 0 if all went well. + * Returns 1 if DMA read/write could not be started, in which case + * the caller should revert to PIO for the current request. 
+ */ +static int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive) +{ + unsigned long dma_base = HWIF(drive)->dma_base; + unsigned int reading = 0; + + switch (func) { + case ide_dma_off: + printk("%s: DMA disabled\n", drive->name); + case ide_dma_off_quietly: + case ide_dma_on: + drive->using_dma = (func == ide_dma_on); + return 0; + case ide_dma_abort: + outb(inb(dma_base)&~1, dma_base); /* stop DMA */ + return 0; + case ide_dma_check: + return config_drive_for_dma (drive); + case ide_dma_status_bad: + return ((inb(dma_base+2) & 7) != 4); /* verify good DMA status */ + case ide_dma_transferred: + return 0; /* NOT IMPLEMENTED: number of bytes actually transferred */ + case ide_dma_begin: + outb(inb(dma_base)|1, dma_base); /* begin DMA */ + return 0; + default: + printk("ide_dmaproc: unsupported func: %d\n", func); + return 1; + case ide_dma_read: + reading = (1 << 3); + case ide_dma_write: + if (build_dmatable (drive)) + return 1; + outl(virt_to_bus (HWIF(drive)->dmatable), dma_base + 4); /* PRD table */ + outb(reading, dma_base); /* specify r/w */ + outb(inb(dma_base+2)|0x06, dma_base+2); /* clear status bits */ + if (drive->media != ide_disk) + return 0; + ide_set_handler(drive, &dma_intr, WAIT_CMD); /* issue cmd to drive */ + OUT_BYTE(reading ? 
WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG); + outb(inb(dma_base)|1, dma_base); /* begin DMA */ + return 0; + } +} + +static int config_drive_for_dma (ide_drive_t *drive) +{ + const char **list; + + struct hd_driveid *id = drive->id; + if (id && (id->capability & 1)) { + /* Enable DMA on any drive that has UltraDMA (mode 0/1/2) enabled */ + if (id->field_valid & 4) /* UltraDMA */ + if ((id->dma_ultra & (id->dma_ultra >> 8) & 7)) + return ide_dmaproc(ide_dma_on, drive); + /* Enable DMA on any drive that has mode2 DMA (multi or single) enabled */ + if (id->field_valid & 2) /* regular DMA */ + if ((id->dma_mword & 0x404) == 0x404 || (id->dma_1word & 0x404) == 0x404) + return ide_dmaproc(ide_dma_on, drive); + /* Consult the list of known "good" drives */ + list = good_dma_drives; + while (*list) { + if (!strcmp(*list++,id->model)) + return ide_dmaproc(ide_dma_on, drive); + } + } + return ide_dmaproc(ide_dma_off_quietly, drive); +} + +#define DEVID_PIIX (PCI_VENDOR_ID_INTEL |(PCI_DEVICE_ID_INTEL_82371_1 <<16)) +#define DEVID_PIIX3 (PCI_VENDOR_ID_INTEL |(PCI_DEVICE_ID_INTEL_82371SB_1 <<16)) +#define DEVID_PIIX4 (PCI_VENDOR_ID_INTEL |(PCI_DEVICE_ID_INTEL_82371AB <<16)) +#define DEVID_VP_IDE (PCI_VENDOR_ID_VIA |(PCI_DEVICE_ID_VIA_82C586_1 <<16)) +#define DEVID_PDC2046 (PCI_VENDOR_ID_PROMISE|(PCI_DEVICE_ID_PROMISE_20246 <<16)) +#define DEVID_RZ1000 (PCI_VENDOR_ID_PCTECH |(PCI_DEVICE_ID_PCTECH_RZ1000 <<16)) +#define DEVID_RZ1001 (PCI_VENDOR_ID_PCTECH |(PCI_DEVICE_ID_PCTECH_RZ1001 <<16)) +#define DEVID_CMD640 (PCI_VENDOR_ID_CMD |(PCI_DEVICE_ID_CMD_640 <<16)) +#define DEVID_CMD646 (PCI_VENDOR_ID_CMD |(PCI_DEVICE_ID_CMD_646 <<16)) +#define DEVID_SIS5513 (PCI_VENDOR_ID_SI |(PCI_DEVICE_ID_SI_5513 <<16)) +#define DEVID_OPTI (PCI_VENDOR_ID_OPTI |(PCI_DEVICE_ID_OPTI_82C621 <<16)) +#define DEVID_OPTI2 (PCI_VENDOR_ID_OPTI |(0xd568 /* from datasheets */ <<16)) + +#ifdef CONFIG_BLK_DEV_OPTI621 +extern void ide_init_opti621(byte, byte, ide_hwif_t *); +#define INIT_OPTI 
(&ide_init_opti621) +#else +#define INIT_OPTI (NULL) +#endif + +typedef struct ide_pci_enablebit_s { + byte reg; /* byte pci reg holding the enable-bit */ + byte mask; /* mask to isolate the enable-bit */ + byte val; /* value of masked reg when "enabled" */ +} ide_pci_enablebit_t; + +typedef struct ide_pci_device_s { + unsigned int id; + const char *name; + void (*init_hwif)(byte bus, byte fn, ide_hwif_t *hwif); + ide_pci_enablebit_t enablebits[2]; +} ide_pci_device_t; + +static ide_pci_device_t ide_pci_chipsets[] = { + {DEVID_PIIX, "PIIX", NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}} }, + {DEVID_PIIX3, "PIIX3", NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}} }, + {DEVID_PIIX4, "PIIX4", NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}} }, + {DEVID_VP_IDE, "VP_IDE", NULL, {{0x40,0x02,0x02}, {0x40,0x01,0x01}} }, + {DEVID_PDC2046, "PDC2046", NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}} }, + {DEVID_RZ1000, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}} }, + {DEVID_RZ1001, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}} }, + {DEVID_CMD640, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}} }, + {DEVID_OPTI, "OPTI", INIT_OPTI, {{0x45,0x80,0x00}, {0x40,0x08,0x00}} }, + {DEVID_OPTI2, "OPTI2", INIT_OPTI, {{0x45,0x80,0x00}, {0x40,0x08,0x00}} }, + {DEVID_SIS5513, "SIS5513", NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}} }, + {DEVID_CMD646, "CMD646", NULL, {{0x00,0x00,0x00}, {0x51,0x80,0x80}} }, + {0, "PCI_IDE", NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}} }}; + +__initfunc(static ide_pci_device_t *lookup_devid(unsigned int devid)) +{ + ide_pci_device_t *d = ide_pci_chipsets; + while (d->id && d->id != devid) + ++d; + return d; +} + +__initfunc(static void ide_setup_dma (ide_hwif_t *hwif, unsigned short dmabase)) +{ + static unsigned long dmatable = 0; + static unsigned leftover = 0; + + printk(" %s: BM-DMA at 0x%04x-0x%04x", hwif->name, dmabase, dmabase+7); + if (check_region(dmabase, 8)) { + printk(" -- ERROR, PORTS ALREADY IN USE"); + } else { + request_region(dmabase, 8, hwif->name); + 
hwif->dma_base = dmabase; + if (leftover < (PRD_ENTRIES * PRD_BYTES)) { + /* + * The BM-DMA uses full 32bit addr, so we can + * safely use __get_free_page() here instead + * of __get_dma_pages() -- no ISA limitations. + */ + dmatable = __get_free_pages(GFP_KERNEL,1,0); + leftover = dmatable ? PAGE_SIZE : 0; + } + if (dmatable) { + printk(", PRD table at %08lx", dmatable); + hwif->dmatable = (unsigned long *) dmatable; + dmatable += (PRD_ENTRIES * PRD_BYTES); + leftover -= (PRD_ENTRIES * PRD_BYTES); + outl(virt_to_bus(hwif->dmatable), dmabase + 4); + hwif->dmaproc = &ide_dmaproc; + } + } + printk("\n"); +} + +/* The next two functions were stolen from cmd640.c, with + a few modifications */ + +__initfunc(static void write_pcicfg_dword (byte fn, unsigned short reg, long val)) +{ + unsigned long flags; + + save_flags(flags); + cli(); + outl_p((reg & 0xfc) | ((fn * 0x100) + 0x80000000), 0xcf8); + outl_p(val, (reg & 3) | 0xcfc); + restore_flags(flags); +} + +__initfunc(static long read_pcicfg_dword (byte fn, unsigned short reg)) +{ + long b; + unsigned long flags; + + save_flags(flags); + cli(); + outl_p((reg & 0xfc) | ((fn * 0x100) + 0x80000000), 0xcf8); + b = inl_p((reg & 3) | 0xcfc); + restore_flags(flags); + return b; +} + +/* + * Search for an (apparently) unused block of I/O space + * of "size" bytes in length. 
+ */ +__initfunc(static short find_free_region (unsigned short size)) +{ + unsigned short i, base = 0xe800; + for (base = 0xe800; base > 0; base -= 0x800) { + if (!check_region(base,size)) { + for (i = 0; i < size; i++) { + if (inb(base+i) != 0xff) + goto next; + } + return base; /* success */ + } + next: + } + return 0; /* failure */ +} + +/* + * Fetch the Bus-Master I/O Base-Address (BMIBA) from PCI space: + */ +__initfunc(static unsigned int ide_get_or_set_bmiba (byte bus, byte fn, const char *name)) +{ + unsigned int bmiba = 0; + unsigned short base; + int rc; + + if ((rc = pcibios_read_config_dword(bus, fn, 0x20, &bmiba))) { + printk("%s: failed to read BMIBA\n", name); + } else if ((bmiba &= 0xfff0) == 0) { + printk("%s: BMIBA is invalid (0x%04x, BIOS problem)\n", name, bmiba); + base = find_free_region(16); + if (base) { + printk("%s: setting BMIBA to 0x%04x\n", name, base); + pcibios_write_config_dword(bus, fn, 0x20, base | 1); + pcibios_read_config_dword(bus, fn, 0x20, &bmiba); + bmiba &= 0xfff0; + if (bmiba != base) { + if (bus == 0) { + printk("%s: operation failed, bypassing BIOS to try again\n", name); + write_pcicfg_dword(fn, 0x20, base | 1); + bmiba = read_pcicfg_dword(fn, 0x20) & 0xfff0; + } + if (bmiba != base) { + printk("%s: operation failed, DMA disabled\n", name); + bmiba = 0; + } + } + } + } + return bmiba; +} + +/* + * Match a PCI IDE port against an entry in ide_hwifs[], + * based on io_base port if possible. + */ +__initfunc(ide_hwif_t *ide_match_hwif (unsigned int io_base)) +{ + int h; + ide_hwif_t *hwif; + + /* + * Look for a hwif with matching io_base specified using + * parameters to ide_setup(). + */ + for (h = 0; h < MAX_HWIFS; ++h) { + hwif = &ide_hwifs[h]; + if (hwif->io_ports[IDE_DATA_OFFSET] == io_base) { + if (hwif->chipset == ide_generic) + return hwif; /* a perfect match */ + } + } + /* + * Look for a hwif with matching io_base default value. + * If chipset is "ide_unknown", then claim that hwif slot. 
+ * Otherwise, some other chipset has already claimed it.. :( + */ + for (h = 0; h < MAX_HWIFS; ++h) { + hwif = &ide_hwifs[h]; + if (hwif->io_ports[IDE_DATA_OFFSET] == io_base) { + if (hwif->chipset == ide_unknown) + return hwif; /* match */ + return NULL; /* already claimed */ + } + } + /* + * Okay, there is no hwif matching our io_base, + * so we'll just claim an unassigned slot. + * Give preference to claiming ide2/ide3 before ide0/ide1, + * just in case there's another interface yet-to-be-scanned + * which uses ports 1f0/170 (the ide0/ide1 defaults). + */ + for (h = 0; h < MAX_HWIFS; ++h) { + int hwifs[] = {2,3,1,0}; /* assign 3rd/4th before 1st/2nd */ + hwif = &ide_hwifs[hwifs[h]]; + if (hwif->chipset == ide_unknown) + return hwif; /* pick an unused entry */ + } + return NULL; +} + +/* + * ide_setup_pci_device() looks at the primary/secondary interfaces + * on a PCI IDE device and, if they are enabled, prepares the IDE driver + * for use with them. This generic code works for most PCI chipsets. + * + * One thing that is not standardized is the location of the + * primary/secondary interface "enable/disable" bits. For chipsets that + * we "know" about, this information is in the ide_pci_device_t struct; + * for all other chipsets, we just assume both interfaces are enabled. 
+ */ +__initfunc(static void ide_setup_pci_device (byte bus, byte fn, unsigned int bmiba, ide_pci_device_t *d)) +{ + unsigned int port, at_least_one_hwif_enabled = 0; + unsigned short base = 0, ctl = 0; + byte tmp = 0, pciirq = 0; + ide_hwif_t *hwif; + + if (pcibios_read_config_byte(bus, fn, 0x3c, &pciirq)) + pciirq = 0; /* probe later if not set */ + for (port = 0; port <= 1; ++port) { + ide_pci_enablebit_t *e = &(d->enablebits[port]); + if (e->reg) { + if (pcibios_read_config_byte(bus, fn, e->reg, &tmp)) { + printk("%s: unable to read pci reg 0x%x\n", d->name, e->reg); + } else if ((tmp & e->mask) != e->val) + continue; /* port not enabled */ + } + if (pcibios_read_config_word(bus, fn, 0x14+(port*8), &ctl)) + ctl = 0; + if ((ctl &= 0xfffc) == 0) + ctl = 0x3f4 ^ (port << 7); + if (pcibios_read_config_word(bus, fn, 0x10+(port*8), &base)) + base = 0; + if ((base &= 0xfff8) == 0) + base = 0x1F0 ^ (port << 7); + if ((hwif = ide_match_hwif(base)) == NULL) { + printk("%s: no room in hwif table for port %d\n", d->name, port); + continue; + } + hwif->chipset = ide_pci; + if (hwif->io_ports[IDE_DATA_OFFSET] != base) { + ide_init_hwif_ports(hwif->io_ports, base, NULL); + hwif->io_ports[IDE_CONTROL_OFFSET] = ctl + 2; + } + if (!hwif->irq) + hwif->irq = port ? 0 : pciirq; /* always probe for secondary irq */ + if (bmiba) { + if ((inb(bmiba+2) & 0x80)) { /* simplex DMA only? */ + printk("%s: simplex device: DMA disabled\n", d->name); + } else { /* supports simultaneous DMA on both channels */ + ide_setup_dma(hwif, bmiba + (8 * port)); + } + } + if (d->init_hwif) /* Call chipset-specific routine for each enabled hwif */ + d->init_hwif(bus, fn, hwif); + at_least_one_hwif_enabled = 1; + } + if (!at_least_one_hwif_enabled) + printk("%s: neither IDE port is enabled\n", d->name); +} + +/* + * ide_scan_pci_device() examines all functions of a PCI device, + * looking for IDE interfaces and/or devices in ide_pci_chipsets[]. 
+ */ +__initfunc(static inline void ide_scan_pci_device (unsigned int bus, unsigned int fn)) +{ + unsigned int devid, ccode; + unsigned short pcicmd; + ide_pci_device_t *d; + byte hedt; + + if (pcibios_read_config_byte(bus, fn, 0x0e, &hedt)) + hedt = 0; + do { + if (pcibios_read_config_dword(bus, fn, 0x00, &devid) + || devid == 0xffffffff + || pcibios_read_config_dword(bus, fn, 0x08, &ccode)) + return; + d = lookup_devid(devid); + if (d->name == NULL) /* some chips (cmd640 & rz1000) are handled elsewhere */ + continue; + if (d->id || (ccode >> 16) == PCI_CLASS_STORAGE_IDE) { + printk("%s: %sIDE device on PCI bus %d function %d\n", d->name, d->id ? "" : "unknown ", bus, fn); + /* + * See if IDE ports are enabled + */ + if (pcibios_read_config_word(bus, fn, 0x04, &pcicmd)) { + printk("%s: error accessing PCICMD\n", d->name); + } else if ((pcicmd & 1) == 0) { + printk("%s: device is disabled (BIOS)\n", d->name); + } else { + unsigned int bmiba = 0; + /* + * Check for Bus-Master DMA capability + */ + if (!(pcicmd & 4) || !(bmiba = ide_get_or_set_bmiba(bus, fn, d->name))) { + if ((ccode >> 16) == PCI_CLASS_STORAGE_RAID || (ccode && 0x8000)) + printk("%s: Bus-Master DMA is disabled (BIOS)\n", d->name); + } + ide_setup_pci_device(bus, fn, bmiba, d); + } + } + } while (hedt == 0x80 && (++fn & 7)); +} + +/* + * ide_scan_pcibus() gets invoked at boot time from ide.c + */ +__initfunc(void ide_scan_pcibus (void)) +{ + unsigned int bus, dev; + + if (!pcibios_present()) + return; + for (bus = 0; bus <= 255; ++bus) { + for (dev = 0; dev <= 31; ++dev) { + ide_scan_pci_device(bus, dev << 3); + } + } +} + diff -u --recursive --new-file v2.1.67/linux/drivers/block/ide-floppy.c linux/drivers/block/ide-floppy.c --- v2.1.67/linux/drivers/block/ide-floppy.c Tue May 13 22:41:04 1997 +++ linux/drivers/block/ide-floppy.c Sun Nov 30 13:48:47 1997 @@ -532,7 +532,7 @@ } } -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA static void idefloppy_update_buffers (ide_drive_t *drive, 
idefloppy_pc_t *pc) { struct request *rq = pc->rq; @@ -541,7 +541,7 @@ while ((bh = rq->bh) != NULL) idefloppy_end_request (1, HWGROUP(drive)); } -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ /* * idefloppy_queue_pc_head generates a new packet command request in front @@ -681,7 +681,7 @@ printk (KERN_INFO "ide-floppy: Reached idefloppy_pc_intr interrupt handler\n"); #endif /* IDEFLOPPY_DEBUG_LOG */ -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA if (test_bit (PC_DMA_IN_PROGRESS, &pc->flags)) { if (HWIF(drive)->dmaproc(ide_dma_status_bad, drive)) { set_bit (PC_DMA_ERROR, &pc->flags); @@ -694,7 +694,7 @@ printk (KERN_INFO "ide-floppy: DMA finished\n"); #endif /* IDEFLOPPY_DEBUG_LOG */ } -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ status.all = GET_STAT(); /* Clear the interrupt */ @@ -725,7 +725,7 @@ pc->callback(drive); /* Command finished - Call the callback function */ return; } -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA if (test_and_clear_bit (PC_DMA_IN_PROGRESS, &pc->flags)) { printk (KERN_ERR "ide-floppy: The floppy wants to issue more interrupts in DMA mode\n"); printk (KERN_ERR "ide-floppy: DMA disabled, reverting to PIO\n"); @@ -733,7 +733,7 @@ ide_do_reset (drive); return; } -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ bcount.b.high=IN_BYTE (IDE_BCOUNTH_REG); /* Get the number of bytes to transfer */ bcount.b.low=IN_BYTE (IDE_BCOUNTL_REG); /* on this interrupt */ ireason.all=IN_BYTE (IDE_IREASON_REG); @@ -841,14 +841,14 @@ pc->current_position=pc->buffer; bcount.all=pc->request_transfer; /* Request to transfer the entire buffer at once */ -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) { printk (KERN_WARNING "ide-floppy: DMA disabled, reverting to PIO\n"); HWIF(drive)->dmaproc(ide_dma_off, drive); } if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma) 
dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive); -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ OUT_BYTE (drive->ctl,IDE_CONTROL_REG); OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG); /* Use PIO/DMA */ @@ -856,12 +856,12 @@ OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG); OUT_BYTE (drive->select.all,IDE_SELECT_REG); -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA if (dma_ok) { /* Begin DMA, if necessary */ set_bit (PC_DMA_IN_PROGRESS, &pc->flags); (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); } -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ if (test_bit (IDEFLOPPY_DRQ_INTERRUPT, &floppy->flags)) { ide_set_handler (drive, &idefloppy_transfer_pc, WAIT_CMD); diff -u --recursive --new-file v2.1.67/linux/drivers/block/ide-probe.c linux/drivers/block/ide-probe.c --- v2.1.67/linux/drivers/block/ide-probe.c Mon Nov 3 13:04:25 1997 +++ linux/drivers/block/ide-probe.c Sun Nov 30 13:48:47 1997 @@ -114,13 +114,13 @@ if (cmd == WIN_PIDENTIFY) { byte type = (id->config >> 8) & 0x1f; printk("ATAPI "); -#ifdef CONFIG_BLK_DEV_PROMISE - if (HWIF(drive)->is_promise2) { +#ifdef CONFIG_BLK_DEV_PDC4030 + if (HWIF(drive)->is_pdc4030_2) { printk(" -- not supported on 2nd Promise port\n"); drive->present = 0; return; } -#endif /* CONFIG_BLK_DEV_PROMISE */ +#endif /* CONFIG_BLK_DEV_PDC4030 */ switch (type) { case ide_floppy: if (!strstr(id->model, "oppy") && !strstr(id->model, "poyp") && !strstr(id->model, "ZIP")) @@ -192,15 +192,16 @@ } else hd_status = IDE_ALTSTATUS_REG; /* use non-intrusive polling */ -#if CONFIG_BLK_DEV_PROMISE - if (IS_PROMISE_DRIVE) { - if (promise_cmd(drive,PROMISE_IDENTIFY)) { +#if CONFIG_BLK_DEV_PDC4030 + if (IS_PDC4030_DRIVE) { + extern int pdc4030_cmd(ide_drive_t *, byte); + if (pdc4030_cmd(drive,PROMISE_IDENTIFY)) { if (irqs) (void) probe_irq_off(irqs); return 1; } } else -#endif /* CONFIG_BLK_DEV_PROMISE */ +#endif /* CONFIG_BLK_DEV_PDC4030 */ 
OUT_BYTE(cmd,IDE_COMMAND_REG); /* ask drive for ID */ timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; timeout += jiffies; @@ -363,10 +364,10 @@ byte cmos_disks, *BIOS = (byte *) &drive_info; int unit; -#ifdef CONFIG_BLK_DEV_PROMISE - if (hwif->is_promise2) +#ifdef CONFIG_BLK_DEV_PDC4030 + if (hwif->is_pdc4030_2) return; -#endif /* CONFIG_BLK_DEV_PROMISE */ +#endif /* CONFIG_BLK_DEV_PDC4030 */ outb_p(0x12,0x70); /* specify CMOS address 0x12 */ cmos_disks = inb_p(0x71); /* read the data from 0x12 */ /* Extract drive geometry from CMOS+BIOS if not already setup */ @@ -397,12 +398,12 @@ return; if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) probe_cmos_for_drives (hwif); -#if CONFIG_BLK_DEV_PROMISE - if (!hwif->is_promise2 && +#if CONFIG_BLK_DEV_PDC4030 + if (!hwif->is_pdc4030_2 && (ide_check_region(hwif->io_ports[IDE_DATA_OFFSET],8) || ide_check_region(hwif->io_ports[IDE_CONTROL_OFFSET],1))) { #else if (ide_check_region(hwif->io_ports[IDE_DATA_OFFSET],8) || ide_check_region(hwif->io_ports[IDE_CONTROL_OFFSET],1)) { -#endif /* CONFIG_BLK_DEV_PROMISE */ +#endif /* CONFIG_BLK_DEV_PDC4030 */ int msgout = 0; for (unit = 0; unit < MAX_DRIVES; ++unit) { ide_drive_t *drive = &hwif->drives[unit]; diff -u --recursive --new-file v2.1.67/linux/drivers/block/ide-tape.c linux/drivers/block/ide-tape.c --- v2.1.67/linux/drivers/block/ide-tape.c Sat Oct 25 02:44:15 1997 +++ linux/drivers/block/ide-tape.c Sun Nov 30 13:48:47 1997 @@ -170,13 +170,13 @@ * unit, making performance almost independent of the * chosen user block size. * Some improvements in error recovery. - * By cooperating with triton.c, bus mastering DMA can + * By cooperating with ide-dma.c, bus mastering DMA can * now sometimes be used with IDE tape drives as well. * Bus mastering DMA has the potential to dramatically * reduce the CPU's overhead when accessing the device, * and can be enabled by using hdparm -d1 on the tape's * block device interface. 
For more info, read the - * comments in triton.c. + * comments in ide-dma.c. * Ver 1.4 Mar 13 96 Fixed serialize support. * Ver 1.5 Apr 12 96 Fixed shared interface operation, broken in 1.3.85. * Fixed pipelined read mode inefficiency. @@ -1093,7 +1093,7 @@ } } -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA static void idetape_update_buffers (idetape_pc_t *pc) { struct buffer_head *bh = pc->bh; @@ -1116,7 +1116,7 @@ } pc->bh = bh; } -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ /* * idetape_postpone_request postpones the current request so that @@ -1610,7 +1610,7 @@ printk (KERN_INFO "ide-tape: pc = %x, sense key = %x, asc = %x, ascq = %x\n",pc->c[0],result->sense_key,result->asc,result->ascq); #endif /* IDETAPE_DEBUG_LOG */ -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA /* * Correct pc->actually_transferred by asking the tape. @@ -1619,7 +1619,7 @@ pc->actually_transferred = pc->request_transfer - tape->tape_block_size * ntohl (get_unaligned (&result->information)); idetape_update_buffers (pc); } -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ if (pc->c[0] == IDETAPE_READ_CMD && result->filemark) { pc->error = IDETAPE_ERROR_FILEMARK; set_bit (PC_ABORT, &pc->flags); @@ -1721,7 +1721,7 @@ printk (KERN_INFO "ide-tape: Reached idetape_pc_intr interrupt handler\n"); #endif /* IDETAPE_DEBUG_LOG */ -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA if (test_bit (PC_DMA_IN_PROGRESS, &pc->flags)) { if (HWIF(drive)->dmaproc(ide_dma_status_bad, drive)) { set_bit (PC_DMA_ERROR, &pc->flags); @@ -1739,7 +1739,7 @@ printk (KERN_INFO "ide-tape: DMA finished\n"); #endif /* IDETAPE_DEBUG_LOG */ } -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ status.all = GET_STAT(); /* Clear the interrupt */ @@ -1776,7 +1776,7 @@ pc->callback(drive); /* Command finished - Call the callback function */ return; } -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA if 
(test_and_clear_bit (PC_DMA_IN_PROGRESS, &pc->flags)) { printk (KERN_ERR "ide-tape: The tape wants to issue more interrupts in DMA mode\n"); printk (KERN_ERR "ide-tape: DMA disabled, reverting to PIO\n"); @@ -1784,7 +1784,7 @@ ide_do_reset (drive); return; } -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ bcount.b.high=IN_BYTE (IDE_BCOUNTH_REG); /* Get the number of bytes to transfer */ bcount.b.low=IN_BYTE (IDE_BCOUNTL_REG); /* on this interrupt */ ireason.all=IN_BYTE (IDE_IREASON_REG); @@ -1915,14 +1915,14 @@ pc->current_position=pc->buffer; bcount.all=pc->request_transfer; /* Request to transfer the entire buffer at once */ -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) { printk (KERN_WARNING "ide-tape: DMA disabled, reverting to PIO\n"); HWIF(drive)->dmaproc(ide_dma_off, drive); } if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma) dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive); -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ OUT_BYTE (drive->ctl,IDE_CONTROL_REG); OUT_BYTE (dma_ok ? 
1:0,IDE_FEATURE_REG); /* Use PIO/DMA */ @@ -1952,12 +1952,12 @@ return; } atapi_output_bytes (drive,pc->c,12); /* Send the actual packet */ -#ifdef CONFIG_BLK_DEV_TRITON +#ifdef CONFIG_BLK_DEV_IDEDMA if (dma_ok) { /* Begin DMA, if necessary */ set_bit (PC_DMA_IN_PROGRESS, &pc->flags); (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); } -#endif /* CONFIG_BLK_DEV_TRITON */ +#endif /* CONFIG_BLK_DEV_IDEDMA */ } static void idetape_media_access_finished (ide_drive_t *drive) diff -u --recursive --new-file v2.1.67/linux/drivers/block/ide.c linux/drivers/block/ide.c --- v2.1.67/linux/drivers/block/ide.c Thu Aug 14 20:49:16 1997 +++ linux/drivers/block/ide.c Sun Nov 30 13:48:47 1997 @@ -1,7 +1,7 @@ /* - * linux/drivers/block/ide.c Version 6.03 June 4, 1997 + * linux/drivers/block/ide.c Version 6.05 November 30, 1997 * - * Copyright (C) 1994-1997 Linus Torvalds & authors (see below) + * Copyright (C) 1994-1998 Linus Torvalds & authors (see below) */ #define _IDE_C /* needed by */ @@ -65,197 +65,6 @@ * Version 1.4 BETA added auto probing for irq(s) * Version 1.5 BETA added ALPHA (untested) support for IDE cd-roms, * ... 
- * Version 3.5 correct the bios_cyl field if it's too small - * (linux 1.1.76) (to help fdisk with brain-dead BIOSs) - * Version 3.6 cosmetic corrections to comments and stuff - * (linux 1.1.77) reorganise probing code to make it understandable - * added halfway retry to probing for drive identification - * added "hdx=noprobe" command line option - * allow setting multmode even when identification fails - * Version 3.7 move set_geometry=1 from do_identify() to ide_init() - * increase DRQ_WAIT to eliminate nuisance messages - * wait for DRQ_STAT instead of DATA_READY during probing - * (courtesy of Gary Thomas gary@efland.UU.NET) - * Version 3.8 fixed byte-swapping for confused Mitsumi cdrom drives - * update of ide-cd.c from Scott, allows blocksize=1024 - * cdrom probe fixes, inspired by jprang@uni-duisburg.de - * Version 3.9 don't use LBA if lba_capacity looks funny - * correct the drive capacity calculations - * fix probing for old Seagates without IDE_ALTSTATUS_REG - * fix byte-ordering for some NEC cdrom drives - * Version 3.10 disable multiple mode by default; was causing trouble - * Version 3.11 fix mis-identification of old WD disks as cdroms - * Version 3,12 simplify logic for selecting initial mult_count - * (fixes problems with buggy WD drives) - * Version 3.13 remove excess "multiple mode disabled" messages - * Version 3.14 fix ide_error() handling of BUSY_STAT - * fix byte-swapped cdrom strings (again.. arghh!) - * ignore INDEX bit when checking the ALTSTATUS reg - * Version 3.15 add SINGLE_THREADED flag for use with dual-CMD i/f - * ignore WRERR_STAT for non-write operations - * added vlb_sync support for DC-2000A & others, - * (incl. 
some Promise chips), courtesy of Frank Gockel - * Version 3.16 convert vlb_32bit and vlb_sync into runtime flags - * add ioctls to get/set VLB flags (HDIO_[SG]ET_CHIPSET) - * rename SINGLE_THREADED to SUPPORT_SERIALIZE, - * add boot flag to "serialize" operation for CMD i/f - * add optional support for DTC2278 interfaces, - * courtesy of andy@cercle.cts.com (Dyan Wile). - * add boot flag to enable "dtc2278" probe - * add probe to avoid EATA (SCSI) interfaces, - * courtesy of neuffer@goofy.zdv.uni-mainz.de. - * Version 4.00 tidy up verify_area() calls - heiko@colossus.escape.de - * add flag to ignore WRERR_STAT for some drives - * courtesy of David.H.West@um.cc.umich.edu - * assembly syntax tweak to vlb_sync - * removable drive support from scuba@cs.tu-berlin.de - * add transparent support for DiskManager-6.0x "Dynamic - * Disk Overlay" (DDO), most of this is in genhd.c - * eliminate "multiple mode turned off" message at boot - * Version 4.10 fix bug in ioctl for "hdparm -c3" - * fix DM6:DDO support -- now works with LILO, fdisk, ... - * don't treat some naughty WD drives as removable - * Version 4.11 updated DM6 support using info provided by OnTrack - * Version 5.00 major overhaul, multmode setting fixed, vlb_sync fixed - * added support for 3rd/4th/alternative IDE ports - * created ide.h; ide-cd.c now compiles separate from ide.c - * hopefully fixed infinite "unexpected_intr" from cdroms - * zillions of other changes and restructuring - * somehow reduced overall memory usage by several kB - * probably slowed things down slightly, but worth it - * Version 5.01 AT LAST!! Finally understood why "unexpected_intr" - * was happening at various times/places: whenever the - * ide-interface's ctl_port was used to "mask" the irq, - * it also would trigger an edge in the process of masking - * which would result in a self-inflicted interrupt!! - * (such a stupid way to build a hardware interrupt mask). - * This is now fixed (after a year of head-scratching). 
- * Version 5.02 got rid of need for {enable,disable}_irq_list() - * Version 5.03 tune-ups, comments, remove "busy wait" from drive resets - * removed PROBE_FOR_IRQS option -- no longer needed - * OOOPS! fixed "bad access" bug for 2nd drive on an i/f - * Version 5.04 changed "ira %d" to "irq %d" in DEBUG message - * added more comments, cleaned up unexpected_intr() - * OOOPS! fixed null pointer problem in ide reset code - * added autodetect for Triton chipset -- no effect yet - * Version 5.05 OOOPS! fixed bug in revalidate_disk() - * OOOPS! fixed bug in ide_do_request() - * added ATAPI reset sequence for cdroms - * Version 5.10 added Bus-Mastered DMA support for Triton Chipset - * some (mostly) cosmetic changes - * Version 5.11 added ht6560b support by malafoss@snakemail.hut.fi - * reworked PCI scanning code - * added automatic RZ1000 detection/support - * added automatic PCI CMD640 detection/support - * added option for VLB CMD640 support - * tweaked probe to find cdrom on hdb with disks on hda,hdc - * Version 5.12 some performance tuning - * added message to alert user to bad /dev/hd[cd] entries - * OOOPS! fixed bug in atapi reset - * driver now forces "serialize" again for all cmd640 chips - * noticed REALLY_SLOW_IO had no effect, moved it to ide.c - * made do_drive_cmd() into public ide_do_drive_cmd() - * Version 5.13 fixed typo ('B'), thanks to houston@boyd.geog.mcgill.ca - * fixed ht6560b support - * Version 5.13b (sss) fix problem in calling ide_cdrom_setup() - * don't bother invalidating nonexistent partitions - * Version 5.14 fixes to cmd640 support.. maybe it works now(?) - * added & tested full EZ-DRIVE support -- don't use LILO! - * don't enable 2nd CMD640 PCI port during init - conflict - * Version 5.15 bug fix in init_cmd640_vlb() - * bug fix in interrupt sharing code - * Version 5.16 ugh.. fix "serialize" support, broken in 5.15 - * remove "Huh?" 
from cmd640 code - * added qd6580 interface speed select from Colten Edwards - * Version 5.17 kludge around bug in BIOS32 on Intel triton motherboards - * Version 5.18 new CMD640 code, moved to cmd640.c, #include'd for now - * new UMC8672 code, moved to umc8672.c, #include'd for now - * disallow turning on DMA when h/w not capable of DMA - * Version 5.19 fix potential infinite timeout on resets - * extend reset poll into a general purpose polling scheme - * add atapi tape drive support from Gadi Oxman - * simplify exit from _intr routines -- no IDE_DO_REQUEST - * Version 5.20 leave current rq on blkdev request list during I/O - * generalized ide_do_drive_cmd() for tape/cdrom driver use - * Version 5.21 fix nasty cdrom/tape bug (ide_preempt was messed up) - * Version 5.22 fix ide_xlate_1024() to work with/without drive->id - * Version 5.23 miscellaneous touch-ups - * Version 5.24 fix #if's for SUPPORT_CMD640 - * Version 5.25 more touch-ups, fix cdrom resets, ... - * cmd640.c now configs/compiles separate from ide.c - * Version 5.26 keep_settings now maintains the using_dma flag - * fix [EZD] remap message to only output at boot time - * fix "bad /dev/ entry" message to say hdc, not hdc0 - * fix ide_xlate_1024() to respect user specified CHS - * use CHS from partn table if it looks translated - * re-merged flags chipset,vlb_32bit,vlb_sync into io_32bit - * keep track of interface chipset type, when known - * add generic PIO mode "tuneproc" mechanism - * fix cmd640_vlb option - * fix ht6560b support (was completely broken) - * umc8672.c now configures/compiles separate from ide.c - * move dtc2278 support to dtc2278.c - * move ht6560b support to ht6560b.c - * move qd6580 support to qd6580.c - * add ali14xx support in ali14xx.c - * Version 5.27 add [no]autotune parameters to help cmd640 - * move rz1000 support to rz1000.c - * Version 5.28 #include "ide_modes.h" - * fix disallow_unmask: now per-interface "no_unmask" bit - * force io_32bit to be the same on drive pairs of 
dtc2278 - * improved IDE tape error handling, and tape DMA support - * bugfix in ide_do_drive_cmd() for cdroms + serialize - * Version 5.29 fixed non-IDE check for too many physical heads - * don't use LBA if capacity is smaller than CHS - * Version 5.30 remove real_devices kludge, formerly used by genhd.c - * Version 5.32 change "KB" to "kB" - * fix serialize (was broken in kernel 1.3.72) - * add support for "hdparm -I" - * use common code for disk/tape/cdrom IDE_DRIVE_CMDs - * add support for Promise DC4030VL caching card - * improved serialize support - * put partition check back into alphabetical order - * add config option for PCMCIA baggage - * try to make PCMCIA support safer to use - * improve security on ioctls(): all are suser() only - * Version 5.33 improve handling of HDIO_DRIVE_CMDs that read data - * Version 5.34 fix irq-sharing problem from 5.33 - * fix cdrom ioctl problem from 5.33 - * Version 5.35 cosmetic changes - * fix cli() problem in try_to_identify() - * Version 5.36 fixes to optional PCMCIA support - * Version 5.37 don't use DMA when "noautotune" is specified - * Version 5.37a (go) fix shared irq probing (was broken in kernel 1.3.72) - * call unplug_device() from ide_do_drive_cmd() - * Version 5.38 add "hdx=none" option, courtesy of Joel Maslak - * mask drive irq after use, if sharing with another hwif - * add code to help debug weird cmd640 problems - * Version 5.39 fix horrible error in earlier irq sharing "fix" - * Version 5.40 fix serialization -- was broken in 5.39 - * help sharing by masking device irq after probing - * Version 5.41 more fixes to irq sharing/serialize detection - * disable io_32bit by default on drive reset - * Version 5.42 simplify irq-masking after probe - * fix NULL pointer deref in save_match() - * Version 5.43 Ugh.. 
unexpected_intr is back: try to exterminate it - * Version 5.44 Fix for "irq probe failed" on cmd640 - * change path on message regarding MAKEDEV.ide - * add a throttle to the unexpected_intr() messages - * Version 5.45 fix ugly parameter parsing bugs (thanks Derek) - * include Gadi's magic fix for cmd640 unexpected_intr - * include mc68000 patches from Geert Uytterhoeven - * add Gadi's fix for PCMCIA cdroms - * Version 5.46 remove the mc68000 #ifdefs for 2.0.x - * Version 5.47 fix set_tune race condition - * fix bug in earlier PCMCIA cdrom update - * Version 5.48 if def'd, invoke CMD640_DUMP_REGS when irq probe fails - * lengthen the do_reset1() pulse, for laptops - * add idebus=xx parameter for cmd640 and ali chipsets - * no_unmask flag now per-drive instead of per-hwif - * fix tune_req so that it gets done immediately - * fix missing restore_flags() in ide_ioctl - * prevent use of io_32bit on cmd640 with no prefetch - * Version 5.49 fix minor quirks in probing routines * Version 5.50 allow values as small as 20 for idebus= * Version 5.51 force non io_32bit in drive_cmd_intr() * change delay_10ms() to delay_50ms() to fix problems @@ -281,6 +90,11 @@ * Version 6.02 fix ide_ack_intr() call * check partition table on floppies * Version 6.03 handle bad status bit sequencing in ide_wait_stat() + * Version 6.10 deleted old entries from this list of updates + * replaced triton.c with ide-dma.c generic PCI DMA + * added support for BIOS-enabled UltraDMA + * rename all "promise" things to "pdc4030" + * fix EZ-DRIVE handling on small disks * * Some additional driver compile-time options are in ide.h * @@ -867,14 +681,14 @@ #if FANCY_STATUS_DUMPS if (drive->media == ide_disk) { printk(" { "); - if (err & BBD_ERR) printk("BadSector "); + if (err & ABRT_ERR) printk("DriveStatusError "); + if (err & ICRC_ERR) printk((err & ABRT_ERR) ? 
"BadCRC " : "BadSector "); if (err & ECC_ERR) printk("UncorrectableError "); if (err & ID_ERR) printk("SectorIdNotFound "); - if (err & ABRT_ERR) printk("DriveStatusError "); if (err & TRK0_ERR) printk("TrackZeroNotFound "); if (err & MARK_ERR) printk("AddrMarkNotFound "); printk("}"); - if (err & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) { + if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) { byte cur = IN_BYTE(IDE_SELECT_REG); if (cur & 0x40) { /* using LBA? */ printk(", LBAsect=%ld", (unsigned long) @@ -922,7 +736,7 @@ } /* - * ide_error() takes action based on the error returned by the controller. + * ide_error() takes action based on the error returned by the drive. */ void ide_error (ide_drive_t *drive, const char *msg, byte stat) { @@ -943,7 +757,12 @@ } else { if (drive->media == ide_disk && (stat & ERR_STAT)) { /* err has different meaning on cdrom and tape */ - if (err & (BBD_ERR | ECC_ERR)) /* retries won't help these */ + if (err == ABRT_ERR) { + if (drive->select.b.lba && IN_BYTE(IDE_COMMAND_REG) == WIN_SPECIFY) + return; /* some newer drives don't support WIN_SPECIFY */ + } else if ((err & (ABRT_ERR | ICRC_ERR)) == (ABRT_ERR | ICRC_ERR)) + ; /* UDMA crc error -- just retry the operation */ + else if (err & (BBD_ERR | ECC_ERR)) /* retries won't help these */ rq->errors = ERROR_MAX; else if (err & TRK0_ERR) /* help it find track zero */ rq->errors |= ERROR_RECAL; @@ -1545,7 +1364,7 @@ struct request *cur_rq; struct semaphore sem = MUTEX_LOCKED; - if (IS_PROMISE_DRIVE && rq->buffer != NULL) + if (IS_PDC4030_DRIVE && rq->buffer != NULL) return -ENOSYS; /* special drive cmds not supported */ rq->errors = 0; rq->rq_status = RQ_ACTIVE; @@ -1931,8 +1750,13 @@ return -EINVAL; if (drive->id == NULL) return -ENOMSG; +#if 0 if (copy_to_user((char *)arg, (char *)drive->id, sizeof(*drive->id))) return -EFAULT; +#else + if (copy_to_user((char *)arg, (char *)drive->id, 142)) + return -EFAULT; +#endif return 0; case HDIO_GET_NOWERR: @@ 
-2379,13 +2203,14 @@ case -12: /* "reset" */ hwif->reset = 1; goto done; -#ifdef CONFIG_BLK_DEV_PROMISE +#ifdef CONFIG_BLK_DEV_PDC4030 case -11: /* "dc4030" */ { - setup_dc4030(hwif); + extern void setup_pdc4030(ide_hwif_t *); + setup_pdc4030(hwif); goto done; } -#endif /* CONFIG_BLK_DEV_PROMISE */ +#endif /* CONFIG_BLK_DEV_PDC4030 */ #ifdef CONFIG_BLK_DEV_ALI14XX case -10: /* "ali14xx" */ { @@ -2510,6 +2335,9 @@ printk("%s ", msg); + if (xparm == -1 && drive->bios_cyl < 1024) + return 0; /* small disk: no translation needed */ + if (drive->id) { drive->cyl = drive->id->cyls; drive->head = drive->id->heads; @@ -2550,32 +2378,6 @@ return 1; } -#ifdef CONFIG_PCI -#if defined(CONFIG_BLK_DEV_RZ1000) || defined(CONFIG_BLK_DEV_TRITON) || defined(CONFIG_BLK_DEV_OPTI621) - -typedef void (ide_pci_init_proc_t)(byte, byte); - -/* - * ide_probe_pci() scans PCI for a specific vendor/device function, - * and invokes the supplied init routine for each instance detected. - */ -__initfunc(static void ide_probe_pci (unsigned short vendor, unsigned short device, ide_pci_init_proc_t *init, int func_adj)) -{ - unsigned long flags; - unsigned index; - byte fn, bus; - - save_flags(flags); - cli(); - for (index = 0; !pcibios_find_device (vendor, device, index, &bus, &fn); ++index) { - init (bus, fn + func_adj); - } - restore_flags(flags); -} - -#endif /* defined(CONFIG_BLK_DEV_RZ1000) || defined(CONFIG_BLK_DEV_TRITON) || defined(CONFIG_BLK_DEV_OPTI621) */ -#endif /* CONFIG_PCI */ - /* * probe_for_hwifs() finds/initializes "known" IDE interfaces * @@ -2588,50 +2390,43 @@ /* * Find/initialize PCI IDE interfaces */ - if (pcibios_present()) { + if (pcibios_present()) + { +#ifdef CONFIG_BLK_DEV_IDEDMA + { + extern void ide_scan_pcibus(void); + ide_scan_pcibus(); + } +#endif #ifdef CONFIG_BLK_DEV_RZ1000 - ide_pci_init_proc_t init_rz1000; - ide_probe_pci (PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1000, &init_rz1000, 0); - ide_probe_pci (PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1001, 
&init_rz1000, 0); -#endif /* CONFIG_BLK_DEV_RZ1000 */ -#ifdef CONFIG_BLK_DEV_TRITON - /* - * Apparently the BIOS32 services on Intel motherboards are - * buggy and won't find the PCI_DEVICE_ID_INTEL_82371_1 for us. - * So instead, we search for PCI_DEVICE_ID_INTEL_82371_0, - * and then add 1. - */ - ide_probe_pci (PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371_0, &ide_init_triton, 1); - ide_probe_pci (PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_1, &ide_init_triton, 0); - ide_probe_pci (PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB, &ide_init_triton, 0); - ide_probe_pci (PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1, &ide_init_triton, 0); -#endif /* CONFIG_BLK_DEV_TRITON */ -#ifdef CONFIG_BLK_DEV_OPTI621 - ide_probe_pci (PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C621, &ide_init_opti621, 0); -#endif /* CONFIG_BLK_DEV_OPTI621 */ + { + extern void ide_probe_for_rz100x(void); + ide_probe_for_rz100x(); + } +#endif } #endif /* CONFIG_PCI */ #ifdef CONFIG_BLK_DEV_CMD640 { - extern void ide_probe_for_cmd640x (void); + extern void ide_probe_for_cmd640x(void); ide_probe_for_cmd640x(); } #endif -#ifdef CONFIG_BLK_DEV_PROMISE - init_dc4030(); +#ifdef CONFIG_BLK_DEV_PDC4030 + { + extern int init_pdc4030(void); + (void) init_pdc4030(); + } #endif } __initfunc(void ide_init_builtin_drivers (void)) { /* - * Probe for special "known" interface chipsets + * Probe for special PCI and other "known" interface chipsets */ probe_for_hwifs (); - /* - * Probe for devices - */ #ifdef CONFIG_BLK_DEV_IDE #ifdef __mc68000__ if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { diff -u --recursive --new-file v2.1.67/linux/drivers/block/ide.h linux/drivers/block/ide.h --- v2.1.67/linux/drivers/block/ide.h Tue Sep 23 16:48:47 1997 +++ linux/drivers/block/ide.h Sun Nov 30 14:10:52 1997 @@ -3,7 +3,7 @@ /* * linux/drivers/block/ide.h * - * Copyright (C) 1994-1996 Linus Torvalds & authors + * Copyright (C) 1994-1998 Linus Torvalds & authors */ #include @@ -164,7 +164,7 @@ #define WAIT_CMD (10*HZ) /* 
10sec - maximum wait for an IRQ to happen */ #define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */ -#if defined(CONFIG_BLK_DEV_HT6560B) || defined(CONFIG_BLK_DEV_PROMISE) +#if defined(CONFIG_BLK_DEV_HT6560B) || defined(CONFIG_BLK_DEV_PDC4030) #define SELECT_DRIVE(hwif,drive) \ { \ if (hwif->selectproc) \ @@ -174,7 +174,7 @@ } #else #define SELECT_DRIVE(hwif,drive) OUT_BYTE((drive)->select.all, hwif->io_ports[IDE_SELECT_OFFSET]); -#endif /* CONFIG_BLK_DEV_HT6560B || CONFIG_BLK_DEV_PROMISE */ +#endif /* CONFIG_BLK_DEV_HT6560B || CONFIG_BLK_DEV_PDC4030 */ /* * Now for the data we need to maintain per-drive: ide_drive_t @@ -244,6 +244,7 @@ byte bios_sect; /* BIOS/fdisk/LILO sectors per track */ unsigned short bios_cyl; /* BIOS/fdisk/LILO number of cyls */ unsigned short cyl; /* "real" number of cyls */ + unsigned int timing_data; /* for use by tuneproc()'s */ void *hwif; /* actually (ide_hwif_t *) */ struct wait_queue *wqueue; /* used to wait for drive in open() */ struct hd_driveid *id; /* drive model identification info */ @@ -268,7 +269,7 @@ ide_dma_abort = 2, ide_dma_check = 3, ide_dma_status_bad = 4, ide_dma_transferred = 5, ide_dma_begin = 6, ide_dma_on = 7, - ide_dma_off = 8 } + ide_dma_off = 8, ide_dma_off_quietly = 9 } ide_dma_action_t; typedef int (ide_dmaproc_t)(ide_dma_action_t, ide_drive_t *); @@ -288,7 +289,7 @@ typedef void (ide_tuneproc_t)(ide_drive_t *, byte); /* - * This is used to provide HT6560B & PROMISE interface support. + * This is used to provide HT6560B & PDC4030 interface support. */ typedef void (ide_selectproc_t) (ide_drive_t *); @@ -296,10 +297,10 @@ * hwif_chipset_t is used to keep track of the specific hardware * chipset used by each IDE interface, if known. 
*/ -typedef enum { ide_unknown, ide_generic, ide_triton, +typedef enum { ide_unknown, ide_generic, ide_pci, ide_cmd640, ide_dtc2278, ide_ali14xx, ide_qd6580, ide_umc8672, ide_ht6560b, - ide_promise, ide_via } + ide_pdc4030, ide_rz1000 } hwif_chipset_t; typedef struct hwif_s { @@ -309,7 +310,7 @@ ide_drive_t drives[MAX_DRIVES]; /* drive info */ struct gendisk *gd; /* gendisk structure */ ide_tuneproc_t *tuneproc; /* routine to tune PIO mode for drives */ -#if defined(CONFIG_BLK_DEV_HT6560B) || defined(CONFIG_BLK_DEV_PROMISE) +#if defined(CONFIG_BLK_DEV_HT6560B) || defined(CONFIG_BLK_DEV_PDC4030) ide_selectproc_t *selectproc; /* tweaks hardware to select drive */ #endif ide_dmaproc_t *dmaproc; /* dma read/write/abort routine */ @@ -324,9 +325,9 @@ unsigned present : 1; /* this interface exists */ unsigned serialized : 1; /* serialized operation with mate hwif */ unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */ -#ifdef CONFIG_BLK_DEV_PROMISE - unsigned is_promise2: 1; /* 2nd i/f on promise DC4030 */ -#endif /* CONFIG_BLK_DEV_PROMISE */ +#ifdef CONFIG_BLK_DEV_PDC4030 + unsigned is_pdc4030_2: 1;/* 2nd i/f on pdc4030 */ +#endif /* CONFIG_BLK_DEV_PDC4030 */ unsigned reset : 1; /* reset after probe */ #if (DISK_RECOVERY_TIME > 0) unsigned long last_time; /* time when previous rq was done */ @@ -609,23 +610,15 @@ int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version); int ide_unregister_subdriver (ide_drive_t *drive); -#ifdef CONFIG_BLK_DEV_TRITON -void ide_init_triton (byte, byte); -#endif /* CONFIG_BLK_DEV_TRITON */ - -#ifdef CONFIG_BLK_DEV_OPTI621 -void ide_init_opti621 (byte, byte); -#endif /* CONFIG_BLK_DEV_OPTI621 */ - #ifdef CONFIG_BLK_DEV_IDE int ideprobe_init (void); #endif /* CONFIG_BLK_DEV_IDE */ -#ifdef CONFIG_BLK_DEV_PROMISE -#include "promise.h" -#define IS_PROMISE_DRIVE (HWIF(drive)->chipset == ide_promise) +#ifdef CONFIG_BLK_DEV_PDC4030 +#include "pdc4030.h" +#define IS_PDC4030_DRIVE (HWIF(drive)->chipset == 
ide_pdc4030) #else -#define IS_PROMISE_DRIVE (0) /* auto-NULLs out Promise code */ -#endif /* CONFIG_BLK_DEV_PROMISE */ +#define IS_PDC4030_DRIVE (0) /* auto-NULLs out pdc4030 code */ +#endif /* CONFIG_BLK_DEV_PDC4030 */ #endif /* _IDE_H */ diff -u --recursive --new-file v2.1.67/linux/drivers/block/opti621.c linux/drivers/block/opti621.c --- v2.1.67/linux/drivers/block/opti621.c Wed Nov 6 04:49:33 1996 +++ linux/drivers/block/opti621.c Sun Nov 30 13:48:47 1997 @@ -1,7 +1,7 @@ /* - * linux/drivers/block/opti621.c Version 0.1 Oct 26, 1996 + * linux/drivers/block/opti621.c Version 0.3 Nov 29, 1997 * - * Copyright (C) 1996 Linus Torvalds & author (see below) + * Copyright (C) 1996-1998 Linus Torvalds & author (see below) */ /* @@ -33,8 +33,8 @@ * PIO 3 and slave PIO 0, driver have to set some timings of * master for PIO 0. Second problem is that opti621_tune_drive * got only one drive to set, but have to set both drives. - * This is solved in opti621_compute_pios. If you don't set - * the second drive, opti621_compute_pios use ide_get_best_pio_mode + * This is solved in compute_pios. If you don't set + * the second drive, compute_pios use ide_get_best_pio_mode * for autoselect mode (you can change it to PIO 0, if you want). * If you then set the second drive to another PIO, the old value * (automatically selected) will be overrided by yours. @@ -48,7 +48,7 @@ * settings of jumpers on the card and I have to boot Linux with * Loadlin except LILO, cause I have to run the setupvic.exe program * already or I get disk errors (my test: rpm -Vf - * /usr/X11R6/bin/XF86_SVGA - or any big file). + * /usr/X11R6/bin/XF86_SVGA - or any big file). * Some numbers from hdparm -t /dev/hda: * Timing buffer-cache reads: 32 MB in 3.02 seconds =10.60 MB/sec * Timing buffered disk reads: 16 MB in 5.52 seconds = 2.90 MB/sec @@ -84,7 +84,7 @@ * address: 25 ns, data: 25 ns, recovery: 50 ns; * on 20MHz PCI bus (pulse 50 ns): * address: 50 ns, data: 50 ns, recovery: 100 ns. 
- */ + */ /* #define READ_PREFETCH 0 */ /* Uncommnent for disable read prefetch. @@ -103,58 +103,35 @@ #define MISC_REG 6 /* index of Miscellaneous register */ #define CNTRL_REG 3 /* index of Control register */ int reg_base; -int opti621_primary_base, opti621_secondary_base; #define PIO_NOT_EXIST 254 #define PIO_DONT_KNOW 255 -int opti621_drive_pio_modes[4]; + /* there are stored pio numbers from other calls of opti621_tune_drive */ -void opti621_compute_pios(ide_hwif_t *drv, int second_contr, int slave_drive, byte pio) -/* Store values into opti621_drive_pio_modes: +static void compute_pios(ide_drive_t *drive, byte pio) +/* Store values into drive->timing_data * second_contr - 0 for primary controller, 1 for secondary * slave_drive - 0 -> pio is for master, 1 -> pio is for slave - * pio - PIO mode for selected drive (for other we don't know) - */ + * pio - PIO mode for selected drive (for other we don't know) + */ { - ide_drive_t *p1, *p2, *drive; - int i; - - i = 2*second_contr; - p1 = &drv->drives[0]; - p2 = &drv->drives[1]; - drive = &drv->drives[slave_drive]; - pio = ide_get_best_pio_mode(drive, pio, OPTI621_MAX_PIO, NULL); - opti621_drive_pio_modes[i+slave_drive]=pio; - - if (p1->present) { - if (opti621_drive_pio_modes[i]==PIO_DONT_KNOW) - opti621_drive_pio_modes[i]=ide_get_best_pio_mode(p1, - 255, OPTI621_MAX_PIO, NULL); - /* we don't know the selected PIO mode, so we have to autoselect */ - } else - opti621_drive_pio_modes[i]=PIO_NOT_EXIST; - if (p2->present) { - if (opti621_drive_pio_modes[i+1]==PIO_DONT_KNOW) - opti621_drive_pio_modes[i+1]=ide_get_best_pio_mode(p2, - 255, OPTI621_MAX_PIO, NULL); - /* we don't know the selected PIO mode, so we have to autoselect */ - } else - opti621_drive_pio_modes[i+1]=PIO_NOT_EXIST; - /* in opti621_drive_pio_modes[i] and [i+1] are valid PIO modes (or PIO_NOT_EXIST, - if drive is not connected), we can continue */ + int d; + ide_hwif_t *hwif = HWIF(drive); + + drive->timing_data = ide_get_best_pio_mode(drive, pio, 
OPTI621_MAX_PIO, NULL); + for (d = 0; d < 2; ++d) { + drive = &hwif->drives[d]; + if (drive->present) { + if (drive->timing_data == PIO_DONT_KNOW) + drive->timing_data = ide_get_best_pio_mode(drive, 255, OPTI621_MAX_PIO, NULL); #ifdef OPTI621_DEBUG - printk("%s: (master): ", p1->name); - if (p1->present) - printk("PIO mode %d\n", opti621_drive_pio_modes[i]); - else - printk("not present\n"); - printk("%s: (slave): ", p2->name); - if (p2->present) - printk("PIO mode %d\n", opti621_drive_pio_modes[i+1]); - else - printk("not present\n"); + printk("%s: Selected PIO mode %d\n", drive->name, drive->timing_data); #endif + } else { + drive->timing_data = PIO_NOT_EXIST; + } + } } int cmpt_clk(int time, int bus_speed) @@ -169,7 +146,7 @@ return ((time*bus_speed+999)/1000); } -void write_reg(byte value, int reg) +static void write_reg(byte value, int reg) /* Write value to register reg, base of register * is at reg_base (0x1f0 primary, 0x170 secondary, * if not changed by PCI configuration). @@ -180,12 +157,12 @@ inw(reg_base+1); outb(3, reg_base+2); outb(value, reg_base+reg); - outb(0x83, reg_base+2); + outb(0x83, reg_base+2); } -byte read_reg(int reg) +static byte read_reg(int reg) /* Read value from register reg, base of register - * is at reg_base (0x1f0 primary, 0x170 secondary, + * is at reg_base (0x1f0 primary, 0x170 secondary, * if not changed by PCI configuration). * This is from setupvic.exe program. 
*/ @@ -195,7 +172,7 @@ inw(reg_base+1); outb(3, reg_base+2); ret=inb(reg_base+reg); - outb(0x83, reg_base+2); + outb(0x83, reg_base+2); return ret; } @@ -205,9 +182,9 @@ int recovery_time; /* Recovery time (clocks) */ } pio_clocks_t; -void compute_clocks(int pio, pio_clocks_t *clks) +static void compute_clocks(int pio, pio_clocks_t *clks) { - if (pio!=PIO_NOT_EXIST) { + if (pio != PIO_NOT_EXIST) { int adr_setup, data_pls, bus_speed; bus_speed = ide_system_bus_speed(); adr_setup = ide_pio_timings[pio].setup_time; @@ -230,108 +207,80 @@ } } -static void opti621_tune_drive (ide_drive_t *drive, byte pio) /* Main tune procedure, hooked by tuneproc. */ +static void opti621_tune_drive (ide_drive_t *drive, byte pio) { - /* primary and secondary drives share some (but not same) registers, - so we have to program both drives */ + /* primary and secondary drives share some registers, + * so we have to program both drives + */ unsigned long flags; byte pio1, pio2; - int second_contr, slave_drive; pio_clocks_t first, second; int ax, drdy; byte cycle1, cycle2, misc; - - second_contr=HWIF(drive)->index; - if ((second_contr!=0) && (second_contr!=1)) - return; /* invalid controller number */ - if (((second_contr==0) && (opti621_primary_base==0)) || - ((second_contr==1) && (opti621_secondary_base==0))) - return; /* controller is unaccessible/not exist */ - slave_drive = drive->select.b.unit; - /* set opti621_drive_pio_modes[] */ - opti621_compute_pios(HWIF(drive), second_contr, slave_drive, pio); - - reg_base = second_contr ? 
opti621_primary_base : opti621_secondary_base; - - pio1 = opti621_drive_pio_modes[second_contr*2]; - pio2 = opti621_drive_pio_modes[second_contr*2+1]; - + ide_hwif_t *hwif = HWIF(drive); + + /* set drive->timing_data for both drives */ + compute_pios(drive, pio); + pio1 = hwif->drives[0].timing_data; + pio2 = hwif->drives[1].timing_data; + compute_clocks(pio1, &first); compute_clocks(pio2, &second); - - ax = (first.address_timename, ax, first.data_time, first.recovery_time, drdy); + hwif->name, ax, first.data_time, first.recovery_time, drdy); printk("%s: slave: address: %d, data: %d, recovery: %d, drdy: %d [clk]\n", - HWIF(drive)->name, ax, second.data_time, second.recovery_time, drdy); + hwif->name, ax, second.data_time, second.recovery_time, drdy); #endif save_flags(flags); cli(); - + + reg_base = hwif->io_ports[IDE_DATA_OFFSET]; outb(0xc0, reg_base+CNTRL_REG); /* allow Register-B */ outb(0xff, reg_base+5); /* hmm, setupvic.exe does this ;-) */ inb(reg_base+CNTRL_REG); /* if reads 0xff, adapter not exist? */ read_reg(CNTRL_REG); /* if reads 0xc0, no interface exist? 
*/ read_reg(5); /* read version, probably 0 */ - - /* programming primary drive - 0 or 2 */ - write_reg(0, MISC_REG); /* select Index-0 for Register-A */ + + /* program primary drive */ + write_reg(0, MISC_REG); /* select Index-0 for Register-A */ write_reg(cycle1, READ_REG); /* set read cycle timings */ write_reg(cycle1, WRITE_REG); /* set write cycle timings */ - /* programming secondary drive - 1 or 3 */ - write_reg(1, MISC_REG); /* select Index-1 for Register-B */ - write_reg(cycle2, READ_REG); /* set read cycle timings */ - write_reg(cycle2, WRITE_REG); /* set write cycle timings */ - - write_reg(0x85, CNTRL_REG); /* use Register-A for drive 0 (or 2) and - Register-B for drive 1 (or 3) */ - - write_reg(misc, MISC_REG); /* set address setup, DRDY timings - and read prefetch for both drives */ - + /* program secondary drive */ + write_reg(1, MISC_REG); /* select Index-1 for Register-B */ + write_reg(cycle2, READ_REG); /* set read cycle timings */ + write_reg(cycle2, WRITE_REG); /* set write cycle timings */ + + write_reg(0x85, CNTRL_REG); /* use Register-A for drive 0 */ + /* use Register-B for drive 1 */ + + write_reg(misc, MISC_REG); /* set address setup, DRDY timings, */ + /* and read prefetch for both drives */ + restore_flags(flags); } -void ide_init_opti621 (byte bus, byte fn) -/* Init controller. Called on kernel boot. */ +/* + * ide_init_opti621() is Called from idedma.c once for each hwif found at boot. + */ +void ide_init_opti621 (byte bus, byte fn, ide_hwif_t *hwifs) { - int rc, i; - unsigned char sreg; - unsigned short reg; - unsigned int dreg; - unsigned char revision; - for (i=0; i<4; i++) - opti621_drive_pio_modes[i] = PIO_DONT_KNOW; - printk("ide: OPTi 82C621 on PCI bus %d function %d\n", bus, fn); - if ((rc = pcibios_read_config_byte (bus, fn, 0x08, &sreg))) - goto quit; - revision = sreg; - if ((rc = pcibios_read_config_dword (bus, fn, 0x10, &dreg))) - goto quit; - opti621_primary_base = ((dreg==0) || (dreg>0xffff)) ? 
0 : dreg-1; - if ((rc = pcibios_read_config_dword (bus, fn, 0x18, &dreg))) - goto quit; - opti621_secondary_base = ((dreg==0) || (dreg>0xffff)) ? 0 : dreg-1; - printk("ide: revision %d, primary: 0x%04x, secondary: 0x%04x\n", - revision, opti621_primary_base, opti621_secondary_base); - if ((rc = pcibios_read_config_word (bus, fn, PCI_COMMAND, ®))) - goto quit; - if (!(reg & 1)) { - printk("ide: ports are not enabled (BIOS)\n"); - } else { - ide_hwifs[0].tuneproc = &opti621_tune_drive; - ide_hwifs[1].tuneproc = &opti621_tune_drive; - } - quit: if (rc) printk("ide: pcibios access failed - %s\n", pcibios_strerror(rc)); + if (hwif->io_ports[IDE_DATA_OFFSET]) { + hwif->drives[0].timing_data = PIO_DONT_KNOW; + hwif->drives[1].timing_data = PIO_DONT_KNOW; + hwif->tuneproc = &opti621_tune_drive; + } } diff -u --recursive --new-file v2.1.67/linux/drivers/block/pdc4030.c linux/drivers/block/pdc4030.c --- v2.1.67/linux/drivers/block/pdc4030.c Wed Dec 31 16:00:00 1969 +++ linux/drivers/block/pdc4030.c Sun Nov 30 13:48:47 1997 @@ -0,0 +1,363 @@ +/* -*- linux-c -*- + * linux/drivers/block/pdc4030.c Version 0.08 Nov 30, 1997 + * + * Copyright (C) 1995-1998 Linus Torvalds & authors (see below) + */ + +/* + * Principal Author/Maintainer: peterd@pnd-pc.demon.co.uk + * + * This file provides support for the second port and cache of Promise + * IDE interfaces, e.g. DC4030, DC5030. + * + * Thanks are due to Mark Lord for advice and patiently answering stupid + * questions, and all those mugs^H^H^H^Hbrave souls who've tested this. + * + * Version 0.01 Initial version, #include'd in ide.c rather than + * compiled separately. + * Reads use Promise commands, writes as before. Drives + * on second channel are read-only. + * Version 0.02 Writes working on second channel, reads on both + * channels. Writes fail under high load. Suspect + * transfers of >127 sectors don't work. + * Version 0.03 Brought into line with ide.c version 5.27. + * Other minor changes. 
+ * Version 0.04 Updated for ide.c version 5.30 + * Changed initialization strategy + * Version 0.05 Kernel integration. -ml + * Version 0.06 Ooops. Add hwgroup to direct call of ide_intr() -ml + * Version 0.07 Added support for DC4030 variants + * Secondary interface autodetection + * Version 0.08 Renamed to pdc4030.c + */ + +/* + * Once you've compiled it in, you'll have to also enable the interface + * setup routine from the kernel command line, as in + * + * 'linux ide0=dc4030' + * + * As before, it seems that somewhere around 3Megs when writing, bad things + * start to happen [timeouts/retries -ml]. If anyone can give me more feedback, + * I'd really appreciate it. [email: peterd@pnd-pc.demon.co.uk] + * + */ + + +#undef REALLY_SLOW_IO /* most systems can safely undef this */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ide.h" +#include "pdc4030.h" + +/* This is needed as the controller may not interrupt if the required data is +available in the cache. We have to simulate an interrupt. Ugh! */ + +extern void ide_intr(int, void *dev_id, struct pt_regs*); + +/* + * promise_selectproc() is invoked by ide.c + * in preparation for access to the specified drive. + */ +static void promise_selectproc (ide_drive_t *drive) +{ + unsigned int number; + + OUT_BYTE(drive->select.all,IDE_SELECT_REG); + udelay(1); /* paranoia */ + number = ((HWIF(drive)->is_pdc4030_2)<<1) + drive->select.b.unit; + OUT_BYTE(number,IDE_FEATURE_REG); +} + +/* + * pdc4030_cmd handles the set of vendor specific commands that are initiated + * by command F0. They all have the same success/failure notification. + */ +int pdc4030_cmd(ide_drive_t *drive, byte cmd) +{ + unsigned long timeout, timer; + byte status_val; + + promise_selectproc(drive); /* redundant? 
*/ + OUT_BYTE(0xF3,IDE_SECTOR_REG); + OUT_BYTE(cmd,IDE_SELECT_REG); + OUT_BYTE(PROMISE_EXTENDED_COMMAND,IDE_COMMAND_REG); + timeout = HZ * 10; + timeout += jiffies; + do { + if(jiffies > timeout) { + return 2; /* device timed out */ + } + /* This is out of delay_10ms() */ + /* Delays at least 10ms to give interface a chance */ + timer = jiffies + (HZ + 99)/100 + 1; + while (timer > jiffies); + status_val = IN_BYTE(IDE_SECTOR_REG); + } while (status_val != 0x50 && status_val != 0x70); + + if(status_val == 0x50) + return 0; /* device returned success */ + else + return 1; /* device returned failure */ +} + +ide_hwif_t *hwif_required = NULL; + +void setup_pdc4030 (ide_hwif_t *hwif) +{ + hwif_required = hwif; +} + +/* +init_pdc4030: Test for presence of a Promise caching controller card. +Returns: 0 if no Promise card present at this io_base + 1 if Promise card found +*/ +int init_pdc4030 (void) +{ + ide_hwif_t *hwif = hwif_required; + ide_drive_t *drive; + ide_hwif_t *second_hwif; + struct dc_ident ident; + int i; + + if (!hwif) return 0; + + drive = &hwif->drives[0]; + second_hwif = &ide_hwifs[hwif->index+1]; + if(hwif->is_pdc4030_2) /* we've already been found ! 
*/ + return 1; + + if(IN_BYTE(IDE_NSECTOR_REG) == 0xFF || IN_BYTE(IDE_SECTOR_REG) == 0xFF) + { + return 0; + } + OUT_BYTE(0x08,IDE_CONTROL_REG); + if(pdc4030_cmd(drive,PROMISE_GET_CONFIG)) { + return 0; + } + if(ide_wait_stat(drive,DATA_READY,BAD_W_STAT,WAIT_DRQ)) { + printk("%s: Failed Promise read config!\n",hwif->name); + return 0; + } + ide_input_data(drive,&ident,SECTOR_WORDS); + if(ident.id[1] != 'P' || ident.id[0] != 'T') { + return 0; + } + printk("%s: Promise caching controller, ",hwif->name); + switch(ident.type) { + case 0x43: printk("DC4030VL-2, "); break; + case 0x41: printk("DC4030VL-1, "); break; + case 0x40: printk("DC4030VL, "); break; + default: printk("unknown - type 0x%02x - please report!\n" + ,ident.type); + return 0; + } + printk("%dKB cache, ",(int)ident.cache_mem); + switch(ident.irq) { + case 0x00: hwif->irq = 14; break; + case 0x01: hwif->irq = 12; break; + default: hwif->irq = 15; break; + } + printk("on IRQ %d\n",hwif->irq); + hwif->chipset = second_hwif->chipset = ide_pdc4030; + hwif->selectproc = second_hwif->selectproc = &promise_selectproc; +/* Shift the remaining interfaces down by one */ + for (i=MAX_HWIFS-1 ; i > hwif->index+1 ; i--) { + ide_hwif_t *h = &ide_hwifs[i]; + + printk("Shifting i/f %d values to i/f %d\n",i-1,i); + ide_init_hwif_ports(h->io_ports, (h-1)->io_ports[IDE_DATA_OFFSET], NULL); + h->io_ports[IDE_CONTROL_OFFSET] = (h-1)->io_ports[IDE_CONTROL_OFFSET]; + h->noprobe = (h-1)->noprobe; + } + second_hwif->is_pdc4030_2 = 1; + ide_init_hwif_ports(second_hwif->io_ports, hwif->io_ports[IDE_DATA_OFFSET], NULL); + second_hwif->io_ports[IDE_CONTROL_OFFSET] = hwif->io_ports[IDE_CONTROL_OFFSET]; + second_hwif->irq = hwif->irq; + for (i=0; i<2 ; i++) { + hwif->drives[i].io_32bit = 3; + second_hwif->drives[i].io_32bit = 3; + if(!ident.current_tm[i+2].cyl) second_hwif->drives[i].noprobe=1; + } + return 1; +} + +/* + * promise_read_intr() is the handler for disk read/multread interrupts + */ +static void promise_read_intr 
(ide_drive_t *drive) +{ + byte stat; + int i; + unsigned int sectors_left, sectors_avail, nsect; + struct request *rq; + + if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { + ide_error(drive, "promise_read_intr", stat); + return; + } + +read_again: + do { + sectors_left = IN_BYTE(IDE_NSECTOR_REG); + IN_BYTE(IDE_SECTOR_REG); + } while (IN_BYTE(IDE_NSECTOR_REG) != sectors_left); + rq = HWGROUP(drive)->rq; + sectors_avail = rq->nr_sectors - sectors_left; + +read_next: + rq = HWGROUP(drive)->rq; + if ((nsect = rq->current_nr_sectors) > sectors_avail) + nsect = sectors_avail; + sectors_avail -= nsect; + ide_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); +#ifdef DEBUG + printk("%s: promise_read: sectors(%ld-%ld), buffer=0x%08lx, " + "remaining=%ld\n", drive->name, rq->sector, rq->sector+nsect-1, + (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); +#endif + rq->sector += nsect; + rq->buffer += nsect<<9; + rq->errors = 0; + i = (rq->nr_sectors -= nsect); + if ((rq->current_nr_sectors -= nsect) <= 0) + ide_end_request(1, HWGROUP(drive)); + if (i > 0) { + if (sectors_avail) + goto read_next; + stat = GET_STAT(); + if(stat & DRQ_STAT) + goto read_again; + if(stat & BUSY_STAT) { + ide_set_handler (drive, &promise_read_intr, WAIT_CMD); + return; + } + printk("Ah! promise read intr: sectors left !DRQ !BUSY\n"); + ide_error(drive, "promise read intr", stat); + } +} + +/* + * promise_write_pollfunc() is the handler for disk write completion polling. + */ +static void promise_write_pollfunc (ide_drive_t *drive) +{ + int i; + ide_hwgroup_t *hwgroup = HWGROUP(drive); + struct request *rq; + + if (IN_BYTE(IDE_NSECTOR_REG) != 0) { + if (jiffies < hwgroup->poll_timeout) { + ide_set_handler (drive, &promise_write_pollfunc, 1); + return; /* continue polling... 
*/ + } + printk("%s: write timed-out!\n",drive->name); + ide_error (drive, "write timeout", GET_STAT()); + return; + } + + ide_multwrite(drive, 4); + rq = hwgroup->rq; + for (i = rq->nr_sectors; i > 0;) { + i -= rq->current_nr_sectors; + ide_end_request(1, hwgroup); + } + return; +} + +/* + * promise_write() transfers a block of one or more sectors of data to a + * drive as part of a disk write operation. All but 4 sectors are transfered + * in the first attempt, then the interface is polled (nicely!) for completion + * before the final 4 sectors are transfered. Don't ask me why, but this is + * how it's done in the drivers for other O/Ses. There is no interrupt + * generated on writes, which is why we have to do it like this. + */ +static void promise_write (ide_drive_t *drive) +{ + ide_hwgroup_t *hwgroup = HWGROUP(drive); + struct request *rq = &hwgroup->wrq; + int i; + + if (rq->nr_sectors > 4) { + ide_multwrite(drive, rq->nr_sectors - 4); + hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; + ide_set_handler (drive, &promise_write_pollfunc, 1); + return; + } else { + ide_multwrite(drive, rq->nr_sectors); + rq = hwgroup->rq; + for (i = rq->nr_sectors; i > 0;) { + i -= rq->current_nr_sectors; + ide_end_request(1, hwgroup); + } + } +} + +/* + * do_pdc4030_io() is called from do_rw_disk, having had the block number + * already set up. It issues a READ or WRITE command to the Promise + * controller, assuming LBA has been used to set up the block number. + */ +void do_pdc4030_io (ide_drive_t *drive, struct request *rq) +{ + unsigned long timeout; + byte stat; + + if (rq->cmd == READ) { + ide_set_handler(drive, &promise_read_intr, WAIT_CMD); + OUT_BYTE(PROMISE_READ, IDE_COMMAND_REG); +/* The card's behaviour is odd at this point. If the data is + available, DRQ will be true, and no interrupt will be + generated by the card. If this is the case, we need to simulate + an interrupt. Ugh! 
Otherwise, if an interrupt will occur, bit0 + of the SELECT register will be high, so we can just return and + be interrupted.*/ + timeout = jiffies + HZ/20; /* 50ms wait */ + do { + stat=GET_STAT(); + if(stat & DRQ_STAT) { +/* unsigned long flags; + save_flags(flags); + cli(); + disable_irq(HWIF(drive)->irq); +*/ + ide_intr(HWIF(drive)->irq,HWGROUP(drive),NULL); +/* enable_irq(HWIF(drive)->irq); + restore_flags(flags); +*/ + return; + } + if(IN_BYTE(IDE_SELECT_REG) & 0x01) + return; + udelay(1); + } while (jiffies < timeout); + printk("%s: reading: No DRQ and not waiting - Odd!\n", + drive->name); + return; + } + if (rq->cmd == WRITE) { + OUT_BYTE(PROMISE_WRITE, IDE_COMMAND_REG); + if (ide_wait_stat(drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { + printk("%s: no DRQ after issuing PROMISE_WRITE\n", drive->name); + return; + } + if (!drive->unmask) + cli(); + HWGROUP(drive)->wrq = *rq; /* scratchpad */ + promise_write(drive); + return; + } + printk("%s: bad command: %d\n", drive->name, rq->cmd); + ide_end_request(0, HWGROUP(drive)); +} diff -u --recursive --new-file v2.1.67/linux/drivers/block/pdc4030.h linux/drivers/block/pdc4030.h --- v2.1.67/linux/drivers/block/pdc4030.h Wed Dec 31 16:00:00 1969 +++ linux/drivers/block/pdc4030.h Sun Nov 30 13:48:47 1997 @@ -0,0 +1,44 @@ +/* + * linux/drivers/block/pdc4030.h + * + * Copyright (C) 1995-1998 Linus Torvalds & authors + */ + +/* + * Principal author: Peter Denison + */ + +#ifndef IDE_PROMISE_H +#define IDE_PROMISE_H + +#define PROMISE_EXTENDED_COMMAND 0xF0 +#define PROMISE_READ 0xF2 +#define PROMISE_WRITE 0xF3 +/* Extended commands - main command code = 0xf0 */ +#define PROMISE_GET_CONFIG 0x10 +#define PROMISE_IDENTIFY 0x20 + +struct translation_mode { + u16 cyl; + u8 head; + u8 sect; +}; + +struct dc_ident { + u8 type; + u8 unknown1; + u8 hw_revision; + u8 firmware_major; + u8 firmware_minor; + u8 bios_address; + u8 irq; + u8 unknown2; + u16 cache_mem; + u16 unknown3; + u8 id[2]; + u16 info; + struct 
translation_mode current_tm[4]; + u8 pad[SECTOR_WORDS*4 - 32]; +}; + +#endif IDE_PROMISE_H diff -u --recursive --new-file v2.1.67/linux/drivers/block/promise.c linux/drivers/block/promise.c --- v2.1.67/linux/drivers/block/promise.c Wed Nov 6 04:49:33 1996 +++ linux/drivers/block/promise.c Wed Dec 31 16:00:00 1969 @@ -1,362 +0,0 @@ -/* -*- linux-c -*- - * linux/drivers/block/promise.c Version 0.07 Mar 26, 1996 - * - * Copyright (C) 1995-1996 Linus Torvalds & authors (see below) - */ - -/* - * Principal Author/Maintainer: peterd@pnd-pc.demon.co.uk - * - * This file provides support for the second port and cache of Promise - * IDE interfaces, e.g. DC4030, DC5030. - * - * Thanks are due to Mark Lord for advice and patiently answering stupid - * questions, and all those mugs^H^H^H^Hbrave souls who've tested this. - * - * Version 0.01 Initial version, #include'd in ide.c rather than - * compiled separately. - * Reads use Promise commands, writes as before. Drives - * on second channel are read-only. - * Version 0.02 Writes working on second channel, reads on both - * channels. Writes fail under high load. Suspect - * transfers of >127 sectors don't work. - * Version 0.03 Brought into line with ide.c version 5.27. - * Other minor changes. - * Version 0.04 Updated for ide.c version 5.30 - * Changed initialization strategy - * Version 0.05 Kernel integration. -ml - * Version 0.06 Ooops. Add hwgroup to direct call of ide_intr() -ml - * Version 0.07 Added support for DC4030 variants - * Secondary interface autodetection - */ - -/* - * Once you've compiled it in, you'll have to also enable the interface - * setup routine from the kernel command line, as in - * - * 'linux ide0=dc4030' - * - * As before, it seems that somewhere around 3Megs when writing, bad things - * start to happen [timeouts/retries -ml]. If anyone can give me more feedback, - * I'd really appreciate it. 
[email: peterd@pnd-pc.demon.co.uk] - * - */ - - -#undef REALLY_SLOW_IO /* most systems can safely undef this */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "ide.h" -#include "promise.h" - -/* This is needed as the controller may not interrupt if the required data is -available in the cache. We have to simulate an interrupt. Ugh! */ - -extern void ide_intr(int, void *dev_id, struct pt_regs*); - -/* - * promise_selectproc() is invoked by ide.c - * in preparation for access to the specified drive. - */ -static void promise_selectproc (ide_drive_t *drive) -{ - unsigned int number; - - OUT_BYTE(drive->select.all,IDE_SELECT_REG); - udelay(1); /* paranoia */ - number = ((HWIF(drive)->is_promise2)<<1) + drive->select.b.unit; - OUT_BYTE(number,IDE_FEATURE_REG); -} - -/* - * promise_cmd handles the set of vendor specific commands that are initiated - * by command F0. They all have the same success/failure notification. - */ -int promise_cmd(ide_drive_t *drive, byte cmd) -{ - unsigned long timeout, timer; - byte status_val; - - promise_selectproc(drive); /* redundant? */ - OUT_BYTE(0xF3,IDE_SECTOR_REG); - OUT_BYTE(cmd,IDE_SELECT_REG); - OUT_BYTE(PROMISE_EXTENDED_COMMAND,IDE_COMMAND_REG); - timeout = HZ * 10; - timeout += jiffies; - do { - if(jiffies > timeout) { - return 2; /* device timed out */ - } - /* This is out of delay_10ms() */ - /* Delays at least 10ms to give interface a chance */ - timer = jiffies + (HZ + 99)/100 + 1; - while (timer > jiffies); - status_val = IN_BYTE(IDE_SECTOR_REG); - } while (status_val != 0x50 && status_val != 0x70); - - if(status_val == 0x50) - return 0; /* device returned success */ - else - return 1; /* device returned failure */ -} - -ide_hwif_t *hwif_required = NULL; - -void setup_dc4030 (ide_hwif_t *hwif) -{ - hwif_required = hwif; -} - -/* -init_dc4030: Test for presence of a Promise caching controller card. 
-Returns: 0 if no Promise card present at this io_base - 1 if Promise card found -*/ -int init_dc4030 (void) -{ - ide_hwif_t *hwif = hwif_required; - ide_drive_t *drive; - ide_hwif_t *second_hwif; - struct dc_ident ident; - int i; - - if (!hwif) return 0; - - drive = &hwif->drives[0]; - second_hwif = &ide_hwifs[hwif->index+1]; - if(hwif->is_promise2) /* we've already been found ! */ - return 1; - - if(IN_BYTE(IDE_NSECTOR_REG) == 0xFF || IN_BYTE(IDE_SECTOR_REG) == 0xFF) - { - return 0; - } - OUT_BYTE(0x08,IDE_CONTROL_REG); - if(promise_cmd(drive,PROMISE_GET_CONFIG)) { - return 0; - } - if(ide_wait_stat(drive,DATA_READY,BAD_W_STAT,WAIT_DRQ)) { - printk("%s: Failed Promise read config!\n",hwif->name); - return 0; - } - ide_input_data(drive,&ident,SECTOR_WORDS); - if(ident.id[1] != 'P' || ident.id[0] != 'T') { - return 0; - } - printk("%s: Promise caching controller, ",hwif->name); - switch(ident.type) { - case 0x43: printk("DC4030VL-2, "); break; - case 0x41: printk("DC4030VL-1, "); break; - case 0x40: printk("DC4030VL, "); break; - default: printk("unknown - type 0x%02x - please report!\n" - ,ident.type); - return 0; - } - printk("%dKB cache, ",(int)ident.cache_mem); - switch(ident.irq) { - case 0x00: hwif->irq = 14; break; - case 0x01: hwif->irq = 12; break; - default: hwif->irq = 15; break; - } - printk("on IRQ %d\n",hwif->irq); - hwif->chipset = second_hwif->chipset = ide_promise; - hwif->selectproc = second_hwif->selectproc = &promise_selectproc; -/* Shift the remaining interfaces down by one */ - for (i=MAX_HWIFS-1 ; i > hwif->index+1 ; i--) { - ide_hwif_t *h = &ide_hwifs[i]; - - printk("Shifting i/f %d values to i/f %d\n",i-1,i); - ide_init_hwif_ports(h->io_ports, (h-1)->io_ports[IDE_DATA_OFFSET], NULL); - h->io_ports[IDE_CONTROL_OFFSET] = (h-1)->io_ports[IDE_CONTROL_OFFSET]; - h->noprobe = (h-1)->noprobe; - } - second_hwif->is_promise2 = 1; - ide_init_hwif_ports(second_hwif->io_ports, hwif->io_ports[IDE_DATA_OFFSET], NULL); - 
second_hwif->io_ports[IDE_CONTROL_OFFSET] = hwif->io_ports[IDE_CONTROL_OFFSET]; - second_hwif->irq = hwif->irq; - for (i=0; i<2 ; i++) { - hwif->drives[i].io_32bit = 3; - second_hwif->drives[i].io_32bit = 3; - if(!ident.current_tm[i+2].cyl) second_hwif->drives[i].noprobe=1; - } - return 1; -} - -/* - * promise_read_intr() is the handler for disk read/multread interrupts - */ -static void promise_read_intr (ide_drive_t *drive) -{ - byte stat; - int i; - unsigned int sectors_left, sectors_avail, nsect; - struct request *rq; - - if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { - ide_error(drive, "promise_read_intr", stat); - return; - } - -read_again: - do { - sectors_left = IN_BYTE(IDE_NSECTOR_REG); - IN_BYTE(IDE_SECTOR_REG); - } while (IN_BYTE(IDE_NSECTOR_REG) != sectors_left); - rq = HWGROUP(drive)->rq; - sectors_avail = rq->nr_sectors - sectors_left; - -read_next: - rq = HWGROUP(drive)->rq; - if ((nsect = rq->current_nr_sectors) > sectors_avail) - nsect = sectors_avail; - sectors_avail -= nsect; - ide_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); -#ifdef DEBUG - printk("%s: promise_read: sectors(%ld-%ld), buffer=0x%08lx, " - "remaining=%ld\n", drive->name, rq->sector, rq->sector+nsect-1, - (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); -#endif - rq->sector += nsect; - rq->buffer += nsect<<9; - rq->errors = 0; - i = (rq->nr_sectors -= nsect); - if ((rq->current_nr_sectors -= nsect) <= 0) - ide_end_request(1, HWGROUP(drive)); - if (i > 0) { - if (sectors_avail) - goto read_next; - stat = GET_STAT(); - if(stat & DRQ_STAT) - goto read_again; - if(stat & BUSY_STAT) { - ide_set_handler (drive, &promise_read_intr, WAIT_CMD); - return; - } - printk("Ah! promise read intr: sectors left !DRQ !BUSY\n"); - ide_error(drive, "promise read intr", stat); - } -} - -/* - * promise_write_pollfunc() is the handler for disk write completion polling. 
- */ -static void promise_write_pollfunc (ide_drive_t *drive) -{ - int i; - ide_hwgroup_t *hwgroup = HWGROUP(drive); - struct request *rq; - - if (IN_BYTE(IDE_NSECTOR_REG) != 0) { - if (jiffies < hwgroup->poll_timeout) { - ide_set_handler (drive, &promise_write_pollfunc, 1); - return; /* continue polling... */ - } - printk("%s: write timed-out!\n",drive->name); - ide_error (drive, "write timeout", GET_STAT()); - return; - } - - ide_multwrite(drive, 4); - rq = hwgroup->rq; - for (i = rq->nr_sectors; i > 0;) { - i -= rq->current_nr_sectors; - ide_end_request(1, hwgroup); - } - return; -} - -/* - * promise_write() transfers a block of one or more sectors of data to a - * drive as part of a disk write operation. All but 4 sectors are transfered - * in the first attempt, then the interface is polled (nicely!) for completion - * before the final 4 sectors are transfered. Don't ask me why, but this is - * how it's done in the drivers for other O/Ses. There is no interrupt - * generated on writes, which is why we have to do it like this. - */ -static void promise_write (ide_drive_t *drive) -{ - ide_hwgroup_t *hwgroup = HWGROUP(drive); - struct request *rq = &hwgroup->wrq; - int i; - - if (rq->nr_sectors > 4) { - ide_multwrite(drive, rq->nr_sectors - 4); - hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; - ide_set_handler (drive, &promise_write_pollfunc, 1); - return; - } else { - ide_multwrite(drive, rq->nr_sectors); - rq = hwgroup->rq; - for (i = rq->nr_sectors; i > 0;) { - i -= rq->current_nr_sectors; - ide_end_request(1, hwgroup); - } - } -} - -/* - * do_promise_io() is called from do_rw_disk, having had the block number - * already set up. It issues a READ or WRITE command to the Promise - * controller, assuming LBA has been used to set up the block number. 
- */ -void do_promise_io (ide_drive_t *drive, struct request *rq) -{ - unsigned long timeout; - byte stat; - - if (rq->cmd == READ) { - ide_set_handler(drive, &promise_read_intr, WAIT_CMD); - OUT_BYTE(PROMISE_READ, IDE_COMMAND_REG); -/* The card's behaviour is odd at this point. If the data is - available, DRQ will be true, and no interrupt will be - generated by the card. If this is the case, we need to simulate - an interrupt. Ugh! Otherwise, if an interrupt will occur, bit0 - of the SELECT register will be high, so we can just return and - be interrupted.*/ - timeout = jiffies + HZ/20; /* 50ms wait */ - do { - stat=GET_STAT(); - if(stat & DRQ_STAT) { -/* unsigned long flags; - save_flags(flags); - cli(); - disable_irq(HWIF(drive)->irq); -*/ - ide_intr(HWIF(drive)->irq,HWGROUP(drive),NULL); -/* enable_irq(HWIF(drive)->irq); - restore_flags(flags); -*/ - return; - } - if(IN_BYTE(IDE_SELECT_REG) & 0x01) - return; - udelay(1); - } while (jiffies < timeout); - printk("%s: reading: No DRQ and not waiting - Odd!\n", - drive->name); - return; - } - if (rq->cmd == WRITE) { - OUT_BYTE(PROMISE_WRITE, IDE_COMMAND_REG); - if (ide_wait_stat(drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { - printk("%s: no DRQ after issuing PROMISE_WRITE\n", drive->name); - return; - } - if (!drive->unmask) - cli(); - HWGROUP(drive)->wrq = *rq; /* scratchpad */ - promise_write(drive); - return; - } - printk("%s: bad command: %d\n", drive->name, rq->cmd); - ide_end_request(0, HWGROUP(drive)); -} diff -u --recursive --new-file v2.1.67/linux/drivers/block/promise.h linux/drivers/block/promise.h --- v2.1.67/linux/drivers/block/promise.h Sat Mar 16 03:52:15 1996 +++ linux/drivers/block/promise.h Wed Dec 31 16:00:00 1969 @@ -1,52 +0,0 @@ -/* - * linux/drivers/block/promise.h - * - * Copyright (C) 1995-6 Linus Torvalds & authors - */ - -/* - * Principal author: Peter Denison - */ - -#ifndef IDE_PROMISE_H -#define IDE_PROMISE_H - -#define PROMISE_EXTENDED_COMMAND 0xF0 -#define PROMISE_READ 0xF2 
-#define PROMISE_WRITE 0xF3 -/* Extended commands - main command code = 0xf0 */ -#define PROMISE_GET_CONFIG 0x10 -#define PROMISE_IDENTIFY 0x20 - -struct translation_mode { - u16 cyl; - u8 head; - u8 sect; -}; - -struct dc_ident { - u8 type; - u8 unknown1; - u8 hw_revision; - u8 firmware_major; - u8 firmware_minor; - u8 bios_address; - u8 irq; - u8 unknown2; - u16 cache_mem; - u16 unknown3; - u8 id[2]; - u16 info; - struct translation_mode current_tm[4]; - u8 pad[SECTOR_WORDS*4 - 32]; -}; - -/* - * Routines exported to ide.c: - */ -void do_promise_io (ide_drive_t *, struct request *); -int promise_cmd(ide_drive_t *, byte); -void setup_dc4030 (ide_hwif_t *); -int init_dc4030 (void); - -#endif IDE_PROMISE_H diff -u --recursive --new-file v2.1.67/linux/drivers/block/rz1000.c linux/drivers/block/rz1000.c --- v2.1.67/linux/drivers/block/rz1000.c Sun Aug 4 22:12:25 1996 +++ linux/drivers/block/rz1000.c Sun Nov 30 13:48:47 1997 @@ -26,34 +26,42 @@ #include #include "ide.h" -static void ide_pci_access_error (int rc) +static void init_rz1000 (byte bus, byte fn, const char *name) { - printk("ide: pcibios access failed - %s\n", pcibios_strerror(rc)); + unsigned short reg, h; + + printk("%s: buggy IDE controller: ", name); + if (!pcibios_read_config_word (bus, fn, PCI_COMMAND, ®) && !(reg & 1)) { + printk("disabled (BIOS)\n"); + return; + } + if (!pcibios_read_config_word (bus, fn, 0x40, ®) + && !pcibios_write_config_word(bus, fn, 0x40, reg & 0xdfff)) + { + printk("disabled read-ahead\n"); + } else { + printk("\n"); + for (h = 0; h < MAX_HWIFS; ++h) { + ide_hwif_t *hwif = &ide_hwifs[h]; + if ((hwif->io_ports[IDE_DATA_OFFSET] == 0x1f0 || hwif->io_ports[IDE_DATA_OFFSET] == 0x170) + && (hwif->chipset == ide_unknown || hwif->chipset == ide_generic)) + { + hwif->chipset = ide_rz1000; + hwif->serialized = 1; + hwif->drives[0].no_unmask = 1; + hwif->drives[1].no_unmask = 1; + printk(" %s: serialized, disabled unmasking\n", hwif->name); + } + } + } } -void init_rz1000 (byte bus, byte 
fn) +void ide_probe_for_rz100x (void) { - int rc; - unsigned short reg; + byte index, bus, fn; - printk("ide0: buggy RZ1000 interface: "); - if ((rc = pcibios_read_config_word (bus, fn, PCI_COMMAND, ®))) { - ide_pci_access_error (rc); - } else if (!(reg & 1)) { - printk("not enabled\n"); - } else { - if ((rc = pcibios_read_config_word(bus, fn, 0x40, ®)) - || (rc = pcibios_write_config_word(bus, fn, 0x40, reg & 0xdfff))) - { - ide_hwifs[0].drives[0].no_unmask = 1; - ide_hwifs[0].drives[1].no_unmask = 1; - ide_hwifs[1].drives[0].no_unmask = 1; - ide_hwifs[1].drives[1].no_unmask = 1; - ide_hwifs[0].serialized = 1; - ide_hwifs[1].serialized = 1; - ide_pci_access_error (rc); - printk("serialized, disabled unmasking\n"); - } else - printk("disabled read-ahead\n"); - } + for (index = 0; !pcibios_find_device (PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1000, index, &bus, &fn); ++index) + init_rz1000 (bus, fn, "RZ1000"); + for (index = 0; !pcibios_find_device (PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1001, index, &bus, &fn); ++index) + init_rz1000 (bus, fn, "RZ1001"); } diff -u --recursive --new-file v2.1.67/linux/drivers/block/triton.c linux/drivers/block/triton.c --- v2.1.67/linux/drivers/block/triton.c Thu Jun 26 12:33:38 1997 +++ linux/drivers/block/triton.c Wed Dec 31 16:00:00 1969 @@ -1,631 +0,0 @@ -/* - * linux/drivers/block/triton.c Version 2.10 April 22, 1997 - * - * Copyright (c) 1995-1997 Mark Lord - * May be copied or modified under the terms of the GNU General Public License - */ - -/* - * This module provides support for the bus-master IDE DMA function - * of the Intel PCI Triton chipset families, which use the PIIX (i82371FB, - * for the 430 FX chipset), the PIIX3 (i82371SB for the 430 HX/VX and - * 440 chipsets), and the PIIX4 (i82371AB for the 430 TX chipset). - * - * "PIIX" stands for "PCI ISA IDE Xcellerator". - * - * Pretty much the same code could work for other IDE PCI bus-mastering chipsets. 
- * Look for DMA support for this someday in the not too distant future. - * - * DMA is supported for all IDE devices (disk drives, cdroms, tapes, floppies). - * - * Up to four drives may be enabled for DMA, and the PIIX* chips - * will arbitrate the PCI bus among them. Note that the PIIX/PIIX3 - * provides a single "line buffer" for the BM IDE function, so performance of - * multiple (two) drives doing DMA simultaneously will suffer somewhat, - * as they contest for that resource bottleneck. This is handled transparently - * inside the PIIX/PIIX3. The PIIX4 does not have this problem. - * - * By default, DMA support is prepared for use, but is currently enabled only - * for drives which support DMA mode2 (multi/single word), or which are - * recognized as "good" (see table below). Drives with only mode0 or mode1 - * (multi/single word) DMA should also work with this chipset/driver (eg. MC2112A) - * but are not enabled by default. Use "hdparm -i" to view modes supported - * by a given drive. - * - * The hdparm-2.4 (or later) utility can be used for manually enabling/disabling - * DMA support, but must be (re-)compiled against this kernel version or later. - * - * To enable DMA, use "hdparm -d1 /dev/hd?" on a per-drive basis after booting. - * If problems arise, ide.c will disable DMA operation after a few retries. - * This error recovery mechanism works and has been extremely well exercised. - * - * IDE drives, depending on their vintage, may support several different modes - * of DMA operation. The boot-time modes are indicated with a "*" in - * the "hdparm -i" listing, and can be changed with *knowledgeable* use of - * the "hdparm -X" feature. There is seldom a need to do this, as drives - * normally power-up with their "best" PIO/DMA modes enabled. 
- * - * Testing has been done with a rather extensive number of drives, - * with Quantum & Western Digital models generally outperforming the pack, - * and Fujitsu & Conner (and some Seagate which are really Conner) drives - * showing more lackluster throughput. - * - * Keep an eye on /var/adm/messages for "DMA disabled" messages. - * - * Some people have reported trouble with Intel Zappa motherboards. - * This can be fixed by upgrading the AMI BIOS to version 1.00.04.BS0, - * available from ftp://ftp.intel.com/pub/bios/10004bs0.exe - * (thanks to Glen Morrell for researching this). - * - * Thanks to "Christopher J. Reimer" for fixing the - * problem with some (all?) ACER motherboards/BIOSs. - * - * Thanks to "Benoit Poulot-Cazajous" for testing - * "TX" chipset compatibility and for providing patches for the "TX" chipset. - * - * And, yes, Intel Zappa boards really *do* use both PIIX IDE ports. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "ide.h" -#include "ide_modes.h" - -#define DISPLAY_PIIX_TIMINGS /* define this to display timings */ - -/* - * good_dma_drives() lists the model names (from "hdparm -i") - * of drives which do not support mode2 DMA but which are - * known to work fine with this interface under Linux. - */ -const char *good_dma_drives[] = {"Micropolis 2112A", - "CONNER CTMA 4000", - NULL}; - -/* - * Our Physical Region Descriptor (PRD) table should be large enough - * to handle the biggest I/O request we are likely to see. Since requests - * can have no more than 256 sectors, and since the typical blocksize is - * two sectors, we could get by with a limit of 128 entries here for the - * usual worst case. Most requests seem to include some contiguous blocks, - * further reducing the number of table entries required. - * - * The driver reverts to PIO mode for individual requests that exceed - * this limit (possible with 512 byte blocksizes, eg. 
MSDOS f/s), so handling - * 100% of all crazy scenarios here is not necessary. - * - * As it turns out though, we must allocate a full 4KB page for this, - * so the two PRD tables (ide0 & ide1) will each get half of that, - * allowing each to have about 256 entries (8 bytes each) from this. - */ -#define PRD_BYTES 8 -#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) - -/* - * Interface to access piix registers - */ -static unsigned int piix_key; - -#define PIIX_FLAGS_FAST_PIO 1 -#define PIIX_FLAGS_USE_IORDY 2 -#define PIIX_FLAGS_PREFETCH 4 -#define PIIX_FLAGS_FAST_DMA 8 - - -union chip_en_reg_u { - struct { - unsigned d0_flags :4; - unsigned d1_flags :4; - unsigned recovery :2; - unsigned reserved :2; - unsigned sample :2; - unsigned sidetim_enabled:1; - unsigned ports_enabled :1; - } piix_s; - struct { - unsigned sec_en :1; - unsigned pri_en :1; - unsigned reserved :14; - } via_s; -}; - -typedef union chip_en_reg_u piix_timing_t; - -typedef struct { - unsigned pri_recovery :2; - unsigned pri_sample :2; - unsigned sec_recovery :2; - unsigned sec_sample :2; -} piix_sidetim_t; - - -/* - * We currently can handle only one PIIX chip here - */ -static piix_pci_bus = 0; -static piix_pci_fn = 0; - -static int config_drive_for_dma (ide_drive_t *); - -/* - * dma_intr() is the handler for disk read/write DMA interrupts - */ -static void dma_intr (ide_drive_t *drive) -{ - byte stat, dma_stat; - int i; - struct request *rq = HWGROUP(drive)->rq; - unsigned short dma_base = HWIF(drive)->dma_base; - - dma_stat = inb(dma_base+2); /* get DMA status */ - outb(inb(dma_base)&~1, dma_base); /* stop DMA operation */ - stat = GET_STAT(); /* get drive status */ - if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) { - if ((dma_stat & 7) == 4) { /* verify good DMA status */ - rq = HWGROUP(drive)->rq; - for (i = rq->nr_sectors; i > 0;) { - i -= rq->current_nr_sectors; - ide_end_request(1, HWGROUP(drive)); - } - return; - } - printk("%s: bad DMA status: 0x%02x\n", drive->name, 
dma_stat); - } - sti(); - ide_error(drive, "dma_intr", stat); -} - -/* - * build_dmatable() prepares a dma request. - * Returns 0 if all went okay, returns 1 otherwise. - */ -static int build_dmatable (ide_drive_t *drive) -{ - struct request *rq = HWGROUP(drive)->rq; - struct buffer_head *bh = rq->bh; - unsigned long size, addr, *table = HWIF(drive)->dmatable; - unsigned int count = 0; - - do { - /* - * Determine addr and size of next buffer area. We assume that - * individual virtual buffers are always composed linearly in - * physical memory. For example, we assume that any 8kB buffer - * is always composed of two adjacent physical 4kB pages rather - * than two possibly non-adjacent physical 4kB pages. - */ - if (bh == NULL) { /* paging requests have (rq->bh == NULL) */ - addr = virt_to_bus (rq->buffer); - size = rq->nr_sectors << 9; - } else { - /* group sequential buffers into one large buffer */ - addr = virt_to_bus (bh->b_data); - size = bh->b_size; - while ((bh = bh->b_reqnext) != NULL) { - if ((addr + size) != virt_to_bus (bh->b_data)) - break; - size += bh->b_size; - } - } - - /* - * Fill in the dma table, without crossing any 64kB boundaries. - * We assume 16-bit alignment of all blocks. - */ - while (size) { - if (++count >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - return 1; /* revert to PIO for this request */ - } else { - unsigned long bcount = 0x10000 - (addr & 0xffff); - if (bcount > size) - bcount = size; - *table++ = addr; - *table++ = bcount & 0xffff; - addr += bcount; - size -= bcount; - } - } - } while (bh != NULL); - if (count) { - *--table |= 0x80000000; /* set End-Of-Table (EOT) bit */ - return 0; - } - printk("%s: empty DMA table?\n", drive->name); - return 1; /* let the PIO routines handle this weirdness */ -} - -/* - * piix_dmaproc() initiates/aborts DMA read/write operations on a drive. - * - * The caller is assumed to have selected the drive and programmed the drive's - * sector address using CHS or LBA. 
All that remains is to prepare for DMA - * and then issue the actual read/write DMA/PIO command to the drive. - * - * For ATAPI devices, we just prepare for DMA and return. The caller should - * then issue the packet command to the drive and call us again with - * ide_dma_begin afterwards. - * - * Returns 0 if all went well. - * Returns 1 if DMA read/write could not be started, in which case - * the caller should revert to PIO for the current request. - */ -static int piix_dmaproc (ide_dma_action_t func, ide_drive_t *drive) -{ - unsigned long dma_base = HWIF(drive)->dma_base; - unsigned int reading = (1 << 3); - piix_timing_t timing; - unsigned short reg; - byte dflags; - - switch (func) { - case ide_dma_off: - printk("%s: DMA disabled\n", drive->name); - case ide_dma_on: - drive->using_dma = (func == ide_dma_on); - reg = (HWIF(drive)->io_ports[IDE_DATA_OFFSET] == 0x170) ? 0x42 : 0x40; - if (pcibios_read_config_word(piix_pci_bus, piix_pci_fn, reg, (short *)&timing)) { - printk("%s: pcibios read failed\n", HWIF(drive)->name); - return 1; - } - dflags = drive->select.b.unit ? 
timing.piix_s.d1_flags : timing.piix_s.d0_flags; - if (dflags & PIIX_FLAGS_FAST_PIO) { - if (func == ide_dma_on && drive->media == ide_disk) - dflags |= PIIX_FLAGS_FAST_DMA; - else - dflags &= ~PIIX_FLAGS_FAST_DMA; - if (drive->select.b.unit == 0) - timing.piix_s.d0_flags = dflags; - else - timing.piix_s.d1_flags = dflags; - if (pcibios_write_config_word(piix_pci_bus, piix_pci_fn, reg, *(short *)&timing)) { - printk("%s: pcibios write failed\n", HWIF(drive)->name); - return 1; - } - } - return 0; - case ide_dma_abort: - outb(inb(dma_base)&~1, dma_base); /* stop DMA */ - return 0; - case ide_dma_check: - return config_drive_for_dma (drive); - case ide_dma_write: - reading = 0; - case ide_dma_read: - break; - case ide_dma_status_bad: - return ((inb(dma_base+2) & 7) != 4); /* verify good DMA status */ - case ide_dma_transferred: -#if 0 - return (number of bytes actually transferred); -#else - return (0); -#endif - case ide_dma_begin: - outb(inb(dma_base)|1, dma_base); /* begin DMA */ - return 0; - default: - printk("piix_dmaproc: unsupported func: %d\n", func); - return 1; - } - if (build_dmatable (drive)) - return 1; - outl(virt_to_bus (HWIF(drive)->dmatable), dma_base + 4); /* PRD table */ - outb(reading, dma_base); /* specify r/w */ - outb(inb(dma_base+2)|0x06, dma_base+2); /* clear status bits */ - if (drive->media != ide_disk) - return 0; - ide_set_handler(drive, &dma_intr, WAIT_CMD); /* issue cmd to drive */ - OUT_BYTE(reading ? 
WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG); - outb(inb(dma_base)|1, dma_base); /* begin DMA */ - return 0; -} - -static int config_drive_for_dma (ide_drive_t *drive) -{ - const char **list; - - struct hd_driveid *id = drive->id; - if (id && (id->capability & 1)) { - /* Enable DMA on any drive that supports mode2 (multi/single word) DMA */ - if (id->field_valid & 2) - if ((id->dma_mword & 0x404) == 0x404 || (id->dma_1word & 0x404) == 0x404) - return piix_dmaproc(ide_dma_on, drive); - /* Consult the list of known "good" drives */ - list = good_dma_drives; - while (*list) { - if (!strcmp(*list++,id->model)) - return piix_dmaproc(ide_dma_on, drive); - } - } - return piix_dmaproc(ide_dma_off, drive); -} - -#ifdef DISPLAY_PIIX_TIMINGS -/* - * print_piix_drive_flags() displays the currently programmed options - * in the PIIX/PIIX3/PIIX4 for a given drive. - */ -static void print_piix_drive_flags (const char *unit, byte dflags) -{ - printk(" %s ", unit); - printk( "fastDMA=%s", (dflags & PIIX_FLAGS_FAST_PIO) ? "yes" : "no "); - printk(" PreFetch=%s", (dflags & PIIX_FLAGS_PREFETCH) ? "on " : "off"); - printk(" IORDY=%s", (dflags & PIIX_FLAGS_USE_IORDY) ? "on " : "off"); - printk(" fastPIO=%s\n", ((dflags & (PIIX_FLAGS_FAST_PIO|PIIX_FLAGS_FAST_DMA)) == PIIX_FLAGS_FAST_PIO) ? "on " : "off"); -} -#endif /* DISPLAY_PIIX_TIMINGS */ - -static void init_piix_dma (ide_hwif_t *hwif, unsigned short base) -{ - static unsigned long dmatable = 0; - - printk(" %s: BM-DMA at 0x%04x-0x%04x", hwif->name, base, base+7); - if (check_region(base, 8)) { - printk(" -- ERROR, PORTS ALREADY IN USE"); - } else { - request_region(base, 8, "IDE DMA"); - hwif->dma_base = base; - if (!dmatable) { - /* - * The BM-DMA uses a full 32-bits, so we can - * safely use __get_free_page() here instead - * of __get_dma_pages() -- no ISA limitations. 
- */ - dmatable = __get_free_page(GFP_KERNEL); - } - if (dmatable) { - hwif->dmatable = (unsigned long *) dmatable; - dmatable += (PRD_ENTRIES * PRD_BYTES); - outl(virt_to_bus(hwif->dmatable), base + 4); - hwif->dmaproc = &piix_dmaproc; - } - } - printk("\n"); -} - -/* The next two functions were stolen from cmd640.c, with - a few modifications */ - -static void put_piix_reg (unsigned short reg, long val) -{ - unsigned long flags; - - save_flags(flags); - cli(); - outl_p((reg & 0xfc) | piix_key, 0xcf8); - outl_p(val, (reg & 3) | 0xcfc); - restore_flags(flags); -} - -static long get_piix_reg (unsigned short reg) -{ - long b; - unsigned long flags; - - save_flags(flags); - cli(); - outl_p((reg & 0xfc) | piix_key, 0xcf8); - b = inl_p((reg & 3) | 0xcfc); - restore_flags(flags); - return b; -} - -/* - * Search for an (apparently) unused block of I/O space - * of "size" bytes in length. - */ -static short find_free_region (unsigned short size) -{ - unsigned short i, base = 0xe800; - for (base = 0xe800; base > 0; base -= 0x800) { - if (!check_region(base,size)) { - for (i = 0; i < size; i++) { - if (inb(base+i) != 0xff) - goto next; - } - return base; /* success */ - } - next: - } - return 0; /* failure */ -} - -/* - * ide_init_triton() prepares the IDE driver for DMA operation. - * This routine is called once, from ide.c during driver initialization, - * for each triton chipset which is found (unlikely to be more than one). 
- */ -void ide_init_triton (byte bus, byte fn) -{ - int rc = 0, h; - int dma_enabled = 0; - unsigned short pcicmd, devid; - unsigned int bmiba; - const char *chipset = "ide"; - piix_timing_t timings[2]; - - piix_pci_bus = bus; - piix_pci_fn = fn; - - if (pcibios_read_config_word(bus, fn, 0x02, &devid)) - goto quit; - - if (devid == PCI_DEVICE_ID_INTEL_82371AB) - chipset = "PIIX4"; - else if (devid == PCI_DEVICE_ID_INTEL_82371SB_1) - chipset = "PIIX3"; - else if (devid == PCI_DEVICE_ID_INTEL_82371_1) - chipset = "PIIX"; - else if (devid == PCI_DEVICE_ID_VIA_82C586_1) - chipset = "VP1"; - else { - printk("Unknown PCI IDE interface 0x%x\n", devid); - goto quit; - } - - printk("%s: bus-master IDE device on PCI bus %d function %d\n", chipset, bus, fn); - - /* - * See if IDE ports are enabled - */ - if ((rc = pcibios_read_config_word(bus, fn, 0x04, &pcicmd))) - goto quit; - if ((pcicmd & 1) == 0) { - printk("%s: IDE ports are not enabled (BIOS)\n", chipset); - goto quit; - } - if (devid == PCI_DEVICE_ID_VIA_82C586_1) { - /* pri and sec channel enables are in port 0x40 */ - if ((rc = pcibios_read_config_word(bus, fn, 0x40, (short *)&timings[0]))) - goto quit; - if ((!timings[0].via_s.pri_en && (!timings[0].via_s.sec_en))) { - printk("%s: neither IDE port is enabled\n", chipset); - goto quit; - } - } - else { /* INTEL piix */ - if ((rc = pcibios_read_config_word(bus, fn, 0x40, (short *)&timings[0]))) - goto quit; - if ((rc = pcibios_read_config_word(bus, fn, 0x42, (short *)&timings[1]))) - goto quit; - if ((!timings[0].piix_s.ports_enabled) && (!timings[1].piix_s.ports_enabled)) { - printk("%s: neither IDE port is enabled\n", chipset); - goto quit; - } - } - - /* - * See if Bus-Mastered DMA is enabled - */ - if ((pcicmd & 4) == 0) { - printk("%s: bus-master DMA feature is not enabled (BIOS)\n", chipset); - } else { - /* - * Get the bmiba base address - */ - if ((rc = pcibios_read_config_dword(bus, fn, 0x20, &bmiba))) - goto quit; - bmiba &= 0xfff0; /* extract port base 
address */ - if (bmiba) { - dma_enabled = 1; - } else { - unsigned short base; - printk("%s: bus-master base address is invalid (0x%04x, BIOS problem)\n", chipset, bmiba); - base = find_free_region(16); - if (base) { - printk("%s: bypassing BIOS; setting bus-master base address to 0x%04x\n", chipset, base); - piix_key = 0x80000000 + (fn * 0x100); - put_piix_reg(0x04,get_piix_reg(0x04)&~5); - put_piix_reg(0x20,(get_piix_reg(0x20)&0xFFFF000F)|base|1); - put_piix_reg(0x04,get_piix_reg(0x04)|5); - bmiba = get_piix_reg(0x20)&0x0000FFF0; - if (bmiba == base && (get_piix_reg(0x04) & 5) == 5) - dma_enabled = 1; - else - printk("%s: operation failed\n", chipset); - } - if (!dma_enabled) - printk("%s: DMA is disabled (BIOS)\n", chipset); - } - } - - /* - * Save the dma_base port addr for each interface - */ - for (h = 0; h < MAX_HWIFS; ++h) { - unsigned int pri_sec; - piix_timing_t timing; - ide_hwif_t *hwif = &ide_hwifs[h]; - switch (hwif->io_ports[IDE_DATA_OFFSET]) { - case 0x1f0: pri_sec = 0; break; - case 0x170: pri_sec = 1; break; - default: continue; - } - - if (devid == PCI_DEVICE_ID_VIA_82C586_1) { - timing = timings[0]; - switch (h) { - case 0: - if (!timing.piix_s.ports_enabled) { - printk("port 0 DMA not enabled\n"); - continue; - } - case 1: - if (!timing.piix_s.sidetim_enabled) { - printk("port 1 DMA not enabled\n"); - continue; - } - } - hwif->chipset = ide_via; - } - else { /* PIIX */ - - timing = timings[pri_sec]; - if (!timing.piix_s.ports_enabled) /* interface disabled? */ - continue; - hwif->chipset = ide_triton; - } - if (dma_enabled) - init_piix_dma(hwif, bmiba + (pri_sec ? 
8 : 0)); -#ifdef DISPLAY_PIIX_TIMINGS - /* - * Display drive timings/modes - */ - { - const char *slave; - piix_sidetim_t sidetim; - byte sample = 5 - timing.piix_s.sample; - byte recovery = 4 - timing.piix_s.recovery; - unsigned int drvtim; - - if (devid == PCI_DEVICE_ID_VIA_82C586_1) { - pcibios_read_config_dword(bus, fn, 0x48, &drvtim); - if (pri_sec == 0) { - printk(" %s master: active_pulse_CLKs=%d, recovery_CLKs=%d\n", hwif->name, 1+(drvtim>>28), 1+((drvtim & 0x0f000000)>>24)); - printk(" %s slave: active_pulse_CLKs=%d, recovery_CLKs=%d\n", hwif->name, 1+((drvtim & 0xf00000)>>20), 1+((drvtim & 0x0f0000)>>16)); - continue; - } else { - printk(" %s master: active_pulse_CLKs=%d, recovery_CLKs=%d\n", hwif->name, 1+((drvtim & 0xf000)>>12), 1+((drvtim & 0x0f00)>>8)); - printk(" %s slave: active_pulse_CLKs=%d, recovery_CLKs=%d\n", hwif->name, 1+((drvtim & 0xf0)>>4), 1+(drvtim & 0x0f)); - continue; - } - } - - if ((devid == PCI_DEVICE_ID_INTEL_82371SB_1 - || devid == PCI_DEVICE_ID_INTEL_82371AB) - && timing.piix_s.sidetim_enabled - && !pcibios_read_config_byte(bus, fn, 0x44, (byte *) &sidetim)) - slave = ""; /* PIIX3 and later */ - else - slave = "/slave"; /* PIIX, or PIIX3 in compatibility mode */ - printk(" %s master%s: sample_CLKs=%d, recovery_CLKs=%d\n", hwif->name, slave, sample, recovery); - print_piix_drive_flags ("master:", timing.piix_s.d0_flags); - if (!*slave) { - if (pri_sec == 0) { - sample = 5 - sidetim.pri_sample; - recovery = 4 - sidetim.pri_recovery; - } else { - sample = 5 - sidetim.sec_sample; - recovery = 4 - sidetim.sec_recovery; - } - printk(" slave : sample_CLKs=%d, recovery_CLKs=%d\n", sample, recovery); - } - print_piix_drive_flags ("slave :", timing.piix_s.d1_flags); - } -#endif /* DISPLAY_PIIX_TIMINGS */ - } - -quit: if (rc) printk("%s: pcibios access failed - %s\n", chipset, pcibios_strerror(rc)); -} diff -u --recursive --new-file v2.1.67/linux/drivers/char/hfmodem/main.c linux/drivers/char/hfmodem/main.c --- 
v2.1.67/linux/drivers/char/hfmodem/main.c Mon Aug 11 14:47:04 1997 +++ linux/drivers/char/hfmodem/main.c Sun Nov 30 10:30:19 1997 @@ -136,8 +136,6 @@ #define LPT_CONTROL(iobase) (iobase+2) #define LPT_IRQ_ENABLE 0x10 -#define LPT_EXTENT 3 - #define MIDI_DATA(iobase) (iobase) #define MIDI_STATUS(iobase) (iobase+1) #define MIDI_READ_FULL 0x80 /* attention: negative logic!! */ @@ -150,33 +148,37 @@ #define SP_MIDI 4 /* ---------------------------------------------------------------------- */ -/* - * returns 0 if ok and != 0 on error; - * the same behaviour as par96_check_lpt in baycom.c - */ -__initfunc(static int check_lpt(unsigned int iobase)) +static int parptt_preempt(void *handle) { - unsigned char b1,b2; - int i; + /* we cannot relinquish the port in the middle of an operation */ + return 1; +} - if (iobase <= 0 || iobase > 0x1000-LPT_EXTENT) - return 0; - if (check_region(iobase, LPT_EXTENT)) - return 0; - b1 = inb(LPT_DATA(iobase)); - b2 = inb(LPT_CONTROL(iobase)); - outb(0xaa, LPT_DATA(iobase)); - i = inb(LPT_DATA(iobase)) == 0xaa; - outb(0x55, LPT_DATA(iobase)); - i &= inb(LPT_DATA(iobase)) == 0x55; - outb(0x0a, LPT_CONTROL(iobase)); - i &= (inb(LPT_CONTROL(iobase)) & 0xf) == 0x0a; - outb(0x05, LPT_CONTROL(iobase)); - i &= (inb(LPT_CONTROL(iobase)) & 0xf) == 0x05; - outb(b1, LPT_DATA(iobase)); - outb(b2, LPT_CONTROL(iobase)); - return !i; +/* --------------------------------------------------------------------- */ + +static void parptt_wakeup(void *handle) +{ + struct hfmodem_state *dev = (struct hfmodem_state *)handle; + + printk(KERN_DEBUG "%s: parptt: why am I being woken up?\n", hfmodem_drvname); + if (!parport_claim(dev->ptt_out.pardev)) + printk(KERN_DEBUG "%s: parptt: I'm broken.\n", hfmodem_drvname); +} + +/* --------------------------------------------------------------------- */ +__initfunc(static int check_lpt(struct hfmodem_state *dev, unsigned int iobase)) +{ + struct parport *pp = parport_enumerate(); + + while (pp && pp->base != iobase) + pp = 
pp->next; + if (!pp) + return 0; + if (!(dev->ptt_out.pardev = parport_register_device(pp, hfmodem_drvname, parptt_preempt, parptt_wakeup, + NULL, PARPORT_DEV_LURK, dev))) + return 0; + return 1; } /* --------------------------------------------------------------------- */ @@ -272,8 +274,7 @@ { enum uart u = c_uart_unknown; - if (dev->ptt_out.seriobase > 0 && dev->ptt_out.seriobase <= 0x1000-SER_EXTENT && - ((u = check_uart(dev->ptt_out.seriobase))) != c_uart_unknown) + if (((u = check_uart(dev->ptt_out.seriobase))) != c_uart_unknown) printk(KERN_INFO "%s: PTT output: uart found at address 0x%x type %s\n", hfmodem_drvname, dev->ptt_out.seriobase, uart_str[u]); else { @@ -282,8 +283,7 @@ hfmodem_drvname, dev->ptt_out.seriobase); dev->ptt_out.seriobase = 0; } - if (dev->ptt_out.pariobase > 0 && dev->ptt_out.pariobase <= 0x1000-LPT_EXTENT && - !check_lpt(dev->ptt_out.pariobase)) + if (check_lpt(dev, dev->ptt_out.pariobase)) printk(KERN_INFO "%s: PTT output: parallel port found at address 0x%x\n", hfmodem_drvname, dev->ptt_out.pariobase); else { @@ -291,6 +291,7 @@ printk(KERN_WARNING "%s: PTT output: no parallel port found at address 0x%x\n", hfmodem_drvname, dev->ptt_out.pariobase); dev->ptt_out.pariobase = 0; + dev->ptt_out.pardev = NULL; } if (dev->ptt_out.midiiobase > 0 && dev->ptt_out.midiiobase <= 0x1000-MIDI_EXTENT && check_midi(dev->ptt_out.midiiobase)) @@ -324,12 +325,11 @@ hfmodem_drvname, dev->ptt_out.seriobase); } if (dev->ptt_out.pariobase > 0) { - if (!check_region(dev->ptt_out.pariobase, LPT_EXTENT)) { - request_region(dev->ptt_out.pariobase, LPT_EXTENT, "hfmodem par ptt"); - dev->ptt_out.flags |= SP_PAR; - } else + if (parport_claim(dev->ptt_out.pardev)) printk(KERN_WARNING "%s: PTT output: parallel port at 0x%x busy\n", hfmodem_drvname, dev->ptt_out.pariobase); + else + dev->ptt_out.flags |= SP_PAR; } if (dev->ptt_out.midiiobase > 0) { if (!check_region(dev->ptt_out.midiiobase, MIDI_EXTENT)) { @@ -361,7 +361,7 @@ if (dev->ptt_out.flags & SP_SER) 
release_region(dev->ptt_out.seriobase, SER_EXTENT); if (dev->ptt_out.flags & SP_PAR) - release_region(dev->ptt_out.pariobase, LPT_EXTENT); + parport_release(dev->ptt_out.pardev); if (dev->ptt_out.flags & SP_MIDI) release_region(dev->ptt_out.midiiobase, MIDI_EXTENT); dev->ptt_out.flags = 0; @@ -671,6 +671,10 @@ void cleanup_module(void) { + struct hfmodem_state *dev = &hfmodem_state[0]; + + if (dev->ptt_out.pariobase > 0) + parport_unregister_device(dev->ptt_out.pardev); misc_deregister(&hfmodem_device); } @@ -733,4 +737,3 @@ /* --------------------------------------------------------------------- */ #endif /* MODULE */ - diff -u --recursive --new-file v2.1.67/linux/drivers/char/lp.c linux/drivers/char/lp.c --- v2.1.67/linux/drivers/char/lp.c Mon Nov 3 13:04:26 1997 +++ linux/drivers/char/lp.c Sun Nov 30 14:00:38 1997 @@ -64,6 +64,11 @@ #undef LP_DEBUG #undef LP_READ_DEBUG +/* Magic numbers */ +#define AUTO -3 +#define OFF -2 +#define UNSPEC -1 + static inline void lp_parport_release (int minor) { parport_release (lp_table[minor].dev); @@ -163,9 +168,7 @@ static void lp_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - struct parport *pb = (struct parport *) dev_id; - struct pardevice *pd = pb->cad; - struct lp_struct *lp_dev = (struct lp_struct *) pd->private; + struct lp_struct *lp_dev = (struct lp_struct *) dev_id; if (waitqueue_active (&lp_dev->lp_wait_q)) wake_up(&lp_dev->lp_wait_q); @@ -272,11 +275,11 @@ return total_bytes_written; } -static ssize_t lp_write(struct file * file, const char * buf, size_t count, loff_t *ppos) +static ssize_t lp_write(struct file * file, const char * buf, + size_t count, loff_t *ppos) { - struct inode *inode = file->f_dentry->d_inode; - unsigned int minor = MINOR(inode->i_rdev); - int retv; + unsigned int minor = MINOR(file->f_dentry->d_inode->i_rdev); + ssize_t retv; if (jiffies-lp_table[minor].lastcall > LP_TIME(minor)) lp_table[minor].runchars = 0; @@ -288,7 +291,7 @@ */ lp_parport_claim (minor); - retv = 
lp_write_buf(minor, buf, count); + retv = lp_write_buf(minor, buf, count); lp_parport_release (minor); return retv; @@ -315,15 +318,15 @@ } /* Status readback confirming to ieee1284 */ -static ssize_t lp_read(struct file * file, char * buf, size_t count, loff_t *ppos) +static ssize_t lp_read(struct file * file, char * buf, + size_t count, loff_t *ppos) { - struct inode *inode = file->f_dentry->d_inode; unsigned char z=0, Byte=0, status; char *temp; - int retval; + ssize_t retval; unsigned int counter=0; unsigned int i; - unsigned int minor=MINOR(inode->i_rdev); + unsigned int minor=MINOR(file->f_dentry->d_inode->i_rdev); /* Claim Parport or sleep until it becomes available * (see lp_wakeup() for details) @@ -568,7 +571,7 @@ lp_release }; -static int parport[LP_NO] = { -1, }; +static int parport[LP_NO] = { UNSPEC, }; #ifdef MODULE #define lp_init init_module @@ -589,11 +592,11 @@ printk(KERN_INFO "lp: too many ports, %s ignored.\n", str); } else if (!strcmp(str, "auto")) { - parport[0] = -3; + parport[0] = AUTO; } else { if (ints[0] == 0 || ints[1] == 0) { /* disable driver on "lp=" or "lp=0" */ - parport[0] = -2; + parport[0] = OFF; } else { printk(KERN_WARNING "warning: 'lp=0x%x' is deprecated, ignored\n", ints[1]); } @@ -619,7 +622,7 @@ static int inline lp_searchfor(int list[], int a) { int i; - for (i = 0; i < LP_NO && list[i] != -1; i++) { + for (i = 0; i < LP_NO && list[i] != UNSPEC; i++) { if (list[i] == a) return 1; } return 0; @@ -630,15 +633,16 @@ int count = 0; struct parport *pb; - if (parport[0] == -2) return 0; + if (parport[0] == OFF) return 0; pb = parport_enumerate(); while (pb) { /* We only understand PC-style ports. 
*/ if (pb->modes & PARPORT_MODE_PCSPP) { - if (parport[0] == -1 || lp_searchfor(parport, count) || - (parport[0] == -3 && + if (parport[0] == UNSPEC || + lp_searchfor(parport, count) || + (parport[0] == AUTO && pb->probe_info.class == PARPORT_CLASS_PRINTER)) { lp_table[count].dev = parport_register_device(pb, dev_name, @@ -646,6 +650,10 @@ lp_interrupt, PARPORT_DEV_TRAN, (void *) &lp_table[count]); lp_table[count].flags |= LP_EXIST; + init_waitqueue (&lp_table[count].lp_wait_q); + lp_parport_claim (count); + lp_reset (count); + lp_parport_release (count); printk(KERN_INFO "lp%d: using %s (%s).\n", count, pb->name, (pb->irq == PARPORT_IRQ_NONE)?"polling":"interrupt-driven"); } diff -u --recursive --new-file v2.1.67/linux/drivers/char/mem.c linux/drivers/char/mem.c --- v2.1.67/linux/drivers/char/mem.c Sat Oct 25 02:44:15 1997 +++ linux/drivers/char/mem.c Sat Nov 29 16:23:11 1997 @@ -539,7 +539,7 @@ * Some joysticks only appear when the soundcard they are * connected too is confgured. Keep the sound/joystick ordering. */ - joystick_init(); + js_init(); #endif #if CONFIG_QIC02_TAPE qic02_tape_init(); diff -u --recursive --new-file v2.1.67/linux/drivers/char/n_tty.c linux/drivers/char/n_tty.c --- v2.1.67/linux/drivers/char/n_tty.c Sat Oct 25 02:44:15 1997 +++ linux/drivers/char/n_tty.c Sun Nov 30 10:59:02 1997 @@ -695,8 +695,8 @@ int is_ignored(int sig) { - return ((current->blocked & (1<<(sig-1))) || - (current->sig->action[sig-1].sa_handler == SIG_IGN)); + return (sigismember(¤t->blocked, sig) || + current->sig->action[sig-1].sa.sa_handler == SIG_IGN); } static void n_tty_set_termios(struct tty_struct *tty, struct termios * old) diff -u --recursive --new-file v2.1.67/linux/drivers/char/pcwd.c linux/drivers/char/pcwd.c --- v2.1.67/linux/drivers/char/pcwd.c Sat Nov 29 11:25:09 1997 +++ linux/drivers/char/pcwd.c Sat Nov 29 16:36:13 1997 @@ -29,6 +29,7 @@ * 961118 Changed some verbiage on some of the output, tidied up * code bits, and added compatibility to 2.1.x. 
* 970912 Enabled board on open and disable on close. + * 971107 Took account of recent VFS changes (broke read). */ #include @@ -222,7 +223,7 @@ } static int pcwd_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) + unsigned int cmd, unsigned long arg) { int i, cdat, rv; static struct watchdog_info ident= @@ -359,8 +360,13 @@ return 0; } -static long pcwd_write(struct inode *inode, struct file *file, const char *buf, unsigned long len) +static ssize_t pcwd_write(struct file *file, const char *buf, size_t len, + loff_t *ppos) { + /* Can't seek (pwrite) on this device */ + if (ppos != &file->f_pos) + return -ESPIPE; + if (len) { pcwd_send_heartbeat(); @@ -381,11 +387,15 @@ return(0); } -static ssize_t pcwd_read(struct file *file, char *buf, size_t count, loff_t *ppos) +static ssize_t pcwd_read(struct file *file, char *buf, size_t count, + loff_t *ppos) { unsigned short c = inb(current_readport); unsigned char cp; + /* Can't seek (pread) on this device */ + if (ppos != &file->f_pos) + return -ESPIPE; switch(MINOR(file->f_dentry->d_inode->i_rdev)) { case TEMP_MINOR: @@ -488,11 +498,16 @@ pcwd_read, /* Read */ pcwd_write, /* Write */ NULL, /* Readdir */ - NULL, /* Select */ + NULL, /* Poll */ pcwd_ioctl, /* IOctl */ NULL, /* MMAP */ pcwd_open, /* Open */ - pcwd_close /* Close */ + pcwd_close, /* Release */ + NULL, /* Fsync */ + NULL, /* Fasync */ + NULL, /* CheckMediaChange */ + NULL, /* Revalidate */ + NULL, /* Lock */ }; static struct miscdevice pcwd_miscdev = { diff -u --recursive --new-file v2.1.67/linux/drivers/char/videodev.c linux/drivers/char/videodev.c --- v2.1.67/linux/drivers/char/videodev.c Sat Nov 29 11:25:09 1997 +++ linux/drivers/char/videodev.c Sun Nov 30 14:02:02 1997 @@ -249,7 +249,7 @@ return 0; } - +#ifdef MODULE int init_module(void) { return videodev_init(); @@ -259,6 +259,8 @@ { unregister_chrdev(VIDEO_MAJOR, "video_capture"); } + +#endif EXPORT_SYMBOL(video_register_device); 
EXPORT_SYMBOL(video_unregister_device); diff -u --recursive --new-file v2.1.67/linux/drivers/char/vt.c linux/drivers/char/vt.c --- v2.1.67/linux/drivers/char/vt.c Wed Sep 24 20:05:47 1997 +++ linux/drivers/char/vt.c Sun Nov 30 10:59:02 1997 @@ -725,7 +725,7 @@ extern int spawnpid, spawnsig; if (!perm) return -EPERM; - if (arg < 1 || arg > NSIG || arg == SIGKILL) + if (arg < 1 || arg > _NSIG || arg == SIGKILL) return -EINVAL; spawnpid = current->pid; spawnsig = arg; diff -u --recursive --new-file v2.1.67/linux/drivers/misc/Makefile linux/drivers/misc/Makefile --- v2.1.67/linux/drivers/misc/Makefile Tue Sep 23 16:48:47 1997 +++ linux/drivers/misc/Makefile Sat Nov 29 16:19:40 1997 @@ -30,6 +30,13 @@ M_OBJS += parport_pc.o endif endif + ifeq ($(CONFIG_PARPORT_AX),y) + LX_OBJS += parport_ax.o + else + ifeq ($(CONFIG_PARPORT_AX),m) + M_OBJS += parport_ax.o + endif + endif LX_OBJS += parport_init.o else ifeq ($(CONFIG_PARPORT),m) @@ -42,6 +49,9 @@ endif ifeq ($(CONFIG_PARPORT_PC),m) MX_OBJS += parport_pc.o + endif + ifeq ($(CONFIG_PARPORT_AX),m) + MX_OBJS += parport_ax.o endif endif diff -u --recursive --new-file v2.1.67/linux/drivers/misc/parport_ax.c linux/drivers/misc/parport_ax.c --- v2.1.67/linux/drivers/misc/parport_ax.c Wed Dec 31 16:00:00 1969 +++ linux/drivers/misc/parport_ax.c Sat Nov 29 16:19:40 1997 @@ -0,0 +1,554 @@ +/* $Id: parport_ax.c,v 1.2 1997/10/25 17:27:03 philip Exp $ + * Parallel-port routines for Sun Ultra/AX architecture + * + * Author: Eddie C. Dost + * + * based on work by: + * Phil Blundell + * Tim Waugh + * Jose Renau + * David Campbell + * Grant Guenther + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include + + +/* + * Define this if you have Devices which don't support short + * host read/write cycles. 
+ */ +#undef HAVE_SLOW_DEVICES + + +#define DATA 0x00 +#define STATUS 0x01 +#define CONTROL 0x02 + +#define CFIFO 0x400 +#define DFIFO 0x400 +#define TFIFO 0x400 +#define CNFA 0x400 +#define CNFB 0x401 +#define ECR 0x402 + +static void +ax_null_intr_func(int irq, void *dev_id, struct pt_regs *regs) +{ + /* NULL function - Does nothing */ + return; +} + +#if 0 +static unsigned int +ax_read_configb(struct parport *p) +{ + return (unsigned int)inb(p->base + CNFB); +} +#endif + +static void +ax_write_data(struct parport *p, unsigned int d) +{ + outb(d, p->base + DATA); +} + +static unsigned int +ax_read_data(struct parport *p) +{ + return (unsigned int)inb(p->base + DATA); +} + +static void +ax_write_control(struct parport *p, unsigned int d) +{ + outb(d, p->base + CONTROL); +} + +static unsigned int +ax_read_control(struct parport *p) +{ + return (unsigned int)inb(p->base + CONTROL); +} + +static unsigned int +ax_frob_control(struct parport *p, unsigned int mask, unsigned int val) +{ + unsigned int old = (unsigned int)inb(p->base + CONTROL); + outb(((old & ~mask) ^ val), p->base + CONTROL); + return old; +} + +static void +ax_write_status(struct parport *p, unsigned int d) +{ + outb(d, p->base + STATUS); +} + +static unsigned int +ax_read_status(struct parport *p) +{ + return (unsigned int)inb(p->base + STATUS); +} + +static void +ax_write_econtrol(struct parport *p, unsigned int d) +{ + outb(d, p->base + ECR); +} + +static unsigned int +ax_read_econtrol(struct parport *p) +{ + return (unsigned int)inb(p->base + ECR); +} + +static unsigned int +ax_frob_econtrol(struct parport *p, unsigned int mask, unsigned int val) +{ + unsigned int old = (unsigned int)inb(p->base + ECR); + outb(((old & ~mask) ^ val), p->base + ECR); + return old; +} + +static void +ax_change_mode(struct parport *p, int m) +{ + ax_frob_econtrol(p, 0xe0, m << 5); +} + +static void +ax_write_fifo(struct parport *p, unsigned int v) +{ + outb(v, p->base + DFIFO); +} + +static unsigned int 
+ax_read_fifo(struct parport *p) +{ + return inb(p->base + DFIFO); +} + +static void +ax_disable_irq(struct parport *p) +{ + struct linux_ebus_dma *dma = p->private_data; + unsigned int dcsr; + + dcsr = readl((unsigned long)&dma->dcsr); + dcsr &= ~(EBUS_DCSR_INT_EN); + writel(dcsr, (unsigned long)&dma->dcsr); +} + +static void +ax_enable_irq(struct parport *p) +{ + struct linux_ebus_dma *dma = p->private_data; + unsigned int dcsr; + + dcsr = readl((unsigned long)&dma->dcsr); + dcsr |= EBUS_DCSR_INT_EN; + writel(dcsr, (unsigned long)&dma->dcsr); +} + +static void +ax_release_resources(struct parport *p) +{ + if (p->irq != PARPORT_IRQ_NONE) { + ax_disable_irq(p); + free_irq(p->irq, p); + } + release_region(p->base, p->size); + if (p->modes & PARPORT_MODE_PCECR) + release_region(p->base+0x400, 3); + release_region((unsigned long)p->private_data, + sizeof(struct linux_ebus_dma)); +} + +static int +ax_claim_resources(struct parport *p) +{ + /* FIXME check that resources are free */ + if (p->irq != PARPORT_IRQ_NONE) { + request_irq(p->irq, ax_null_intr_func, 0, p->name, p); + ax_enable_irq(p); + } + request_region(p->base, p->size, p->name); + if (p->modes & PARPORT_MODE_PCECR) + request_region(p->base+0x400, 3, p->name); + request_region((unsigned long)p->private_data, + sizeof(struct linux_ebus_dma), p->name); + return 0; +} + +static void +ax_save_state(struct parport *p, struct parport_state *s) +{ + s->u.pc.ctr = ax_read_control(p); + s->u.pc.ecr = ax_read_econtrol(p); +} + +static void +ax_restore_state(struct parport *p, struct parport_state *s) +{ + ax_write_control(p, s->u.pc.ctr); + ax_write_econtrol(p, s->u.pc.ecr); +} + +static unsigned int +ax_epp_read_block(struct parport *p, void *buf, unsigned int length) +{ + return 0; /* FIXME */ +} + +static unsigned int +ax_epp_write_block(struct parport *p, void *buf, unsigned int length) +{ + return 0; /* FIXME */ +} + +static unsigned int +ax_ecp_read_block(struct parport *p, void *buf, unsigned int length, + void 
(*fn)(struct parport *, void *, unsigned int), + void *handle) +{ + return 0; /* FIXME */ +} + +static unsigned int +ax_ecp_write_block(struct parport *p, void *buf, unsigned int length, + void (*fn)(struct parport *, void *, unsigned int), + void *handle) +{ + return 0; /* FIXME */ +} + +static int +ax_examine_irq(struct parport *p) +{ + return 0; /* FIXME */ +} + +static void +ax_inc_use_count(void) +{ +#ifdef MODULE + MOD_INC_USE_COUNT; +#endif +} + +static void +ax_dec_use_count(void) +{ +#ifdef MODULE + MOD_DEC_USE_COUNT; +#endif +} + +static struct parport_operations ax_ops = +{ + ax_write_data, + ax_read_data, + + ax_write_control, + ax_read_control, + ax_frob_control, + + ax_write_econtrol, + ax_read_econtrol, + ax_frob_econtrol, + + ax_write_status, + ax_read_status, + + ax_write_fifo, + ax_read_fifo, + + ax_change_mode, + + ax_release_resources, + ax_claim_resources, + + ax_epp_write_block, + ax_epp_read_block, + + ax_ecp_write_block, + ax_ecp_read_block, + + ax_save_state, + ax_restore_state, + + ax_enable_irq, + ax_disable_irq, + ax_examine_irq, + + ax_inc_use_count, + ax_dec_use_count +}; + + +/****************************************************** + * MODE detection section: + */ + +/* Check for ECP + * + * Old style XT ports alias io ports every 0x400, hence accessing ECR + * on these cards actually accesses the CTR. + * + * Modern cards don't do this but reading from ECR will return 0xff + * regardless of what is written here if the card does NOT support + * ECP. + * + * We will write 0x2c to ECR and 0xcc to CTR since both of these + * values are "safe" on the CTR since bits 6-7 of CTR are unused. 
+ */ +static int parport_ECR_present(struct parport *pb) +{ + unsigned int r, octr = pb->ops->read_control(pb), + oecr = pb->ops->read_econtrol(pb); + + r = pb->ops->read_control(pb); + if ((pb->ops->read_econtrol(pb) & 0x3) == (r & 0x3)) { + pb->ops->write_control(pb, r ^ 0x2 ); /* Toggle bit 1 */ + + r = pb->ops->read_control(pb); + if ((pb->ops->read_econtrol(pb) & 0x2) == (r & 0x2)) { + pb->ops->write_control(pb, octr); + return 0; /* Sure that no ECR register exists */ + } + } + + if ((pb->ops->read_econtrol(pb) & 0x3 ) != 0x1) + return 0; + + pb->ops->write_econtrol(pb, 0x34); + if (pb->ops->read_econtrol(pb) != 0x35) + return 0; + + pb->ops->write_econtrol(pb, oecr); + pb->ops->write_control(pb, octr); + + return PARPORT_MODE_PCECR; +} + +static int parport_ECP_supported(struct parport *pb) +{ + int i, oecr = pb->ops->read_econtrol(pb); + + /* If there is no ECR, we have no hope of supporting ECP. */ + if (!(pb->modes & PARPORT_MODE_PCECR)) + return 0; + + /* + * Using LGS chipset it uses ECR register, but + * it doesn't support ECP or FIFO MODE + */ + + pb->ops->write_econtrol(pb, 0xc0); /* TEST FIFO */ + for (i=0; i < 1024 && (pb->ops->read_econtrol(pb) & 0x01); i++) + pb->ops->write_fifo(pb, 0xaa); + + pb->ops->write_econtrol(pb, oecr); + return (i == 1024) ? 0 : PARPORT_MODE_PCECP; +} + +/* Detect PS/2 support. + * + * Bit 5 (0x20) sets the PS/2 data direction; setting this high + * allows us to read data from the data lines. In theory we would get back + * 0xff but any peripheral attached to the port may drag some or all of the + * lines down to zero. So if we get back anything that isn't the contents + * of the data register we deem PS/2 support to be present. + * + * Some SPP ports have "half PS/2" ability - you can't turn off the line + * drivers, but an external peripheral with sufficiently beefy drivers of + * its own can overpower them and assert its own levels onto the bus, from + * where they can then be read back as normal. 
Ports with this property + * and the right type of device attached are likely to fail the SPP test, + * (as they will appear to have stuck bits) and so the fact that they might + * be misdetected here is rather academic. + */ + +static int parport_PS2_supported(struct parport *pb) +{ + int ok = 0, octr = pb->ops->read_control(pb); + + pb->ops->write_control(pb, octr | 0x20); /* try to tri-state buffer */ + + pb->ops->write_data(pb, 0x55); + if (pb->ops->read_data(pb) != 0x55) ok++; + + pb->ops->write_data(pb, 0xaa); + if (pb->ops->read_data(pb) != 0xaa) ok++; + + pb->ops->write_control(pb, octr); /* cancel input mode */ + + return ok ? PARPORT_MODE_PCPS2 : 0; +} + +static int parport_ECPPS2_supported(struct parport *pb) +{ + int mode, oecr = pb->ops->read_econtrol(pb); + + if (!(pb->modes & PARPORT_MODE_PCECR)) + return 0; + + pb->ops->write_econtrol(pb, 0x20); + + mode = parport_PS2_supported(pb); + + pb->ops->write_econtrol(pb, oecr); + return mode ? PARPORT_MODE_PCECPPS2 : 0; +} + +#define printmode(x) \ +{ \ + if (p->modes & PARPORT_MODE_PC##x) { \ + printk("%s%s", f ? 
"," : "", #x); \ + f++; \ + } \ +} + +int +init_one_port(struct linux_ebus_device *dev) +{ + struct parport tmpport, *p; + unsigned long base; + unsigned long config; + unsigned char tmp; + int irq, dma; + + /* Pointer to NS87303 Configuration Registers */ + config = dev->base_address[1]; + + /* Setup temporary access to Device operations */ + tmpport.base = dev->base_address[0]; + tmpport.ops = &ax_ops; + + /* Enable ECP mode, set bit 2 of the CTR first */ + tmpport.ops->write_control(&tmpport, 0x04); + tmp = ns87303_readb(config, PCR); + tmp |= (PCR_EPP_IEEE | PCR_ECP_ENABLE | PCR_ECP_CLK_ENA); + ns87303_writeb(config, PCR, tmp); + + /* LPT CTR bit 5 controls direction of parallel port */ + tmp = ns87303_readb(config, PTR); + tmp |= PTR_LPT_REG_DIR; + ns87303_writeb(config, PTR, tmp); + + /* Configure IRQ to Push Pull, Level Low */ + tmp = ns87303_readb(config, PCR); + tmp &= ~(PCR_IRQ_ODRAIN); + tmp |= PCR_IRQ_POLAR; + ns87303_writeb(config, PCR, tmp); + +#ifndef HAVE_SLOW_DEVICES + /* Enable Zero Wait State for ECP */ + tmp = ns87303_readb(config, FCR); + tmp |= FCR_ZWS_ENA; + ns87303_writeb(config, FCR, tmp); +#endif + + /* + * Now continue initializing the port + */ + base = dev->base_address[0]; + irq = dev->irqs[0]; + dma = PARPORT_DMA_AUTO; + + if (!(p = parport_register_port(base, irq, dma, &ax_ops))) + return 0; + + /* Safe away pointer to our EBus DMA */ + p->private_data = (void *)dev->base_address[2]; + + p->modes = PARPORT_MODE_PCSPP | parport_PS2_supported(p); + if (!check_region(p->base + 0x400, 3)) { + p->modes |= parport_ECR_present(p); + p->modes |= parport_ECP_supported(p); + p->modes |= parport_ECPPS2_supported(p); + } + p->size = 3; + + if (p->dma == PARPORT_DMA_AUTO) + p->dma = (p->modes & PARPORT_MODE_PCECP) ? 
0 : PARPORT_DMA_NONE; + + printk(KERN_INFO "%s: PC-style at 0x%lx", p->name, p->base); + if (p->irq != PARPORT_IRQ_NONE) + printk(", irq %x", (unsigned int)p->irq); + if (p->dma != PARPORT_DMA_NONE) + printk(", dma %d", p->dma); + printk(" ["); + { + int f = 0; + printmode(SPP); + printmode(PS2); + printmode(ECP); + printmode(ECPPS2); + } + printk("]\n"); + parport_proc_register(p); + p->flags |= PARPORT_FLAG_COMA; + + ax_write_control(p, 0x0c); + ax_write_data(p, 0); + + if (parport_probe_hook) + (*parport_probe_hook)(p); + + return 1; +} + +int +parport_ax_init(void) +{ + struct linux_ebus *ebus; + struct linux_ebus_device *edev; + int count = 0; + + for_all_ebusdev(edev, ebus) + if (!strcmp(edev->prom_name, "ecpp")) + count += init_one_port(edev); + return count; +} + +#ifdef MODULE + +int +init_module(void) +{ + return (parport_ax_init() ? 0 : 1); +} + +void +cleanup_module(void) +{ + struct parport *p = parport_enumerate(), *tmp; + while (p) { + tmp = p->next; + if (p->modes & PARPORT_MODE_PCSPP) { + if (!(p->flags & PARPORT_FLAG_COMA)) + parport_quiesce(p); + parport_proc_unregister(p); + parport_unregister_port(p); + } + p = tmp; + } +} +#endif diff -u --recursive --new-file v2.1.67/linux/drivers/misc/parport_ieee1284.c linux/drivers/misc/parport_ieee1284.c --- v2.1.67/linux/drivers/misc/parport_ieee1284.c Wed Sep 3 20:52:42 1997 +++ linux/drivers/misc/parport_ieee1284.c Sat Nov 29 16:19:40 1997 @@ -1,4 +1,5 @@ -/* IEEE-1284 implementation for parport. +/* $Id: parport_ieee1284.c,v 1.4 1997/10/19 21:37:21 philip Exp $ + * IEEE-1284 implementation for parport. * * Authors: Phil Blundell * Carsten Gross @@ -6,83 +7,63 @@ */ #include - #include #include -#include -#include -#include #include -#include - -/* The following read functions are an implementation of a status readback - * and device id request confirming to IEEE1284-1994. - * - * These probably ought to go in some seperate file, so people like the SPARC - * don't have to pull them in. 
- */ /* Wait for Status line(s) to change in 35 ms - see IEEE1284-1994 page 24 to * 25 for this. After this time we can create a timeout because the - * peripheral doesn't conform to IEEE1284. We want to save CPU time: we are - * waiting a maximum time of 500 us busy (this is for speed). If there is + * peripheral doesn't conform to IEEE1284. We want to save CPU time: we are + * waiting a maximum time of 500 us busy (this is for speed). If there is * not the right answer in this time, we call schedule and other processes - * are able "to eat" the time up to 30ms. So the maximum load avarage can't - * get above 5% for a read even if the peripheral is really slow. (but your - * read gets very slow then - only about 10 characters per second. This - * should be tuneable). Thanks to Andreas who pointed me to this and ordered - * the documentation. + * are able to eat the time up to 40ms. */ int parport_wait_peripheral(struct parport *port, unsigned char mask, unsigned char result) { - int counter=0; + int counter; unsigned char status; - do { + for (counter = 0; counter < 20; counter++) { status = parport_read_status(port); + if ((status & mask) == result) + return 0; udelay(25); - counter++; - if (need_resched) + if (resched_needed()) schedule(); - } while ( ((status & mask) != result) && (counter < 20) ); - if ( (counter == 20) && ((status & mask) != result) ) { - current->state=TASK_INTERRUPTIBLE; - current->timeout=jiffies+4; - schedule(); /* wait for 4 scheduler runs (40ms) */ - status = parport_read_status(port); - if ((status & mask) != result) return 1; /* timeout */ } - return 0; /* okay right response from device */ + current->state = TASK_INTERRUPTIBLE; + current->timeout = jiffies+4; + schedule(); /* wait for 40ms */ + status = parport_read_status(port); + return ((status & mask) == result)?0:1; } -/* Test if nibble mode for status readback is okay. Returns the value false - * if the printer doesn't support readback at all. 
If it supports readbacks - * and printer data is available the function returns 1, otherwise 2. The - * only valid values for "mode" are 0 and 4. 0 requests normal nibble mode, - * 4 is for "request device id using nibble mode". The request for the - * device id is best done in an ioctl (or at bootup time). There is no - * check for an invalid value, the only function using this call at the - * moment is lp_read and the ioctl LPGETDEVICEID both fixed calls from - * trusted kernel. +/* Test if the peripheral is IEEE 1284 compliant. + * return values are: + * 0 - handshake failed; peripheral is not compliant (or none present) + * 1 - handshake OK; IEEE1284 peripheral present but no data available + * 2 - handshake OK; IEEE1284 peripheral and data available */ int parport_ieee1284_nibble_mode_ok(struct parport *port, unsigned char mode) { parport_write_data(port, mode); - udelay(5); - parport_write_control(port, parport_read_control(port) & ~8); /* SelectIN low */ - parport_write_control(port, parport_read_control(port) | 2); /* AutoFeed high */ - if (parport_wait_peripheral(port, 0x78, 0x38)) { /* timeout? 
*/ - parport_write_control(port, (parport_read_control(port) & ~2) | 8); - return 0; /* first stage of negotiation failed, - * no IEEE1284 compliant device on this port - */ + udelay(500); + /* nSelectIn high, nAutoFd low */ + parport_write_control(port, (parport_read_control(port) & ~8) | 2); + if (parport_wait_peripheral(port, 0x78, 0x38)) { + parport_write_control(port, + (parport_read_control(port) & ~2) | 8); + return 0; } - parport_write_control(port, parport_read_control(port) | 1); /* Strobe high */ + /* nStrobe low */ + parport_write_control(port, parport_read_control(port) | 1); udelay(5); /* Strobe wait */ - parport_write_control(port, parport_read_control(port) & ~1); /* Strobe low */ + /* nStrobe high */ + parport_write_control(port, parport_read_control(port) & ~1); udelay(5); - parport_write_control(port, parport_read_control(port) & ~2); /* AutoFeed low */ + /* nAutoFd low */ + parport_write_control(port, parport_read_control(port) & ~2); return (parport_wait_peripheral(port, 0x20, 0))?2:1; } diff -u --recursive --new-file v2.1.67/linux/drivers/misc/parport_init.c linux/drivers/misc/parport_init.c --- v2.1.67/linux/drivers/misc/parport_init.c Wed Sep 3 20:52:42 1997 +++ linux/drivers/misc/parport_init.c Sat Nov 29 16:19:41 1997 @@ -17,13 +17,15 @@ #include #include #include +#include #ifndef MODULE -static int io[PARPORT_MAX+1] __initdata = { 0, }; -static int irq[PARPORT_MAX] __initdata = { PARPORT_IRQ_NONE, }; -static int dma[PARPORT_MAX] __initdata = { PARPORT_DMA_NONE, }; +static int io[PARPORT_MAX+1] __initdata = { [0 ... PARPORT_MAX] = 0 }; +static int irq[PARPORT_MAX] __initdata = { [0 ... PARPORT_MAX-1] = PARPORT_IRQ_NONE }; +static int dma[PARPORT_MAX] __initdata = { [0 ... 
PARPORT_MAX-1] = PARPORT_DMA_NONE }; extern int parport_pc_init(int *io, int *irq, int *dma); +extern int parport_ax_init(void); static int parport_setup_ptr __initdata = 0; @@ -68,11 +70,19 @@ { struct parport *pb; - if (io[0] == PARPORT_DISABLE) return 1; + if (io[0] == PARPORT_DISABLE) + return 1; + +#ifdef CONFIG_PNP_PARPORT + parport_probe_hook = &parport_probe_one; +#endif parport_proc_init(); #ifdef CONFIG_PARPORT_PC parport_pc_init(io, irq, dma); #endif +#ifdef CONFIG_PARPORT_AX + parport_ax_init(); +#endif return 0; } #endif @@ -91,6 +101,7 @@ EXPORT_SYMBOL(parport_wait_peripheral); EXPORT_SYMBOL(parport_proc_register); EXPORT_SYMBOL(parport_proc_unregister); +EXPORT_SYMBOL(parport_probe_hook); void inc_parport_count(void) { diff -u --recursive --new-file v2.1.67/linux/drivers/misc/parport_pc.c linux/drivers/misc/parport_pc.c --- v2.1.67/linux/drivers/misc/parport_pc.c Mon Nov 17 18:47:21 1997 +++ linux/drivers/misc/parport_pc.c Sat Nov 29 16:19:41 1997 @@ -837,7 +837,7 @@ } p->size = (p->modes & (PARPORT_MODE_PCEPP | PARPORT_MODE_PCECPEPP))?8:3; - printk(KERN_INFO "%s: PC-style at 0x%x", p->name, p->base); + printk(KERN_INFO "%s: PC-style at 0x%lx", p->name, p->base); if (p->irq == PARPORT_IRQ_AUTO) { p->irq = PARPORT_IRQ_NONE; parport_irq_probe(p); @@ -868,6 +868,10 @@ /* Done probing. Now put the port into a sensible start-up state. 
*/ pc_write_control(p, 0xc); pc_write_data(p, 0); + + if (parport_probe_hook) + (*parport_probe_hook)(p); + return 1; } diff -u --recursive --new-file v2.1.67/linux/drivers/misc/parport_procfs.c linux/drivers/misc/parport_procfs.c --- v2.1.67/linux/drivers/misc/parport_procfs.c Wed Sep 3 20:52:42 1997 +++ linux/drivers/misc/parport_procfs.c Sat Nov 29 16:19:41 1997 @@ -125,7 +125,7 @@ struct parport *pp = (struct parport *)data; int len=0; - len += sprintf(page+len, "base:\t0x%x\n",pp->base); + len += sprintf(page+len, "base:\t0x%lx\n",pp->base); if (pp->irq == PARPORT_IRQ_NONE) len += sprintf(page+len, "irq:\tnone\n"); else diff -u --recursive --new-file v2.1.67/linux/drivers/misc/parport_share.c linux/drivers/misc/parport_share.c --- v2.1.67/linux/drivers/misc/parport_share.c Tue Sep 23 16:48:47 1997 +++ linux/drivers/misc/parport_share.c Sat Nov 29 16:19:41 1997 @@ -1,4 +1,5 @@ -/* Parallel-port resource manager code. +/* $Id: parport_share.c,v 1.8 1997/11/08 18:55:29 philip Exp $ + * Parallel-port resource manager code. * * Authors: David Campbell * Tim Waugh @@ -29,6 +30,8 @@ static struct parport *portlist = NULL, *portlist_tail = NULL; static int portcount = 0; +void (*parport_probe_hook)(struct parport *port) = NULL; + /* Return a list of all the ports we know about. */ struct parport *parport_enumerate(void) { @@ -275,11 +278,11 @@ dev->port->cad = dev; /* Swap the IRQ handlers. */ - if (dev->port->irq >= 0) { + if (dev->port->irq != PARPORT_IRQ_NONE) { free_irq(dev->port->irq, dev->port); request_irq(dev->port->irq, dev->irq_func ? dev->irq_func : parport_null_intr_func, SA_INTERRUPT, dev->name, - dev->port); + dev->private); } /* Restore control registers */ @@ -303,10 +306,10 @@ dev->port->ops->save_state(dev->port, dev->state); /* Point IRQs somewhere harmless. 
*/ - if (dev->port->irq >= 0) { + if (dev->port->irq != PARPORT_IRQ_NONE) { free_irq(dev->port->irq, dev->port); request_irq(dev->port->irq, parport_null_intr_func, - SA_INTERRUPT, dev->port->name, dev->port); + SA_INTERRUPT, dev->port->name, NULL); } /* Walk the list, offering a wakeup callback to everybody other diff -u --recursive --new-file v2.1.67/linux/drivers/net/3c59x.c linux/drivers/net/3c59x.c --- v2.1.67/linux/drivers/net/3c59x.c Tue May 13 22:41:08 1997 +++ linux/drivers/net/3c59x.c Sun Nov 30 12:21:45 1997 @@ -38,6 +38,8 @@ #include #include +#include + #ifdef CONFIG_PCI #include #include @@ -498,7 +500,7 @@ outw(EEPROM_Read + PhysAddr01 + i, ioaddr + Wn0EepromCmd); /* Pause for at least 162 us. for the read to take place. */ for (timer = 162*4 + 400; timer >= 0; timer--) { - SLOW_DOWN_IO; + udelay(1); if ((inw(ioaddr + Wn0EepromCmd) & 0x8000) == 0) break; } diff -u --recursive --new-file v2.1.67/linux/drivers/net/Config.in linux/drivers/net/Config.in --- v2.1.67/linux/drivers/net/Config.in Sat Nov 29 11:25:09 1997 +++ linux/drivers/net/Config.in Sun Nov 30 14:00:38 1997 @@ -140,9 +140,9 @@ fi fi -#if [ ! "$CONFIG_PARPORT" = "n" ]; then -# dep_tristate 'PLIP (parallel port) support' CONFIG_PLIP $CONFIG_PARPORT -#fi +if [ ! "$CONFIG_PARPORT" = "n" ]; then + dep_tristate 'PLIP (parallel port) support' CONFIG_PLIP $CONFIG_PARPORT +fi tristate 'PPP (point-to-point) support' CONFIG_PPP if [ ! 
"$CONFIG_PPP" = "n" ]; then @@ -157,9 +157,15 @@ tristate 'Gracilis PackeTwin driver for AX.25' CONFIG_PT tristate 'Ottawa PI and PI2 driver for AX.25' CONFIG_PI tristate 'Z8530 SCC driver for AX.25' CONFIG_SCC + if [ "$CONFIG_SCC" != "n" ]; then + bool ' additional delay for PA0HZP OptoSCC compatible boards' CONFIG_SCC_DELAY + bool ' support for TRX that feedback the tx signal to rx' CONFIG_SCC_TRXECHO + fi fi if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - tristate 'BAYCOM ser12 and par96 driver for AX.25' CONFIG_BAYCOM + tristate 'BAYCOM ser12 fullduplex driver for AX.25' CONFIG_BAYCOM_SER_FDX + tristate 'BAYCOM ser12 halfduplex driver for AX.25' CONFIG_BAYCOM_SER_HDX + tristate 'BAYCOM picpar and par96 driver for AX.25' CONFIG_BAYCOM_PAR tristate 'Soundcard modem driver for AX.25' CONFIG_SOUNDMODEM if [ "$CONFIG_SOUNDMODEM" != "n" ]; then bool 'Soundmodem support for Soundblaster and compatible cards' CONFIG_SOUNDMODEM_SBC @@ -175,13 +181,8 @@ bool 'HFmodem support for Soundblaster and compatible cards' CONFIG_HFMODEM_SBC bool 'HFmodem support for WSS and Crystal cards' CONFIG_HFMODEM_WSS fi - tristate 'Shortwave radio modem driver' CONFIG_HFMODEM - if [ "$CONFIG_HFMODEM" != "n" ]; then - bool 'HFmodem support for Soundblaster and compatible cards' CONFIG_HFMODEM_SBC - bool 'HFmodem support for WSS and Crystal cards' CONFIG_HFMODEM_WSS - fi fi -# tristate 'STRIP (Metricom starmode radio IP)' CONFIG_STRIP + tristate 'STRIP (Metricom starmode radio IP)' CONFIG_STRIP tristate 'AT&T WaveLAN & DEC RoamAbout DS support' CONFIG_WAVELAN fi diff -u --recursive --new-file v2.1.67/linux/drivers/net/Makefile linux/drivers/net/Makefile --- v2.1.67/linux/drivers/net/Makefile Tue Sep 23 16:48:48 1997 +++ linux/drivers/net/Makefile Sun Nov 30 10:30:19 1997 @@ -73,13 +73,6 @@ L_OBJS += sk_g16.o endif -ifeq ($(CONFIG_NET_IPIP),y) -L_OBJS += tunnel.o -else - ifeq ($(CONFIG_NET_IPIP),m) - M_OBJS += tunnel.o - endif -endif ifeq ($(CONFIG_HP100),y) L_OBJS += hp100.o @@ -735,13 +728,33 
@@ endif endif -ifeq ($(CONFIG_BAYCOM),y) -L_OBJS += baycom.o +ifeq ($(CONFIG_BAYCOM_SER_FDX),y) +L_OBJS += baycom_ser_fdx.o +CONFIG_HDLCDRV_BUILTIN = y +else + ifeq ($(CONFIG_BAYCOM_SER_FDX),m) + CONFIG_HDLCDRV_MODULE = y + M_OBJS += baycom_ser_fdx.o + endif +endif + +ifeq ($(CONFIG_BAYCOM_SER_HDX),y) +L_OBJS += baycom_ser_hdx.o +CONFIG_HDLCDRV_BUILTIN = y +else + ifeq ($(CONFIG_BAYCOM_SER_HDX),m) + CONFIG_HDLCDRV_MODULE = y + M_OBJS += baycom_ser_hdx.o + endif +endif + +ifeq ($(CONFIG_BAYCOM_PAR),y) +L_OBJS += baycom_par.o CONFIG_HDLCDRV_BUILTIN = y else - ifeq ($(CONFIG_BAYCOM),m) + ifeq ($(CONFIG_BAYCOM_PAR),m) CONFIG_HDLCDRV_MODULE = y - M_OBJS += baycom.o + M_OBJS += baycom_par.o endif endif diff -u --recursive --new-file v2.1.67/linux/drivers/net/at1700.c linux/drivers/net/at1700.c --- v2.1.67/linux/drivers/net/at1700.c Mon Nov 3 13:04:26 1997 +++ linux/drivers/net/at1700.c Sun Nov 30 12:21:45 1997 @@ -102,7 +102,7 @@ #define EE_DATA_READ 0x80 /* EEPROM chip data out, in reg. 17. */ /* Delay between EEPROM clock transitions. */ -#define eeprom_delay() do { int _i = 40; while (--_i > 0) { __SLOW_DOWN_IO; }} while (0) +#define eeprom_delay() do { int _i = 40; while (--_i > 0) { inb(0x80); }} while (0) /* The EEPROM commands include the alway-set leading bit. */ #define EE_WRITE_CMD (5 << 6) diff -u --recursive --new-file v2.1.67/linux/drivers/net/baycom.c linux/drivers/net/baycom.c --- v2.1.67/linux/drivers/net/baycom.c Mon Aug 11 14:47:04 1997 +++ linux/drivers/net/baycom.c Wed Dec 31 16:00:00 1969 @@ -1,1280 +0,0 @@ -/*****************************************************************************/ - -/* - * baycom.c -- baycom ser12 and par96 radio modem driver. 
- * - * Copyright (C) 1996 Thomas Sailer (sailer@ife.ee.ethz.ch) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Please note that the GPL allows you to use the driver, NOT the radio. - * In order to use the radio, you need a license from the communications - * authority of your country. - * - * - * Supported modems - * - * ser12: This is a very simple 1200 baud AFSK modem. The modem consists only - * of a modulator/demodulator chip, usually a TI TCM3105. The computer - * is responsible for regenerating the receiver bit clock, as well as - * for handling the HDLC protocol. The modem connects to a serial port, - * hence the name. Since the serial port is not used as an async serial - * port, the kernel driver for serial ports cannot be used, and this - * driver only supports standard serial hardware (8250, 16450, 16550) - * - * par96: This is a modem for 9600 baud FSK compatible to the G3RUH standard. - * The modem does all the filtering and regenerates the receiver clock. - * Data is transferred from and to the PC via a shift register. - * The shift register is filled with 16 bits and an interrupt is - * signalled. The PC then empties the shift register in a burst. This - * modem connects to the parallel port, hence the name. 
The modem - * leaves the implementation of the HDLC protocol and the scrambler - * polynomial to the PC. This modem is no longer available (at least - * from Baycom) and has been replaced by the PICPAR modem (see below). - * You may however still build one from the schematics published in - * cq-DL :-). - * - * picpar: This is a redesign of the par96 modem by Henning Rech, DF9IC. The - * modem is protocol compatible to par96, but uses only three low - * power ICs and can therefore be fed from the parallel port and - * does not require an additional power supply. It features - * built in DCD circuitry. The driver should therefore be configured - * for hardware DCD. - * - * - * Command line options (insmod command line) - * - * mode driver mode string. Valid choices are ser12 and par96. An - * optional * enables software DCD. - * 2=par96/par97, any other value invalid - * iobase base address of the port; common values are for ser12 0x3f8, - * 0x2f8, 0x3e8, 0x2e8 and for par96/par97 0x378, 0x278, 0x3bc - * irq interrupt line of the port; common values are for ser12 3,4 - * and for par96/par97 7 - * - * - * History: - * 0.1 26.06.96 Adapted from baycom.c and made network driver interface - * 18.10.96 Changed to new user space access routines (copy_{to,from}_user) - * 0.3 26.04.97 init code/data tagged - * 0.4 08.07.97 alternative ser12 decoding algorithm (uses delta CTS ints) - */ - -/*****************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* --------------------------------------------------------------------- */ - -/* - * currently this module is supposed to support both module styles, i.e. - * the old one present up to about 2.1.9, and the new one functioning - * starting with 2.1.21. 
The reason is I have a kit allowing to compile - * this module also under 2.0.x which was requested by several people. - * This will go in 2.2 - */ -#include - -#if LINUX_VERSION_CODE >= 0x20100 -#include -#else -#include -#include - -#undef put_user -#undef get_user - -#define put_user(x,ptr) ({ __put_user((unsigned long)(x),(ptr),sizeof(*(ptr))); 0; }) -#define get_user(x,ptr) ({ x = ((__typeof__(*(ptr)))__get_user((ptr),sizeof(*(ptr)))); 0; }) - -extern inline int copy_from_user(void *to, const void *from, unsigned long n) -{ - int i = verify_area(VERIFY_READ, from, n); - if (i) - return i; - memcpy_fromfs(to, from, n); - return 0; -} - -extern inline int copy_to_user(void *to, const void *from, unsigned long n) -{ - int i = verify_area(VERIFY_WRITE, to, n); - if (i) - return i; - memcpy_tofs(to, from, n); - return 0; -} -#endif - -#if LINUX_VERSION_CODE >= 0x20123 -#include -#else -#define __init -#define __initdata -#define __initfunc(x) x -#endif - -/* --------------------------------------------------------------------- */ - -#define BAYCOM_DEBUG - -/* - * modem options; bit mask - */ -#define BAYCOM_OPTIONS_SOFTDCD 1 -#define BAYCOM_ALT_SER12 - -/* --------------------------------------------------------------------- */ - -static const char bc_drvname[] = "baycom"; -static const char bc_drvinfo[] = KERN_INFO "baycom: (C) 1996 Thomas Sailer, HB9JNX/AE4WA\n" -KERN_INFO "baycom: version 0.4 compiled " __TIME__ " " __DATE__ "\n"; - -/* --------------------------------------------------------------------- */ - -#define NR_PORTS 4 - -static struct device baycom_device[NR_PORTS]; - -static struct { - char *mode; - int iobase, irq; -} baycom_ports[NR_PORTS] = { { NULL, 0, 0 }, }; - -/* --------------------------------------------------------------------- */ - -#define RBR(iobase) (iobase+0) -#define THR(iobase) (iobase+0) -#define IER(iobase) (iobase+1) -#define IIR(iobase) (iobase+2) -#define FCR(iobase) (iobase+2) -#define LCR(iobase) (iobase+3) -#define 
MCR(iobase) (iobase+4) -#define LSR(iobase) (iobase+5) -#define MSR(iobase) (iobase+6) -#define SCR(iobase) (iobase+7) -#define DLL(iobase) (iobase+0) -#define DLM(iobase) (iobase+1) - -#define SER12_EXTENT 8 - -#define LPT_DATA(iobase) (iobase+0) -#define LPT_STATUS(iobase) (iobase+1) -#define LPT_CONTROL(iobase) (iobase+2) -#define LPT_IRQ_ENABLE 0x10 -#define PAR96_BURSTBITS 16 -#define PAR96_BURST 4 -#define PAR96_PTT 2 -#define PAR96_TXBIT 1 -#define PAR96_ACK 0x40 -#define PAR96_RXBIT 0x20 -#define PAR96_DCD 0x10 -#define PAR97_POWER 0xf8 - -#define PAR96_EXTENT 3 - -/* ---------------------------------------------------------------------- */ -/* - * Information that need to be kept for each board. - */ - -struct baycom_state { - struct hdlcdrv_state hdrv; - - unsigned int options; - - struct modem_state { - short arb_divider; - unsigned char flags; - unsigned int shreg; - struct modem_state_ser12 { - unsigned char tx_bit; - int dcd_sum0, dcd_sum1, dcd_sum2; - unsigned char last_sample; - unsigned char last_rxbit; - unsigned int dcd_shreg; - unsigned int dcd_time; - unsigned int bit_pll; -#ifdef BAYCOM_ALT_SER12 - unsigned long last_jiffies; - unsigned int pll_time; - unsigned int txshreg; -#else /* BAYCOM_ALT_SER12 */ - unsigned char interm_sample; -#endif /* BAYCOM_ALT_SER12 */ - } ser12; - struct modem_state_par96 { - int dcd_count; - unsigned int dcd_shreg; - unsigned long descram; - unsigned long scram; - } par96; - } modem; - -#ifdef BAYCOM_DEBUG - struct debug_vals { - unsigned long last_jiffies; - unsigned cur_intcnt; - unsigned last_intcnt; - int cur_pllcorr; - int last_pllcorr; - } debug_vals; -#endif /* BAYCOM_DEBUG */ -}; - -/* --------------------------------------------------------------------- */ - -#define min(a, b) (((a) < (b)) ? (a) : (b)) -#define max(a, b) (((a) > (b)) ? 
(a) : (b)) - -/* --------------------------------------------------------------------- */ - -static void inline baycom_int_freq(struct baycom_state *bc) -{ -#ifdef BAYCOM_DEBUG - unsigned long cur_jiffies = jiffies; - /* - * measure the interrupt frequency - */ - bc->debug_vals.cur_intcnt++; - if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) { - bc->debug_vals.last_jiffies = cur_jiffies; - bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt; - bc->debug_vals.cur_intcnt = 0; - bc->debug_vals.last_pllcorr = bc->debug_vals.cur_pllcorr; - bc->debug_vals.cur_pllcorr = 0; - } -#endif /* BAYCOM_DEBUG */ -} - -/* --------------------------------------------------------------------- */ -/* - * ===================== SER12 specific routines ========================= - */ - -#ifdef BAYCOM_ALT_SER12 - -#define SER12_BAUD 1200 - -/* --------------------------------------------------------------------- */ - -extern inline unsigned int hweight16(unsigned short w) - __attribute__ ((unused)); -extern inline unsigned int hweight8(unsigned char w) - __attribute__ ((unused)); - -extern inline unsigned int hweight16(unsigned short w) -{ - unsigned short res = (w & 0x5555) + ((w >> 1) & 0x5555); - res = (res & 0x3333) + ((res >> 2) & 0x3333); - res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F); - return (res & 0x00FF) + ((res >> 8) & 0x00FF); -} - -extern inline unsigned int hweight8(unsigned char w) -{ - unsigned short res = (w & 0x55) + ((w >> 1) & 0x55); - res = (res & 0x33) + ((res >> 2) & 0x33); - return (res & 0x0F) + ((res >> 4) & 0x0F); -} - -/* --------------------------------------------------------------------- */ - -static __inline__ void ser12_rxsample(struct device *dev, struct baycom_state *bc, unsigned char news) -{ - bc->modem.ser12.dcd_shreg <<= 1; - bc->modem.ser12.bit_pll += 0x2000; - if (bc->modem.ser12.last_sample != news) { - bc->modem.ser12.last_sample = news; - bc->modem.ser12.dcd_shreg |= 1; - if (bc->modem.ser12.bit_pll < 0x9000) - bc->modem.ser12.bit_pll 
+= 0x1000; - else - bc->modem.ser12.bit_pll -= 0x1000; - bc->modem.ser12.dcd_sum0 += 4 * hweight8(bc->modem.ser12.dcd_shreg & 0x38) - - hweight16(bc->modem.ser12.dcd_shreg & 0x7c0); - } - hdlcdrv_channelbit(&bc->hdrv, !!bc->modem.ser12.last_sample); - if ((--bc->modem.ser12.dcd_time) <= 0) { - hdlcdrv_setdcd(&bc->hdrv, (bc->modem.ser12.dcd_sum0 + - bc->modem.ser12.dcd_sum1 + - bc->modem.ser12.dcd_sum2) < 0); - bc->modem.ser12.dcd_sum2 = bc->modem.ser12.dcd_sum1; - bc->modem.ser12.dcd_sum1 = bc->modem.ser12.dcd_sum0; - bc->modem.ser12.dcd_sum0 = 2; /* slight bias */ - bc->modem.ser12.dcd_time = 120; - } - if (bc->modem.ser12.bit_pll >= 0x10000) { - bc->modem.ser12.bit_pll &= 0xffff; - bc->modem.shreg >>= 1; - if (bc->modem.ser12.last_rxbit == bc->modem.ser12.last_sample) - bc->modem.shreg |= 0x10000; - bc->modem.ser12.last_rxbit = bc->modem.ser12.last_sample; - if (bc->modem.shreg & 1) { - hdlcdrv_putbits(&bc->hdrv, bc->modem.shreg >> 1); - bc->modem.shreg = 0x10000; - } - } -} - -/* --------------------------------------------------------------------- */ - -static __inline__ void ser12_rx(struct device *dev, struct baycom_state *bc, unsigned char curs) -{ - unsigned long curjiff; - struct timeval tv; - unsigned int timediff; - - /* - * get current time - */ - curjiff = jiffies; - do_gettimeofday(&tv); - if ((signed)(curjiff - bc->modem.ser12.last_jiffies) >= HZ/4) { - /* long inactivity; clear HDLC and DCD */ - bc->modem.ser12.dcd_sum1 = 0; - bc->modem.ser12.dcd_sum2 = 0; - bc->modem.ser12.dcd_sum0 = 2; - bc->modem.ser12.dcd_time = 120; - hdlcdrv_setdcd(&bc->hdrv, 0); - hdlcdrv_putbits(&bc->hdrv, 0xffff); - bc->modem.ser12.last_jiffies = curjiff; - bc->modem.ser12.pll_time = tv.tv_usec; - } - bc->modem.ser12.last_jiffies = curjiff; - timediff = tv.tv_usec + 1000000 - bc->modem.ser12.pll_time; - timediff %= 1000000; - timediff /= 125000/SER12_BAUD; - bc->modem.ser12.pll_time = (bc->modem.ser12.pll_time + timediff * (125000/SER12_BAUD)) % 1000000; - for (; timediff > 
1; timediff--) - ser12_rxsample(dev, bc, bc->modem.ser12.last_sample); - if (timediff >= 1) - ser12_rxsample(dev, bc, curs); -} - -/* --------------------------------------------------------------------- */ - -static void ser12_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - struct device *dev = (struct device *)dev_id; - struct baycom_state *bc = (struct baycom_state *)dev->priv; - unsigned char iir, msr = 0; - unsigned int txcount = 0; - unsigned int rxcount = 0; - - if (!dev || !bc || bc->hdrv.magic != HDLCDRV_MAGIC) - return; - - for (;;) { - iir = inb(IIR(dev->base_addr)); - if (iir & 1) - break; - switch (iir & 6) { - case 6: - inb(LSR(dev->base_addr)); - continue; - - case 4: - inb(RBR(dev->base_addr)); - continue; - - case 2: - /* - * make sure the next interrupt is generated; - * 0 must be used to power the modem; the modem draws its - * power from the TxD line - */ - outb(0x00, THR(dev->base_addr)); - bc->modem.arb_divider--; - baycom_int_freq(bc); - if (hdlcdrv_ptt(&bc->hdrv)) { - /* - * first output the last bit (!) 
then call HDLC transmitter, - * since this may take quite long - */ - outb(0x0e | (!!bc->modem.ser12.tx_bit), MCR(dev->base_addr)); - txcount++; - } else - outb(0x0d, MCR(dev->base_addr)); /* transmitter off */ - continue; - - default: - msr = inb(MSR(dev->base_addr)); - if (msr & 1) /* delta CTS interrupt */ - rxcount++; - continue; - } - } - if (rxcount) - ser12_rx(dev, bc, msr & 0x10); - if (txcount) { -#ifdef BAYCOM_DEBUG - if (bc->debug_vals.cur_pllcorr < txcount) - bc->debug_vals.cur_pllcorr = txcount; -#endif /* BAYCOM_DEBUG */ - if (bc->modem.ser12.txshreg <= 1) - bc->modem.ser12.txshreg = 0x10000 | hdlcdrv_getbits(&bc->hdrv); - bc->modem.ser12.tx_bit = !(bc->modem.ser12.tx_bit ^ (bc->modem.ser12.txshreg & 1)); - bc->modem.ser12.txshreg >>= 1; - } - sti(); - if (bc->modem.arb_divider <= 0) { - bc->modem.arb_divider = SER12_BAUD/100; - hdlcdrv_arbitrate(dev, &bc->hdrv); - } - hdlcdrv_transmitter(dev, &bc->hdrv); - hdlcdrv_receiver(dev, &bc->hdrv); -} - -/* --------------------------------------------------------------------- */ -#else /* BAYCOM_ALT_SER12 */ - -static void inline ser12_set_divisor(struct device *dev, - unsigned char divisor) -{ - outb(0x81, LCR(dev->base_addr)); /* DLAB = 1 */ - outb(divisor, DLL(dev->base_addr)); - outb(0, DLM(dev->base_addr)); - outb(0x01, LCR(dev->base_addr)); /* word length = 6 */ - /* - * make sure the next interrupt is generated; - * 0 must be used to power the modem; the modem draws its - * power from the TxD line - */ - outb(0x00, THR(dev->base_addr)); - /* - * it is important not to set the divider while transmitting; - * this reportedly makes some UARTs generating interrupts - * in the hundredthousands per second region - * Reported by: Ignacio.Arenaza@studi.epfl.ch (Ignacio Arenaza Nuno) - */ -} - -/* --------------------------------------------------------------------- */ - -/* - * must call the TX arbitrator every 10ms - */ -#define SER12_ARB_DIVIDER(bc) ((bc->options & BAYCOM_OPTIONS_SOFTDCD) ? 
\ - 36 : 24) -#define SER12_DCD_INTERVAL(bc) ((bc->options & BAYCOM_OPTIONS_SOFTDCD) ? \ - 240 : 12) - -static inline void ser12_tx(struct device *dev, struct baycom_state *bc) -{ - /* one interrupt per channel bit */ - ser12_set_divisor(dev, 12); - /* - * first output the last bit (!) then call HDLC transmitter, - * since this may take quite long - */ - outb(0x0e | (!!bc->modem.ser12.tx_bit), MCR(dev->base_addr)); - if (bc->modem.shreg <= 1) - bc->modem.shreg = 0x10000 | hdlcdrv_getbits(&bc->hdrv); - bc->modem.ser12.tx_bit = !(bc->modem.ser12.tx_bit ^ - (bc->modem.shreg & 1)); - bc->modem.shreg >>= 1; -} - -/* --------------------------------------------------------------------- */ - -static inline void ser12_rx(struct device *dev, struct baycom_state *bc) -{ - unsigned char cur_s; - /* - * do demodulator - */ - cur_s = inb(MSR(dev->base_addr)) & 0x10; /* the CTS line */ - hdlcdrv_channelbit(&bc->hdrv, cur_s); - bc->modem.ser12.dcd_shreg = (bc->modem.ser12.dcd_shreg << 1) | - (cur_s != bc->modem.ser12.last_sample); - bc->modem.ser12.last_sample = cur_s; - if(bc->modem.ser12.dcd_shreg & 1) { - if (bc->options & BAYCOM_OPTIONS_SOFTDCD) { - unsigned int dcdspos, dcdsneg; - - dcdspos = dcdsneg = 0; - dcdspos += ((bc->modem.ser12.dcd_shreg >> 1) & 1); - if (!(bc->modem.ser12.dcd_shreg & 0x7ffffffe)) - dcdspos += 2; - dcdsneg += ((bc->modem.ser12.dcd_shreg >> 2) & 1); - dcdsneg += ((bc->modem.ser12.dcd_shreg >> 3) & 1); - dcdsneg += ((bc->modem.ser12.dcd_shreg >> 4) & 1); - - bc->modem.ser12.dcd_sum0 += 16*dcdspos - dcdsneg; - } else - bc->modem.ser12.dcd_sum0--; - } - if(!bc->modem.ser12.dcd_time) { - hdlcdrv_setdcd(&bc->hdrv, (bc->modem.ser12.dcd_sum0 + - bc->modem.ser12.dcd_sum1 + - bc->modem.ser12.dcd_sum2) < 0); - bc->modem.ser12.dcd_sum2 = bc->modem.ser12.dcd_sum1; - bc->modem.ser12.dcd_sum1 = bc->modem.ser12.dcd_sum0; - /* offset to ensure DCD off on silent input */ - bc->modem.ser12.dcd_sum0 = 2; - bc->modem.ser12.dcd_time = SER12_DCD_INTERVAL(bc); - } - 
bc->modem.ser12.dcd_time--; - if (bc->options & BAYCOM_OPTIONS_SOFTDCD) { - /* - * PLL code for the improved software DCD algorithm - */ - if (bc->modem.ser12.interm_sample) { - /* - * intermediate sample; set timing correction to normal - */ - ser12_set_divisor(dev, 4); - } else { - /* - * do PLL correction and call HDLC receiver - */ - switch (bc->modem.ser12.dcd_shreg & 7) { - case 1: /* transition too late */ - ser12_set_divisor(dev, 5); -#ifdef BAYCOM_DEBUG - bc->debug_vals.cur_pllcorr++; -#endif /* BAYCOM_DEBUG */ - break; - case 4: /* transition too early */ - ser12_set_divisor(dev, 3); -#ifdef BAYCOM_DEBUG - bc->debug_vals.cur_pllcorr--; -#endif /* BAYCOM_DEBUG */ - break; - default: - ser12_set_divisor(dev, 4); - break; - } - bc->modem.shreg >>= 1; - if (bc->modem.ser12.last_sample == - bc->modem.ser12.last_rxbit) - bc->modem.shreg |= 0x10000; - bc->modem.ser12.last_rxbit = - bc->modem.ser12.last_sample; - } - if (++bc->modem.ser12.interm_sample >= 3) - bc->modem.ser12.interm_sample = 0; - /* - * DCD stuff - */ - if (bc->modem.ser12.dcd_shreg & 1) { - unsigned int dcdspos, dcdsneg; - - dcdspos = dcdsneg = 0; - dcdspos += ((bc->modem.ser12.dcd_shreg >> 1) & 1); - dcdspos += (!(bc->modem.ser12.dcd_shreg & 0x7ffffffe)) - << 1; - dcdsneg += ((bc->modem.ser12.dcd_shreg >> 2) & 1); - dcdsneg += ((bc->modem.ser12.dcd_shreg >> 3) & 1); - dcdsneg += ((bc->modem.ser12.dcd_shreg >> 4) & 1); - - bc->modem.ser12.dcd_sum0 += 16*dcdspos - dcdsneg; - } - } else { - /* - * PLL algorithm for the hardware squelch DCD algorithm - */ - if (bc->modem.ser12.interm_sample) { - /* - * intermediate sample; set timing correction to normal - */ - ser12_set_divisor(dev, 6); - } else { - /* - * do PLL correction and call HDLC receiver - */ - switch (bc->modem.ser12.dcd_shreg & 3) { - case 1: /* transition too late */ - ser12_set_divisor(dev, 7); -#ifdef BAYCOM_DEBUG - bc->debug_vals.cur_pllcorr++; -#endif /* BAYCOM_DEBUG */ - break; - case 2: /* transition too early */ - 
ser12_set_divisor(dev, 5); -#ifdef BAYCOM_DEBUG - bc->debug_vals.cur_pllcorr--; -#endif /* BAYCOM_DEBUG */ - break; - default: - ser12_set_divisor(dev, 6); - break; - } - bc->modem.shreg >>= 1; - if (bc->modem.ser12.last_sample == - bc->modem.ser12.last_rxbit) - bc->modem.shreg |= 0x10000; - bc->modem.ser12.last_rxbit = - bc->modem.ser12.last_sample; - } - bc->modem.ser12.interm_sample = !bc->modem.ser12.interm_sample; - /* - * DCD stuff - */ - bc->modem.ser12.dcd_sum0 -= (bc->modem.ser12.dcd_shreg & 1); - } - outb(0x0d, MCR(dev->base_addr)); /* transmitter off */ - if (bc->modem.shreg & 1) { - hdlcdrv_putbits(&bc->hdrv, bc->modem.shreg >> 1); - bc->modem.shreg = 0x10000; - } - if(!bc->modem.ser12.dcd_time) { - hdlcdrv_setdcd(&bc->hdrv, (bc->modem.ser12.dcd_sum0 + - bc->modem.ser12.dcd_sum1 + - bc->modem.ser12.dcd_sum2) < 0); - bc->modem.ser12.dcd_sum2 = bc->modem.ser12.dcd_sum1; - bc->modem.ser12.dcd_sum1 = bc->modem.ser12.dcd_sum0; - /* offset to ensure DCD off on silent input */ - bc->modem.ser12.dcd_sum0 = 2; - bc->modem.ser12.dcd_time = SER12_DCD_INTERVAL(bc); - } - bc->modem.ser12.dcd_time--; -} - -/* --------------------------------------------------------------------- */ - -static void ser12_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - struct device *dev = (struct device *)dev_id; - struct baycom_state *bc = (struct baycom_state *)dev->priv; - - if (!dev || !bc || bc->hdrv.magic != HDLCDRV_MAGIC) - return; - - baycom_int_freq(bc); - /* - * check if transmitter active - */ - if (hdlcdrv_ptt(&bc->hdrv)) - ser12_tx(dev, bc); - else { - ser12_rx(dev, bc); - if (--bc->modem.arb_divider <= 0) { - bc->modem.arb_divider = SER12_ARB_DIVIDER(bc); - sti(); - hdlcdrv_arbitrate(dev, &bc->hdrv); - } - } - sti(); - hdlcdrv_transmitter(dev, &bc->hdrv); - hdlcdrv_receiver(dev, &bc->hdrv); -} -#endif /* BAYCOM_ALT_SER12 */ - -/* --------------------------------------------------------------------- */ - -enum uart { c_uart_unknown, c_uart_8250, - c_uart_16450, 
c_uart_16550, c_uart_16550A}; -static const char *uart_str[] = { - "unknown", "8250", "16450", "16550", "16550A" -}; - -static enum uart ser12_check_uart(unsigned int iobase) -{ - unsigned char b1,b2,b3; - enum uart u; - enum uart uart_tab[] = - { c_uart_16450, c_uart_unknown, c_uart_16550, c_uart_16550A }; - - b1 = inb(MCR(iobase)); - outb(b1 | 0x10, MCR(iobase)); /* loopback mode */ - b2 = inb(MSR(iobase)); - outb(0x1a, MCR(iobase)); - b3 = inb(MSR(iobase)) & 0xf0; - outb(b1, MCR(iobase)); /* restore old values */ - outb(b2, MSR(iobase)); - if (b3 != 0x90) - return c_uart_unknown; - inb(RBR(iobase)); - inb(RBR(iobase)); - outb(0x01, FCR(iobase)); /* enable FIFOs */ - u = uart_tab[(inb(IIR(iobase)) >> 6) & 3]; - if (u == c_uart_16450) { - outb(0x5a, SCR(iobase)); - b1 = inb(SCR(iobase)); - outb(0xa5, SCR(iobase)); - b2 = inb(SCR(iobase)); - if ((b1 != 0x5a) || (b2 != 0xa5)) - u = c_uart_8250; - } - return u; -} - -/* --------------------------------------------------------------------- */ - -static int ser12_open(struct device *dev) -{ - struct baycom_state *bc = (struct baycom_state *)dev->priv; - enum uart u; - - if (!dev || !bc) - return -ENXIO; - if (!dev->base_addr || dev->base_addr > 0x1000-SER12_EXTENT || - dev->irq < 2 || dev->irq > 15) - return -ENXIO; - if (check_region(dev->base_addr, SER12_EXTENT)) - return -EACCES; - memset(&bc->modem, 0, sizeof(bc->modem)); - bc->hdrv.par.bitrate = 1200; - if ((u = ser12_check_uart(dev->base_addr)) == c_uart_unknown) - return -EIO; - outb(0, FCR(dev->base_addr)); /* disable FIFOs */ - outb(0x0d, MCR(dev->base_addr)); - outb(0x0d, MCR(dev->base_addr)); - outb(0, IER(dev->base_addr)); - if (request_irq(dev->irq, ser12_interrupt, SA_INTERRUPT, - "baycom_ser12", dev)) - return -EBUSY; - request_region(dev->base_addr, SER12_EXTENT, "baycom_ser12"); -#ifdef BAYCOM_ALT_SER12 - bc->hdrv.par.bitrate = SER12_BAUD; - /* - * set the SIO to 6 Bits/character and 19600 baud, so that - * we get exactly (hopefully) one interrupt per 
radio symbol - */ - outb(0x81, LCR(dev->base_addr)); /* DLAB = 1 */ - outb(115200/8/SER12_BAUD, DLL(dev->base_addr)); - outb(0, DLM(dev->base_addr)); - outb(0x01, LCR(dev->base_addr)); /* word length = 6 */ - /* - * enable transmitter empty interrupt and modem status interrupt - */ - outb(0x0a, IER(dev->base_addr)); - /* - * make sure the next interrupt is generated; - * 0 must be used to power the modem; the modem draws its - * power from the TxD line - */ - outb(0x00, THR(dev->base_addr)); - printk(KERN_INFO "%s: ser12(alt modem) at iobase 0x%lx irq %u options " - "0x%x uart %s\n", bc_drvname, dev->base_addr, dev->irq, - bc->options, uart_str[u]); -#else /* BAYCOM_ALT_SER12 */ - /* - * enable transmitter empty interrupt - */ - outb(2, IER(dev->base_addr)); - /* - * set the SIO to 6 Bits/character and 19200 or 28800 baud, so that - * we get exactly (hopefully) 2 or 3 interrupts per radio symbol, - * depending on the usage of the software DCD routine - */ - ser12_set_divisor(dev, (bc->options & BAYCOM_OPTIONS_SOFTDCD) ? 
4 : 6); - printk(KERN_INFO "%s: ser12 at iobase 0x%lx irq %u options " - "0x%x uart %s\n", bc_drvname, dev->base_addr, dev->irq, - bc->options, uart_str[u]); -#endif /* BAYCOM_ALT_SER12 */ - MOD_INC_USE_COUNT; - return 0; -} - -/* --------------------------------------------------------------------- */ - -static int ser12_close(struct device *dev) -{ - struct baycom_state *bc = (struct baycom_state *)dev->priv; - - if (!dev || !bc) - return -EINVAL; - /* - * disable interrupts - */ - outb(0, IER(dev->base_addr)); - outb(1, MCR(dev->base_addr)); - free_irq(dev->irq, dev); - release_region(dev->base_addr, SER12_EXTENT); - printk(KERN_INFO "%s: close ser12 at iobase 0x%lx irq %u\n", - bc_drvname, dev->base_addr, dev->irq); - MOD_DEC_USE_COUNT; - return 0; -} - -/* --------------------------------------------------------------------- */ -/* - * ===================== PAR96 specific routines ========================= - */ - -#define PAR96_DESCRAM_TAP1 0x20000 -#define PAR96_DESCRAM_TAP2 0x01000 -#define PAR96_DESCRAM_TAP3 0x00001 - -#define PAR96_DESCRAM_TAPSH1 17 -#define PAR96_DESCRAM_TAPSH2 12 -#define PAR96_DESCRAM_TAPSH3 0 - -#define PAR96_SCRAM_TAP1 0x20000 /* X^17 */ -#define PAR96_SCRAM_TAPN 0x00021 /* X^0+X^5 */ - -/* --------------------------------------------------------------------- */ - -static inline void par96_tx(struct device *dev, struct baycom_state *bc) -{ - int i; - unsigned int data = hdlcdrv_getbits(&bc->hdrv); - - for(i = 0; i < PAR96_BURSTBITS; i++, data >>= 1) { - unsigned char val = PAR97_POWER; - bc->modem.par96.scram = ((bc->modem.par96.scram << 1) | - (bc->modem.par96.scram & 1)); - if (!(data & 1)) - bc->modem.par96.scram ^= 1; - if (bc->modem.par96.scram & (PAR96_SCRAM_TAP1 << 1)) - bc->modem.par96.scram ^= - (PAR96_SCRAM_TAPN << 1); - if (bc->modem.par96.scram & (PAR96_SCRAM_TAP1 << 2)) - val |= PAR96_TXBIT; - outb(val, LPT_DATA(dev->base_addr)); - outb(val | PAR96_BURST, LPT_DATA(dev->base_addr)); - } -} - -/* 
--------------------------------------------------------------------- */ - -static inline void par96_rx(struct device *dev, struct baycom_state *bc) -{ - int i; - unsigned int data, mask, mask2, descx; - - /* - * do receiver; differential decode and descramble on the fly - */ - for(data = i = 0; i < PAR96_BURSTBITS; i++) { - bc->modem.par96.descram = (bc->modem.par96.descram << 1); - if (inb(LPT_STATUS(dev->base_addr)) & PAR96_RXBIT) - bc->modem.par96.descram |= 1; - descx = bc->modem.par96.descram ^ - (bc->modem.par96.descram >> 1); - /* now the diff decoded data is inverted in descram */ - outb(PAR97_POWER | PAR96_PTT, LPT_DATA(dev->base_addr)); - descx ^= ((descx >> PAR96_DESCRAM_TAPSH1) ^ - (descx >> PAR96_DESCRAM_TAPSH2)); - data >>= 1; - if (!(descx & 1)) - data |= 0x8000; - outb(PAR97_POWER | PAR96_PTT | PAR96_BURST, - LPT_DATA(dev->base_addr)); - } - hdlcdrv_putbits(&bc->hdrv, data); - /* - * do DCD algorithm - */ - if (bc->options & BAYCOM_OPTIONS_SOFTDCD) { - bc->modem.par96.dcd_shreg = (bc->modem.par96.dcd_shreg >> 16) - | (data << 16); - /* search for flags and set the dcd counter appropriately */ - for(mask = 0x1fe00, mask2 = 0xfc00, i = 0; - i < PAR96_BURSTBITS; i++, mask <<= 1, mask2 <<= 1) - if ((bc->modem.par96.dcd_shreg & mask) == mask2) - bc->modem.par96.dcd_count = HDLCDRV_MAXFLEN+4; - /* check for abort/noise sequences */ - for(mask = 0x1fe00, mask2 = 0x1fe00, i = 0; - i < PAR96_BURSTBITS; i++, mask <<= 1, mask2 <<= 1) - if (((bc->modem.par96.dcd_shreg & mask) == mask2) && - (bc->modem.par96.dcd_count >= 0)) - bc->modem.par96.dcd_count -= HDLCDRV_MAXFLEN-10; - /* decrement and set the dcd variable */ - if (bc->modem.par96.dcd_count >= 0) - bc->modem.par96.dcd_count -= 2; - hdlcdrv_setdcd(&bc->hdrv, bc->modem.par96.dcd_count > 0); - } else { - hdlcdrv_setdcd(&bc->hdrv, !!(inb(LPT_STATUS(dev->base_addr)) - & PAR96_DCD)); - } -} - -/* --------------------------------------------------------------------- */ - -static void par96_interrupt(int irq, 
void *dev_id, struct pt_regs *regs) -{ - struct device *dev = (struct device *)dev_id; - struct baycom_state *bc = (struct baycom_state *)dev->priv; - - if (!dev || !bc || bc->hdrv.magic != HDLCDRV_MAGIC) - return; - - baycom_int_freq(bc); - /* - * check if transmitter active - */ - if (hdlcdrv_ptt(&bc->hdrv)) - par96_tx(dev, bc); - else { - par96_rx(dev, bc); - if (--bc->modem.arb_divider <= 0) { - bc->modem.arb_divider = 6; - sti(); - hdlcdrv_arbitrate(dev, &bc->hdrv); - } - } - sti(); - hdlcdrv_transmitter(dev, &bc->hdrv); - hdlcdrv_receiver(dev, &bc->hdrv); -} - -/* --------------------------------------------------------------------- */ - -static int par96_check_lpt(unsigned int iobase) -{ - unsigned char b1,b2; - int i; - - b1 = inb(LPT_DATA(iobase)); - b2 = inb(LPT_CONTROL(iobase)); - outb(0xaa, LPT_DATA(iobase)); - i = inb(LPT_DATA(iobase)) == 0xaa; - outb(0x55, LPT_DATA(iobase)); - i &= inb(LPT_DATA(iobase)) == 0x55; - outb(0x0a, LPT_CONTROL(iobase)); - i &= (inb(LPT_CONTROL(iobase)) & 0xf) == 0x0a; - outb(0x05, LPT_CONTROL(iobase)); - i &= (inb(LPT_CONTROL(iobase)) & 0xf) == 0x05; - outb(b1, LPT_DATA(iobase)); - outb(b2, LPT_CONTROL(iobase)); - return !i; -} - -/* --------------------------------------------------------------------- */ - -static int par96_open(struct device *dev) -{ - struct baycom_state *bc = (struct baycom_state *)dev->priv; - - if (!dev || !bc) - return -ENXIO; - if (!dev->base_addr || dev->base_addr > 0x1000-PAR96_EXTENT || - dev->irq < 2 || dev->irq > 15) - return -ENXIO; - if (check_region(dev->base_addr, PAR96_EXTENT)) - return -EACCES; - memset(&bc->modem, 0, sizeof(bc->modem)); - bc->hdrv.par.bitrate = 9600; - if (par96_check_lpt(dev->base_addr)) - return -EIO; - /* disable interrupt */ - outb(0, LPT_CONTROL(dev->base_addr)); - /* switch off PTT */ - outb(PAR96_PTT | PAR97_POWER, LPT_DATA(dev->base_addr)); - printk(KERN_INFO "%s: par96 at iobase 0x%lx irq %u options 0x%x\n", - bc_drvname, dev->base_addr, dev->irq, bc->options); - 
if (request_irq(dev->irq, par96_interrupt, SA_INTERRUPT, - "baycom_par96", dev)) - return -EBUSY; - request_region(dev->base_addr, PAR96_EXTENT, "baycom_par96"); - /* enable interrupt */ - outb(LPT_IRQ_ENABLE, LPT_CONTROL(dev->base_addr)); - MOD_INC_USE_COUNT; - return 0; -} - -/* --------------------------------------------------------------------- */ - -static int par96_close(struct device *dev) -{ - struct baycom_state *bc = (struct baycom_state *)dev->priv; - - if (!dev || !bc) - return -EINVAL; - /* disable interrupt */ - outb(0, LPT_CONTROL(dev->base_addr)); - /* switch off PTT */ - outb(PAR96_PTT | PAR97_POWER, LPT_DATA(dev->base_addr)); - free_irq(dev->irq, dev); - release_region(dev->base_addr, PAR96_EXTENT); - printk(KERN_INFO "%s: close par96 at iobase 0x%lx irq %u\n", - bc_drvname, dev->base_addr, dev->irq); - MOD_DEC_USE_COUNT; - return 0; -} - -/* --------------------------------------------------------------------- */ -/* - * ===================== hdlcdrv driver interface ========================= - */ - -/* --------------------------------------------------------------------- */ - -static int baycom_ioctl(struct device *dev, struct ifreq *ifr, - struct hdlcdrv_ioctl *hi, int cmd); - -/* --------------------------------------------------------------------- */ - -static struct hdlcdrv_ops ser12_ops = { - bc_drvname, - bc_drvinfo, - ser12_open, - ser12_close, - baycom_ioctl -}; - -/* --------------------------------------------------------------------- */ - -static struct hdlcdrv_ops par96_ops = { - bc_drvname, - bc_drvinfo, - par96_open, - par96_close, - baycom_ioctl -}; - -/* --------------------------------------------------------------------- */ - -static struct hdlcdrv_ops dummy_ops = { - bc_drvname, - bc_drvinfo, - NULL, - NULL, - baycom_ioctl -}; - -/* --------------------------------------------------------------------- */ - -static int baycom_setmode(struct baycom_state *bc, char *modestr) -{ - struct hdlcdrv_ops *newops = NULL; - unsigned 
long flags; - - if (!strncmp(modestr, "off", 3)) - newops = &dummy_ops; - else if (!strncmp(modestr, "ser12", 5)) - newops = &ser12_ops; - else if (!strncmp(modestr, "par96", 5)) - newops = &par96_ops; - else - return -EINVAL; - save_flags(flags); - cli(); - bc->hdrv.ops = newops; - bc->options = !!strchr(modestr, '*'); - restore_flags(flags); - return 0; -} - -/* --------------------------------------------------------------------- */ - -static int baycom_ioctl(struct device *dev, struct ifreq *ifr, - struct hdlcdrv_ioctl *hi, int cmd) -{ - struct baycom_state *bc; - struct baycom_ioctl bi; - int cmd2; - - if (!dev || !dev->priv || - ((struct baycom_state *)dev->priv)->hdrv.magic != HDLCDRV_MAGIC) { - printk(KERN_ERR "bc_ioctl: invalid device struct\n"); - return -EINVAL; - } - bc = (struct baycom_state *)dev->priv; - - if (cmd != SIOCDEVPRIVATE) - return -ENOIOCTLCMD; - if (get_user(cmd2, (int *)ifr->ifr_data)) - return -EFAULT; - switch (hi->cmd) { - default: - break; - - case HDLCDRVCTL_GETMODE: - if (bc->hdrv.ops == &ser12_ops) - strcpy(hi->data.modename, "ser12"); - else if (bc->hdrv.ops == &par96_ops) - strcpy(hi->data.modename, "par96"); - else if (bc->hdrv.ops == &dummy_ops) - strcpy(hi->data.modename, "off"); - else - strcpy(hi->data.modename, "invalid"); - if (bc->options & 1) - strcat(hi->data.modename, "*"); - if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) - return -EFAULT; - return 0; - - case HDLCDRVCTL_SETMODE: - if (!suser() || dev->start) - return -EACCES; - hi->data.modename[sizeof(hi->data.modename)-1] = '\0'; - return baycom_setmode(bc, hi->data.modename); - - case HDLCDRVCTL_MODELIST: - strcpy(hi->data.modename, "ser12,par96"); - if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) - return -EFAULT; - return 0; - - case HDLCDRVCTL_MODEMPARMASK: - return HDLCDRV_PARMASK_IOBASE | HDLCDRV_PARMASK_IRQ; - - } - - if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi))) - return -EFAULT; - switch (bi.cmd) { - default: - 
return -ENOIOCTLCMD; - -#ifdef BAYCOM_DEBUG - case BAYCOMCTL_GETDEBUG: - bi.data.dbg.debug1 = bc->hdrv.ptt_keyed; - bi.data.dbg.debug2 = bc->debug_vals.last_intcnt; - bi.data.dbg.debug3 = bc->debug_vals.last_pllcorr; - break; -#endif /* BAYCOM_DEBUG */ - - } - if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi))) - return -EFAULT; - return 0; - -} - -/* --------------------------------------------------------------------- */ - -__initfunc(int baycom_init(void)) -{ - int i, j, found = 0; - char set_hw = 1; - struct baycom_state *bc; - char ifname[HDLCDRV_IFNAMELEN]; - - - printk(bc_drvinfo); - /* - * register net devices - */ - for (i = 0; i < NR_PORTS; i++) { - struct device *dev = baycom_device+i; - sprintf(ifname, "bc%d", i); - - if (!baycom_ports[i].mode) - set_hw = 0; - if (!set_hw) - baycom_ports[i].iobase = baycom_ports[i].irq = 0; - j = hdlcdrv_register_hdlcdrv(dev, &dummy_ops, - sizeof(struct baycom_state), - ifname, baycom_ports[i].iobase, - baycom_ports[i].irq, 0); - if (!j) { - bc = (struct baycom_state *)dev->priv; - if (set_hw && baycom_setmode(bc, baycom_ports[i].mode)) - set_hw = 0; - found++; - } else { - printk(KERN_WARNING "%s: cannot register net device\n", - bc_drvname); - } - } - if (!found) - return -ENXIO; - return 0; -} - -/* --------------------------------------------------------------------- */ - -#ifdef MODULE - -/* - * command line settable parameters - */ -static char *mode = NULL; -static int iobase = 0x3f8; -static int irq = 4; - -#if LINUX_VERSION_CODE >= 0x20115 - -MODULE_PARM(mode, "s"); -MODULE_PARM_DESC(mode, "baycom operating mode; eg. ser12* or par96"); -MODULE_PARM(iobase, "i"); -MODULE_PARM_DESC(iobase, "baycom io base address"); -MODULE_PARM(irq, "i"); -MODULE_PARM_DESC(irq, "baycom irq number"); - -MODULE_AUTHOR("Thomas M. 
Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu"); -MODULE_DESCRIPTION("Baycom ser12, par96 and picpar amateur radio modem driver"); - -#endif - -__initfunc(int init_module(void)) -{ - baycom_ports[0].mode = mode; - baycom_ports[0].iobase = iobase; - baycom_ports[0].irq = irq; - baycom_ports[1].mode = NULL; - - return baycom_init(); -} - -/* --------------------------------------------------------------------- */ - -void cleanup_module(void) -{ - int i; - - for(i = 0; i < NR_PORTS; i++) { - struct device *dev = baycom_device+i; - struct baycom_state *bc = (struct baycom_state *)dev->priv; - - if (bc) { - if (bc->hdrv.magic != HDLCDRV_MAGIC) - printk(KERN_ERR "baycom: invalid magic in " - "cleanup_module\n"); - else - hdlcdrv_unregister_hdlcdrv(dev); - } - } -} - -#else /* MODULE */ -/* --------------------------------------------------------------------- */ -/* - * format: baycom=io,irq,mode - * mode: {ser12,par96}[*] - * * indicates sofware DCD - */ - -__initfunc(void baycom_setup(char *str, int *ints)) -{ - int i; - - for (i = 0; (i < NR_PORTS) && (baycom_ports[i].mode); i++); - if ((i >= NR_PORTS) || (ints[0] < 2)) { - printk(KERN_INFO "%s: too many or invalid interface " - "specifications\n", bc_drvname); - return; - } - baycom_ports[i].mode = str; - baycom_ports[i].iobase = ints[1]; - baycom_ports[i].irq = ints[2]; - if (i < NR_PORTS-1) - baycom_ports[i+1].mode = NULL; -} - -#endif /* MODULE */ -/* --------------------------------------------------------------------- */ diff -u --recursive --new-file v2.1.67/linux/drivers/net/baycom_par.c linux/drivers/net/baycom_par.c --- v2.1.67/linux/drivers/net/baycom_par.c Wed Dec 31 16:00:00 1969 +++ linux/drivers/net/baycom_par.c Sun Nov 30 10:30:19 1997 @@ -0,0 +1,661 @@ +/*****************************************************************************/ + +/* + * baycom_par.c -- baycom par96 and picpar radio modem driver. 
+ * + * Copyright (C) 1997 Thomas Sailer (sailer@ife.ee.ethz.ch) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Please note that the GPL allows you to use the driver, NOT the radio. + * In order to use the radio, you need a license from the communications + * authority of your country. + * + * + * Supported modems + * + * par96: This is a modem for 9600 baud FSK compatible to the G3RUH standard. + * The modem does all the filtering and regenerates the receiver clock. + * Data is transferred from and to the PC via a shift register. + * The shift register is filled with 16 bits and an interrupt is + * signalled. The PC then empties the shift register in a burst. This + * modem connects to the parallel port, hence the name. The modem + * leaves the implementation of the HDLC protocol and the scrambler + * polynomial to the PC. This modem is no longer available (at least + * from Baycom) and has been replaced by the PICPAR modem (see below). + * You may however still build one from the schematics published in + * cq-DL :-). + * + * picpar: This is a redesign of the par96 modem by Henning Rech, DF9IC. The + * modem is protocol compatible to par96, but uses only three low + * power ICs and can therefore be fed from the parallel port and + * does not require an additional power supply. 
It features + * built in DCD circuitry. The driver should therefore be configured + * for hardware DCD. + * + * + * Command line options (insmod command line) + * + * mode driver mode string. Valid choices are par96 and picpar. + * iobase base address of the port; common values are 0x378, 0x278, 0x3bc + * + * + * History: + * 0.1 26.06.96 Adapted from baycom.c and made network driver interface + * 18.10.96 Changed to new user space access routines (copy_{to,from}_user) + * 0.3 26.04.97 init code/data tagged + * 0.4 08.07.97 alternative ser12 decoding algorithm (uses delta CTS ints) + * 0.5 11.11.97 split into separate files for ser12/par96 + */ + +/*****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* --------------------------------------------------------------------- */ + +/* + * currently this module is supposed to support both module styles, i.e. + * the old one present up to about 2.1.9, and the new one functioning + * starting with 2.1.21. The reason is I have a kit allowing to compile + * this module also under 2.0.x which was requested by several people. 
+ * This will go in 2.2 + */ +#include + +#if LINUX_VERSION_CODE >= 0x20100 +#include +#else +#include +#include + +#undef put_user +#undef get_user + +#define put_user(x,ptr) ({ __put_user((unsigned long)(x),(ptr),sizeof(*(ptr))); 0; }) +#define get_user(x,ptr) ({ x = ((__typeof__(*(ptr)))__get_user((ptr),sizeof(*(ptr)))); 0; }) + +extern __inline__ int copy_from_user(void *to, const void *from, unsigned long n) +{ + int i = verify_area(VERIFY_READ, from, n); + if (i) + return i; + memcpy_fromfs(to, from, n); + return 0; +} + +extern __inline__ int copy_to_user(void *to, const void *from, unsigned long n) +{ + int i = verify_area(VERIFY_WRITE, to, n); + if (i) + return i; + memcpy_tofs(to, from, n); + return 0; +} +#endif + +#if LINUX_VERSION_CODE >= 0x20123 +#include +#else +#define __init +#define __initdata +#define __initfunc(x) x +#endif + +/* --------------------------------------------------------------------- */ + +#define BAYCOM_DEBUG + +/* + * modem options; bit mask + */ +#define BAYCOM_OPTIONS_SOFTDCD 1 + +/* --------------------------------------------------------------------- */ + +static const char bc_drvname[] = "baycom_par"; +static const char bc_drvinfo[] = KERN_INFO "baycom_par: (C) 1997 Thomas Sailer, HB9JNX/AE4WA\n" +KERN_INFO "baycom_par: version 0.5 compiled " __TIME__ " " __DATE__ "\n"; + +/* --------------------------------------------------------------------- */ + +#define NR_PORTS 4 + +static struct device baycom_device[NR_PORTS]; + +static struct { + const char *mode; + int iobase; +} baycom_ports[NR_PORTS] = { { NULL, 0 }, }; + +/* --------------------------------------------------------------------- */ + +#define SER12_EXTENT 8 + +#define LPT_DATA(dev) ((dev)->base_addr+0) +#define LPT_STATUS(dev) ((dev)->base_addr+1) +#define LPT_CONTROL(dev) ((dev)->base_addr+2) +#define LPT_IRQ_ENABLE 0x10 + +#define PAR96_BURSTBITS 16 +#define PAR96_BURST 4 +#define PAR96_PTT 2 +#define PAR96_TXBIT 1 +#define PAR96_ACK 0x40 +#define PAR96_RXBIT 
0x20 +#define PAR96_DCD 0x10 +#define PAR97_POWER 0xf8 + +/* ---------------------------------------------------------------------- */ +/* + * Information that need to be kept for each board. + */ + +struct baycom_state { + struct hdlcdrv_state hdrv; + + struct pardevice *pdev; + unsigned int options; + + struct modem_state { + short arb_divider; + unsigned char flags; + unsigned int shreg; + struct modem_state_par96 { + int dcd_count; + unsigned int dcd_shreg; + unsigned long descram; + unsigned long scram; + } par96; + } modem; + +#ifdef BAYCOM_DEBUG + struct debug_vals { + unsigned long last_jiffies; + unsigned cur_intcnt; + unsigned last_intcnt; + int cur_pllcorr; + int last_pllcorr; + } debug_vals; +#endif /* BAYCOM_DEBUG */ +}; + +/* --------------------------------------------------------------------- */ + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + +/* --------------------------------------------------------------------- */ + +static void __inline__ baycom_int_freq(struct baycom_state *bc) +{ +#ifdef BAYCOM_DEBUG + unsigned long cur_jiffies = jiffies; + /* + * measure the interrupt frequency + */ + bc->debug_vals.cur_intcnt++; + if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) { + bc->debug_vals.last_jiffies = cur_jiffies; + bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt; + bc->debug_vals.cur_intcnt = 0; + bc->debug_vals.last_pllcorr = bc->debug_vals.cur_pllcorr; + bc->debug_vals.cur_pllcorr = 0; + } +#endif /* BAYCOM_DEBUG */ +} + +/* --------------------------------------------------------------------- */ +/* + * ===================== PAR96 specific routines ========================= + */ + +#define PAR96_DESCRAM_TAP1 0x20000 +#define PAR96_DESCRAM_TAP2 0x01000 +#define PAR96_DESCRAM_TAP3 0x00001 + +#define PAR96_DESCRAM_TAPSH1 17 +#define PAR96_DESCRAM_TAPSH2 12 +#define PAR96_DESCRAM_TAPSH3 0 + +#define PAR96_SCRAM_TAP1 0x20000 /* X^17 */ +#define PAR96_SCRAM_TAPN 0x00021 /* X^0+X^5 */ + +/* 
--------------------------------------------------------------------- */ + +static __inline__ void par96_tx(struct device *dev, struct baycom_state *bc) +{ + int i; + unsigned int data = hdlcdrv_getbits(&bc->hdrv); + + for(i = 0; i < PAR96_BURSTBITS; i++, data >>= 1) { + unsigned char val = PAR97_POWER; + bc->modem.par96.scram = ((bc->modem.par96.scram << 1) | + (bc->modem.par96.scram & 1)); + if (!(data & 1)) + bc->modem.par96.scram ^= 1; + if (bc->modem.par96.scram & (PAR96_SCRAM_TAP1 << 1)) + bc->modem.par96.scram ^= + (PAR96_SCRAM_TAPN << 1); + if (bc->modem.par96.scram & (PAR96_SCRAM_TAP1 << 2)) + val |= PAR96_TXBIT; + outb(val, LPT_DATA(dev)); + outb(val | PAR96_BURST, LPT_DATA(dev)); + } +} + +/* --------------------------------------------------------------------- */ + +static __inline__ void par96_rx(struct device *dev, struct baycom_state *bc) +{ + int i; + unsigned int data, mask, mask2, descx; + + /* + * do receiver; differential decode and descramble on the fly + */ + for(data = i = 0; i < PAR96_BURSTBITS; i++) { + bc->modem.par96.descram = (bc->modem.par96.descram << 1); + if (inb(LPT_STATUS(dev)) & PAR96_RXBIT) + bc->modem.par96.descram |= 1; + descx = bc->modem.par96.descram ^ + (bc->modem.par96.descram >> 1); + /* now the diff decoded data is inverted in descram */ + outb(PAR97_POWER | PAR96_PTT, LPT_DATA(dev)); + descx ^= ((descx >> PAR96_DESCRAM_TAPSH1) ^ + (descx >> PAR96_DESCRAM_TAPSH2)); + data >>= 1; + if (!(descx & 1)) + data |= 0x8000; + outb(PAR97_POWER | PAR96_PTT | PAR96_BURST, LPT_DATA(dev)); + } + hdlcdrv_putbits(&bc->hdrv, data); + /* + * do DCD algorithm + */ + if (bc->options & BAYCOM_OPTIONS_SOFTDCD) { + bc->modem.par96.dcd_shreg = (bc->modem.par96.dcd_shreg >> 16) + | (data << 16); + /* search for flags and set the dcd counter appropriately */ + for(mask = 0x1fe00, mask2 = 0xfc00, i = 0; + i < PAR96_BURSTBITS; i++, mask <<= 1, mask2 <<= 1) + if ((bc->modem.par96.dcd_shreg & mask) == mask2) + bc->modem.par96.dcd_count = 
HDLCDRV_MAXFLEN+4; + /* check for abort/noise sequences */ + for(mask = 0x1fe00, mask2 = 0x1fe00, i = 0; + i < PAR96_BURSTBITS; i++, mask <<= 1, mask2 <<= 1) + if (((bc->modem.par96.dcd_shreg & mask) == mask2) && + (bc->modem.par96.dcd_count >= 0)) + bc->modem.par96.dcd_count -= HDLCDRV_MAXFLEN-10; + /* decrement and set the dcd variable */ + if (bc->modem.par96.dcd_count >= 0) + bc->modem.par96.dcd_count -= 2; + hdlcdrv_setdcd(&bc->hdrv, bc->modem.par96.dcd_count > 0); + } else { + hdlcdrv_setdcd(&bc->hdrv, !!(inb(LPT_STATUS(dev)) & PAR96_DCD)); + } +} + +/* --------------------------------------------------------------------- */ + +static void par96_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct parport *pp = (struct parport *)dev_id; + struct pardevice *pd = pp->cad; + struct device *dev = (struct device *)pd->private; + struct baycom_state *bc = (struct baycom_state *)dev->priv; + + if (!dev || !bc || bc->hdrv.magic != HDLCDRV_MAGIC) + return; + + baycom_int_freq(bc); + /* + * check if transmitter active + */ + if (hdlcdrv_ptt(&bc->hdrv)) + par96_tx(dev, bc); + else { + par96_rx(dev, bc); + if (--bc->modem.arb_divider <= 0) { + bc->modem.arb_divider = 6; + sti(); + hdlcdrv_arbitrate(dev, &bc->hdrv); + } + } + sti(); + hdlcdrv_transmitter(dev, &bc->hdrv); + hdlcdrv_receiver(dev, &bc->hdrv); +} + +/* --------------------------------------------------------------------- */ + +static int par96_preempt(void *handle) +{ + /* we cannot relinquish the port in the middle of an operation */ + return 1; +} + +/* --------------------------------------------------------------------- */ + +static void par96_wakeup(void *handle) +{ + struct device *dev = (struct device *)handle; + struct baycom_state *bc = (struct baycom_state *)dev->priv; + + printk(KERN_DEBUG "baycom_par: %s: why am I being woken up?\n", dev->name); + if (!parport_claim(bc->pdev)) + printk(KERN_DEBUG "baycom_par: %s: I'm broken.\n", dev->name); +} + +/* 
--------------------------------------------------------------------- */ + +static int par96_open(struct device *dev) +{ + struct baycom_state *bc = (struct baycom_state *)dev->priv; + struct parport *pp = parport_enumerate(); + + if (!dev || !bc) + return -ENXIO; + while (pp && pp->base != dev->base_addr) + pp = pp->next; + if (!pp) { + printk(KERN_ERR "baycom_par: parport at 0x%lx unknown\n", dev->base_addr); + return -ENXIO; + } + if (pp->irq < 0) { + printk(KERN_ERR "baycom_par: parport at 0x%x has no irq\n", pp->base); + return -ENXIO; + } + memset(&bc->modem, 0, sizeof(bc->modem)); + bc->hdrv.par.bitrate = 9600; + if (!(bc->pdev = parport_register_device(pp, dev->name, par96_preempt, par96_wakeup, + par96_interrupt, PARPORT_DEV_LURK, dev))) { + printk(KERN_ERR "baycom_par: cannot register parport at 0x%x\n", pp->base); + return -ENXIO; + } + if (parport_claim(bc->pdev)) { + printk(KERN_ERR "baycom_par: parport at 0x%x busy\n", pp->base); + parport_unregister_device(bc->pdev); + return -EBUSY; + } + dev->irq = pp->irq; + /* bc->pdev->port->ops->change_mode(bc->pdev->port, PARPORT_MODE_PCSPP); not yet implemented */ + /* switch off PTT */ + outb(PAR96_PTT | PAR97_POWER, LPT_DATA(dev)); + /*bc->pdev->port->ops->enable_irq(bc->pdev->port); not yet implemented */ + outb(LPT_IRQ_ENABLE, LPT_CONTROL(dev)); + printk(KERN_INFO "%s: par96 at iobase 0x%lx irq %u options 0x%x\n", + bc_drvname, dev->base_addr, dev->irq, bc->options); + MOD_INC_USE_COUNT; + return 0; +} + +/* --------------------------------------------------------------------- */ + +static int par96_close(struct device *dev) +{ + struct baycom_state *bc = (struct baycom_state *)dev->priv; + + if (!dev || !bc) + return -EINVAL; + /* disable interrupt */ + outb(0, LPT_CONTROL(dev)); + /*bc->pdev->port->ops->disable_irq(bc->pdev->port); not yet implemented */ + /* switch off PTT */ + outb(PAR96_PTT | PAR97_POWER, LPT_DATA(dev)); + parport_release(bc->pdev); + parport_unregister_device(bc->pdev); + 
printk(KERN_INFO "%s: close par96 at iobase 0x%lx irq %u\n", + bc_drvname, dev->base_addr, dev->irq); + MOD_DEC_USE_COUNT; + return 0; +} + +/* --------------------------------------------------------------------- */ +/* + * ===================== hdlcdrv driver interface ========================= + */ + +static int baycom_ioctl(struct device *dev, struct ifreq *ifr, + struct hdlcdrv_ioctl *hi, int cmd); + +/* --------------------------------------------------------------------- */ + +static struct hdlcdrv_ops par96_ops = { + bc_drvname, + bc_drvinfo, + par96_open, + par96_close, + baycom_ioctl +}; + +/* --------------------------------------------------------------------- */ + +static int baycom_setmode(struct baycom_state *bc, const char *modestr) +{ + if (!strncmp(modestr, "picpar", 6)) + bc->options = 0; + else if (!strncmp(modestr, "par96", 5)) + bc->options = BAYCOM_OPTIONS_SOFTDCD; + else + bc->options = !!strchr(modestr, '*'); + return 0; +} + +/* --------------------------------------------------------------------- */ + +static int baycom_ioctl(struct device *dev, struct ifreq *ifr, + struct hdlcdrv_ioctl *hi, int cmd) +{ + struct baycom_state *bc; + struct baycom_ioctl bi; + int cmd2; + + if (!dev || !dev->priv || + ((struct baycom_state *)dev->priv)->hdrv.magic != HDLCDRV_MAGIC) { + printk(KERN_ERR "bc_ioctl: invalid device struct\n"); + return -EINVAL; + } + bc = (struct baycom_state *)dev->priv; + + if (cmd != SIOCDEVPRIVATE) + return -ENOIOCTLCMD; + if (get_user(cmd2, (int *)ifr->ifr_data)) + return -EFAULT; + switch (hi->cmd) { + default: + break; + + case HDLCDRVCTL_GETMODE: + strcpy(hi->data.modename, bc->options ? 
"par96" : "picpar"); + if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + return -EFAULT; + return 0; + + case HDLCDRVCTL_SETMODE: + if (!suser() || dev->start) + return -EACCES; + hi->data.modename[sizeof(hi->data.modename)-1] = '\0'; + return baycom_setmode(bc, hi->data.modename); + + case HDLCDRVCTL_MODELIST: + strcpy(hi->data.modename, "par96,picpar"); + if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + return -EFAULT; + return 0; + + case HDLCDRVCTL_MODEMPARMASK: + return HDLCDRV_PARMASK_IOBASE; + + } + + if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi))) + return -EFAULT; + switch (bi.cmd) { + default: + return -ENOIOCTLCMD; + +#ifdef BAYCOM_DEBUG + case BAYCOMCTL_GETDEBUG: + bi.data.dbg.debug1 = bc->hdrv.ptt_keyed; + bi.data.dbg.debug2 = bc->debug_vals.last_intcnt; + bi.data.dbg.debug3 = bc->debug_vals.last_pllcorr; + break; +#endif /* BAYCOM_DEBUG */ + + } + if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi))) + return -EFAULT; + return 0; + +} + +/* --------------------------------------------------------------------- */ + +__initfunc(int baycom_par_init(void)) +{ + int i, j, found = 0; + char set_hw = 1; + struct baycom_state *bc; + char ifname[HDLCDRV_IFNAMELEN]; + + + printk(bc_drvinfo); + /* + * register net devices + */ + for (i = 0; i < NR_PORTS; i++) { + struct device *dev = baycom_device+i; + sprintf(ifname, "bcp%d", i); + + if (!baycom_ports[i].mode) + set_hw = 0; + if (!set_hw) + baycom_ports[i].iobase = 0; + j = hdlcdrv_register_hdlcdrv(dev, &par96_ops, + sizeof(struct baycom_state), + ifname, baycom_ports[i].iobase, 0, 0); + if (!j) { + bc = (struct baycom_state *)dev->priv; + if (set_hw && baycom_setmode(bc, baycom_ports[i].mode)) + set_hw = 0; + found++; + } else { + printk(KERN_WARNING "%s: cannot register net device\n", + bc_drvname); + } + } + if (!found) + return -ENXIO; + return 0; +} + +/* --------------------------------------------------------------------- */ + +#ifdef MODULE + +/* + * command line 
settable parameters + */ +static const char *mode[NR_PORTS] = { "picpar", }; +static int iobase[NR_PORTS] = { 0x378, }; + +#if LINUX_VERSION_CODE >= 0x20115 + +MODULE_PARM(mode, "1-" __MODULE_STRING(NR_PORTS) "s"); +MODULE_PARM_DESC(mode, "baycom operating mode; eg. par96 or picpar"); +MODULE_PARM(iobase, "1-" __MODULE_STRING(NR_PORTS) "i"); +MODULE_PARM_DESC(iobase, "baycom io base address"); + +MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu"); +MODULE_DESCRIPTION("Baycom par96 and picpar amateur radio modem driver"); + +#endif + +__initfunc(int init_module(void)) +{ + int i; + + for (i = 0; (i < NR_PORTS) && (mode[i]); i++) { + baycom_ports[i].mode = mode[i]; + baycom_ports[i].iobase = iobase[i]; + } + if (i < NR_PORTS-1) + baycom_ports[i+1].mode = NULL; + return baycom_par_init(); +} + +/* --------------------------------------------------------------------- */ + +void cleanup_module(void) +{ + int i; + + for(i = 0; i < NR_PORTS; i++) { + struct device *dev = baycom_device+i; + struct baycom_state *bc = (struct baycom_state *)dev->priv; + + if (bc) { + if (bc->hdrv.magic != HDLCDRV_MAGIC) + printk(KERN_ERR "baycom: invalid magic in " + "cleanup_module\n"); + else + hdlcdrv_unregister_hdlcdrv(dev); + } + } +} + +#else /* MODULE */ +/* --------------------------------------------------------------------- */ +/* + * format: baycom_par=io,mode + * mode: par96,picpar + */ + +__initfunc(void baycom_par_setup(char *str, int *ints)) +{ + int i; + + for (i = 0; (i < NR_PORTS) && (baycom_ports[i].mode); i++); + if ((i >= NR_PORTS) || (ints[0] < 1)) { + printk(KERN_INFO "%s: too many or invalid interface " + "specifications\n", bc_drvname); + return; + } + baycom_ports[i].mode = str; + baycom_ports[i].iobase = ints[1]; + if (i < NR_PORTS-1) + baycom_ports[i+1].mode = NULL; +} + +#endif /* MODULE */ +/* --------------------------------------------------------------------- */ diff -u --recursive --new-file 
v2.1.67/linux/drivers/net/baycom_ser_fdx.c linux/drivers/net/baycom_ser_fdx.c --- v2.1.67/linux/drivers/net/baycom_ser_fdx.c Wed Dec 31 16:00:00 1969 +++ linux/drivers/net/baycom_ser_fdx.c Sun Nov 30 10:30:19 1997 @@ -0,0 +1,762 @@ +/*****************************************************************************/ + +/* + * baycom_ser_fdx.c -- baycom ser12 fullduplex radio modem driver. + * + * Copyright (C) 1997 Thomas Sailer (sailer@ife.ee.ethz.ch) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Please note that the GPL allows you to use the driver, NOT the radio. + * In order to use the radio, you need a license from the communications + * authority of your country. + * + * + * Supported modems + * + * ser12: This is a very simple 1200 baud AFSK modem. The modem consists only + * of a modulator/demodulator chip, usually a TI TCM3105. The computer + * is responsible for regenerating the receiver bit clock, as well as + * for handling the HDLC protocol. The modem connects to a serial port, + * hence the name. Since the serial port is not used as an async serial + * port, the kernel driver for serial ports cannot be used, and this + * driver only supports standard serial hardware (8250, 16450, 16550A) + * + * + * Command line options (insmod command line) + * + * mode * enables software DCD. 
+ * iobase base address of the port; common values are 0x3f8, 0x2f8, 0x3e8, 0x2e8 + * baud baud rate (between 300 and 4800) + * irq interrupt line of the port; common values are 4,3 + * + * + * History: + * 0.1 26.06.96 Adapted from baycom.c and made network driver interface + * 18.10.96 Changed to new user space access routines (copy_{to,from}_user) + * 0.3 26.04.97 init code/data tagged + * 0.4 08.07.97 alternative ser12 decoding algorithm (uses delta CTS ints) + * 0.5 11.11.97 ser12/par96 split into separate files + */ + +/*****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* --------------------------------------------------------------------- */ + +/* + * currently this module is supposed to support both module styles, i.e. + * the old one present up to about 2.1.9, and the new one functioning + * starting with 2.1.21. The reason is I have a kit allowing to compile + * this module also under 2.0.x which was requested by several people. 
+ * This will go in 2.2 + */ +#include + +#if LINUX_VERSION_CODE >= 0x20100 +#include +#else +#include +#include + +#undef put_user +#undef get_user + +#define put_user(x,ptr) ({ __put_user((unsigned long)(x),(ptr),sizeof(*(ptr))); 0; }) +#define get_user(x,ptr) ({ x = ((__typeof__(*(ptr)))__get_user((ptr),sizeof(*(ptr)))); 0; }) + +extern inline int copy_from_user(void *to, const void *from, unsigned long n) +{ + int i = verify_area(VERIFY_READ, from, n); + if (i) + return i; + memcpy_fromfs(to, from, n); + return 0; +} + +extern inline int copy_to_user(void *to, const void *from, unsigned long n) +{ + int i = verify_area(VERIFY_WRITE, to, n); + if (i) + return i; + memcpy_tofs(to, from, n); + return 0; +} +#endif + +#if LINUX_VERSION_CODE >= 0x20123 +#include +#else +#define __init +#define __initdata +#define __initfunc(x) x +#endif + +/* --------------------------------------------------------------------- */ + +#define BAYCOM_DEBUG + +/* + * modem options; bit mask + */ +#define BAYCOM_OPTIONS_SOFTDCD 1 + +/* --------------------------------------------------------------------- */ + +static const char bc_drvname[] = "baycom_ser_fdx"; +static const char bc_drvinfo[] = KERN_INFO "baycom_ser_fdx: (C) 1997 Thomas Sailer, HB9JNX/AE4WA\n" +KERN_INFO "baycom_ser_fdx: version 0.5 compiled " __TIME__ " " __DATE__ "\n"; + +/* --------------------------------------------------------------------- */ + +#define NR_PORTS 4 + +static struct device baycom_device[NR_PORTS]; + +static struct { + char *mode; + int iobase, irq, baud; +} baycom_ports[NR_PORTS] = { { NULL, 0, 0 }, }; + +/* --------------------------------------------------------------------- */ + +#define RBR(iobase) (iobase+0) +#define THR(iobase) (iobase+0) +#define IER(iobase) (iobase+1) +#define IIR(iobase) (iobase+2) +#define FCR(iobase) (iobase+2) +#define LCR(iobase) (iobase+3) +#define MCR(iobase) (iobase+4) +#define LSR(iobase) (iobase+5) +#define MSR(iobase) (iobase+6) +#define SCR(iobase) (iobase+7) 
+#define DLL(iobase) (iobase+0) +#define DLM(iobase) (iobase+1) + +#define SER12_EXTENT 8 + +/* ---------------------------------------------------------------------- */ +/* + * Information that need to be kept for each board. + */ + +struct baycom_state { + struct hdlcdrv_state hdrv; + + unsigned int baud, baud_us8, baud_arbdiv; + unsigned int options; + + struct modem_state { + short arb_divider; + unsigned char flags; + unsigned int shreg; + struct modem_state_ser12 { + unsigned char tx_bit; + int dcd_sum0, dcd_sum1, dcd_sum2; + unsigned char last_sample; + unsigned char last_rxbit; + unsigned int dcd_shreg; + unsigned int dcd_time; + unsigned int bit_pll; + unsigned long last_jiffies; + unsigned int pll_time; + unsigned int txshreg; + } ser12; + } modem; + +#ifdef BAYCOM_DEBUG + struct debug_vals { + unsigned long last_jiffies; + unsigned cur_intcnt; + unsigned last_intcnt; + int cur_pllcorr; + int last_pllcorr; + } debug_vals; +#endif /* BAYCOM_DEBUG */ +}; + +/* --------------------------------------------------------------------- */ + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? 
(a) : (b)) + +/* --------------------------------------------------------------------- */ + +static void inline baycom_int_freq(struct baycom_state *bc) +{ +#ifdef BAYCOM_DEBUG + unsigned long cur_jiffies = jiffies; + /* + * measure the interrupt frequency + */ + bc->debug_vals.cur_intcnt++; + if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) { + bc->debug_vals.last_jiffies = cur_jiffies; + bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt; + bc->debug_vals.cur_intcnt = 0; + bc->debug_vals.last_pllcorr = bc->debug_vals.cur_pllcorr; + bc->debug_vals.cur_pllcorr = 0; + } +#endif /* BAYCOM_DEBUG */ +} + +/* --------------------------------------------------------------------- */ +/* + * ===================== SER12 specific routines ========================= + */ + +/* --------------------------------------------------------------------- */ + +extern inline unsigned int hweight16(unsigned short w) + __attribute__ ((unused)); +extern inline unsigned int hweight8(unsigned char w) + __attribute__ ((unused)); + +extern inline unsigned int hweight16(unsigned short w) +{ + unsigned short res = (w & 0x5555) + ((w >> 1) & 0x5555); + res = (res & 0x3333) + ((res >> 2) & 0x3333); + res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F); + return (res & 0x00FF) + ((res >> 8) & 0x00FF); +} + +extern inline unsigned int hweight8(unsigned char w) +{ + unsigned short res = (w & 0x55) + ((w >> 1) & 0x55); + res = (res & 0x33) + ((res >> 2) & 0x33); + return (res & 0x0F) + ((res >> 4) & 0x0F); +} + +/* --------------------------------------------------------------------- */ + +static __inline__ void ser12_rxsample(struct device *dev, struct baycom_state *bc, unsigned char news) +{ + bc->modem.ser12.dcd_shreg <<= 1; + bc->modem.ser12.bit_pll += 0x2000; + if (bc->modem.ser12.last_sample != news) { + bc->modem.ser12.last_sample = news; + bc->modem.ser12.dcd_shreg |= 1; + if (bc->modem.ser12.bit_pll < 0x9000) + bc->modem.ser12.bit_pll += 0x1000; + else + bc->modem.ser12.bit_pll -= 0x1000; 
+ bc->modem.ser12.dcd_sum0 += 4 * hweight8(bc->modem.ser12.dcd_shreg & 0x38) + - hweight16(bc->modem.ser12.dcd_shreg & 0x7c0); + } + hdlcdrv_channelbit(&bc->hdrv, !!bc->modem.ser12.last_sample); + if ((--bc->modem.ser12.dcd_time) <= 0) { + hdlcdrv_setdcd(&bc->hdrv, (bc->modem.ser12.dcd_sum0 + + bc->modem.ser12.dcd_sum1 + + bc->modem.ser12.dcd_sum2) < 0); + bc->modem.ser12.dcd_sum2 = bc->modem.ser12.dcd_sum1; + bc->modem.ser12.dcd_sum1 = bc->modem.ser12.dcd_sum0; + bc->modem.ser12.dcd_sum0 = 2; /* slight bias */ + bc->modem.ser12.dcd_time = 120; + } + if (bc->modem.ser12.bit_pll >= 0x10000) { + bc->modem.ser12.bit_pll &= 0xffff; + bc->modem.shreg >>= 1; + if (bc->modem.ser12.last_rxbit == bc->modem.ser12.last_sample) + bc->modem.shreg |= 0x10000; + bc->modem.ser12.last_rxbit = bc->modem.ser12.last_sample; + if (bc->modem.shreg & 1) { + hdlcdrv_putbits(&bc->hdrv, bc->modem.shreg >> 1); + bc->modem.shreg = 0x10000; + } + } +} + +/* --------------------------------------------------------------------- */ + +static __inline__ void ser12_rx(struct device *dev, struct baycom_state *bc, unsigned char curs) +{ + unsigned long curjiff; + struct timeval tv; + unsigned int timediff; + + /* + * get current time + */ + curjiff = jiffies; + do_gettimeofday(&tv); + if ((signed)(curjiff - bc->modem.ser12.last_jiffies) >= HZ/4) { + /* long inactivity; clear HDLC and DCD */ + bc->modem.ser12.dcd_sum1 = 0; + bc->modem.ser12.dcd_sum2 = 0; + bc->modem.ser12.dcd_sum0 = 2; + bc->modem.ser12.dcd_time = 120; + hdlcdrv_setdcd(&bc->hdrv, 0); + hdlcdrv_putbits(&bc->hdrv, 0xffff); + bc->modem.ser12.last_jiffies = curjiff; + bc->modem.ser12.pll_time = tv.tv_usec; + } + bc->modem.ser12.last_jiffies = curjiff; + timediff = tv.tv_usec + 1000000 - bc->modem.ser12.pll_time; + timediff %= 1000000; + timediff /= bc->baud_us8; + bc->modem.ser12.pll_time = (bc->modem.ser12.pll_time + timediff * (bc->baud_us8)) % 1000000; + for (; timediff > 1; timediff--) + ser12_rxsample(dev, bc, 
bc->modem.ser12.last_sample); + if (timediff >= 1) + ser12_rxsample(dev, bc, curs); +} + +/* --------------------------------------------------------------------- */ + +static void ser12_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct device *dev = (struct device *)dev_id; + struct baycom_state *bc = (struct baycom_state *)dev->priv; + unsigned char iir, msr = 0; + unsigned int txcount = 0; + unsigned int rxcount = 0; + + if (!dev || !bc || bc->hdrv.magic != HDLCDRV_MAGIC) + return; + + for (;;) { + iir = inb(IIR(dev->base_addr)); + if (iir & 1) + break; + switch (iir & 6) { + case 6: + inb(LSR(dev->base_addr)); + continue; + + case 4: + inb(RBR(dev->base_addr)); + continue; + + case 2: + /* + * make sure the next interrupt is generated; + * 0 must be used to power the modem; the modem draws its + * power from the TxD line + */ + outb(0x00, THR(dev->base_addr)); + bc->modem.arb_divider--; + baycom_int_freq(bc); + if (hdlcdrv_ptt(&bc->hdrv)) { + /* + * first output the last bit (!) 
then call HDLC transmitter, + * since this may take quite long + */ + outb(0x0e | (!!bc->modem.ser12.tx_bit), MCR(dev->base_addr)); + txcount++; + } else + outb(0x0d, MCR(dev->base_addr)); /* transmitter off */ + continue; + + default: + msr = inb(MSR(dev->base_addr)); + if (msr & 1) /* delta CTS interrupt */ + rxcount++; + continue; + } + } + if (rxcount) + ser12_rx(dev, bc, msr & 0x10); + if (txcount) { +#ifdef BAYCOM_DEBUG + if (bc->debug_vals.cur_pllcorr < txcount) + bc->debug_vals.cur_pllcorr = txcount; +#endif /* BAYCOM_DEBUG */ + if (bc->modem.ser12.txshreg <= 1) + bc->modem.ser12.txshreg = 0x10000 | hdlcdrv_getbits(&bc->hdrv); + bc->modem.ser12.tx_bit = !(bc->modem.ser12.tx_bit ^ (bc->modem.ser12.txshreg & 1)); + bc->modem.ser12.txshreg >>= 1; + } + sti(); + if (bc->modem.arb_divider <= 0) { + bc->modem.arb_divider = bc->baud_arbdiv; + hdlcdrv_arbitrate(dev, &bc->hdrv); + } + hdlcdrv_transmitter(dev, &bc->hdrv); + hdlcdrv_receiver(dev, &bc->hdrv); +} + +/* --------------------------------------------------------------------- */ + +enum uart { c_uart_unknown, c_uart_8250, + c_uart_16450, c_uart_16550, c_uart_16550A}; +static const char *uart_str[] = { + "unknown", "8250", "16450", "16550", "16550A" +}; + +static enum uart ser12_check_uart(unsigned int iobase) +{ + unsigned char b1,b2,b3; + enum uart u; + enum uart uart_tab[] = + { c_uart_16450, c_uart_unknown, c_uart_16550, c_uart_16550A }; + + b1 = inb(MCR(iobase)); + outb(b1 | 0x10, MCR(iobase)); /* loopback mode */ + b2 = inb(MSR(iobase)); + outb(0x1a, MCR(iobase)); + b3 = inb(MSR(iobase)) & 0xf0; + outb(b1, MCR(iobase)); /* restore old values */ + outb(b2, MSR(iobase)); + if (b3 != 0x90) + return c_uart_unknown; + inb(RBR(iobase)); + inb(RBR(iobase)); + outb(0x01, FCR(iobase)); /* enable FIFOs */ + u = uart_tab[(inb(IIR(iobase)) >> 6) & 3]; + if (u == c_uart_16450) { + outb(0x5a, SCR(iobase)); + b1 = inb(SCR(iobase)); + outb(0xa5, SCR(iobase)); + b2 = inb(SCR(iobase)); + if ((b1 != 0x5a) || (b2 != 0xa5)) 
+ u = c_uart_8250; + } + return u; +} + +/* --------------------------------------------------------------------- */ + +static int ser12_open(struct device *dev) +{ + struct baycom_state *bc = (struct baycom_state *)dev->priv; + enum uart u; + + if (!dev || !bc) + return -ENXIO; + if (!dev->base_addr || dev->base_addr > 0x1000-SER12_EXTENT || + dev->irq < 2 || dev->irq > 15) + return -ENXIO; + if (bc->baud < 300 || bc->baud > 4800) + return -EINVAL; + if (check_region(dev->base_addr, SER12_EXTENT)) + return -EACCES; + memset(&bc->modem, 0, sizeof(bc->modem)); + bc->hdrv.par.bitrate = bc->baud; + bc->baud_us8 = 125000/bc->baud; + bc->baud_arbdiv = bc->baud/100; + if ((u = ser12_check_uart(dev->base_addr)) == c_uart_unknown) + return -EIO; + outb(0, FCR(dev->base_addr)); /* disable FIFOs */ + outb(0x0d, MCR(dev->base_addr)); + outb(0x0d, MCR(dev->base_addr)); + outb(0, IER(dev->base_addr)); + if (request_irq(dev->irq, ser12_interrupt, SA_INTERRUPT, + "baycom_ser_fdx", dev)) + return -EBUSY; + request_region(dev->base_addr, SER12_EXTENT, "baycom_ser_fdx"); + /* + * set the SIO to 6 Bits/character and 19600 baud, so that + * we get exactly (hopefully) one interrupt per radio symbol + */ + outb(0x81, LCR(dev->base_addr)); /* DLAB = 1 */ + outb(115200/8/bc->baud, DLL(dev->base_addr)); + outb(0, DLM(dev->base_addr)); + outb(0x01, LCR(dev->base_addr)); /* word length = 6 */ + /* + * enable transmitter empty interrupt and modem status interrupt + */ + outb(0x0a, IER(dev->base_addr)); + /* + * make sure the next interrupt is generated; + * 0 must be used to power the modem; the modem draws its + * power from the TxD line + */ + outb(0x00, THR(dev->base_addr)); + printk(KERN_INFO "%s: ser_fdx at iobase 0x%lx irq %u options " + "0x%x baud %u uart %s\n", bc_drvname, dev->base_addr, dev->irq, + bc->options, bc->baud, uart_str[u]); + MOD_INC_USE_COUNT; + return 0; +} + +/* --------------------------------------------------------------------- */ + +static int ser12_close(struct 
device *dev) +{ + struct baycom_state *bc = (struct baycom_state *)dev->priv; + + if (!dev || !bc) + return -EINVAL; + /* + * disable interrupts + */ + outb(0, IER(dev->base_addr)); + outb(1, MCR(dev->base_addr)); + free_irq(dev->irq, dev); + release_region(dev->base_addr, SER12_EXTENT); + printk(KERN_INFO "%s: close ser_fdx at iobase 0x%lx irq %u\n", + bc_drvname, dev->base_addr, dev->irq); + MOD_DEC_USE_COUNT; + return 0; +} + +/* --------------------------------------------------------------------- */ +/* + * ===================== hdlcdrv driver interface ========================= + */ + +/* --------------------------------------------------------------------- */ + +static int baycom_ioctl(struct device *dev, struct ifreq *ifr, + struct hdlcdrv_ioctl *hi, int cmd); + +/* --------------------------------------------------------------------- */ + +static struct hdlcdrv_ops ser12_ops = { + bc_drvname, + bc_drvinfo, + ser12_open, + ser12_close, + baycom_ioctl +}; + +/* --------------------------------------------------------------------- */ + +static int baycom_setmode(struct baycom_state *bc, const char *modestr) +{ + unsigned int baud; + + if (!strncmp(modestr, "ser", 3)) { + baud = simple_strtoul(modestr+3, NULL, 10); + if (baud >= 3 && baud <= 48) + bc->baud = baud*100; + } + bc->options = !!strchr(modestr, '*'); + return 0; +} + +/* --------------------------------------------------------------------- */ + +static int baycom_ioctl(struct device *dev, struct ifreq *ifr, + struct hdlcdrv_ioctl *hi, int cmd) +{ + struct baycom_state *bc; + struct baycom_ioctl bi; + int cmd2; + + if (!dev || !dev->priv || + ((struct baycom_state *)dev->priv)->hdrv.magic != HDLCDRV_MAGIC) { + printk(KERN_ERR "bc_ioctl: invalid device struct\n"); + return -EINVAL; + } + bc = (struct baycom_state *)dev->priv; + + if (cmd != SIOCDEVPRIVATE) + return -ENOIOCTLCMD; + if (get_user(cmd2, (int *)ifr->ifr_data)) + return -EFAULT; + switch (hi->cmd) { + default: + break; + + case 
HDLCDRVCTL_GETMODE: + sprintf(hi->data.modename, "ser%u", bc->baud / 100); + if (bc->options & 1) + strcat(hi->data.modename, "*"); + if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + return -EFAULT; + return 0; + + case HDLCDRVCTL_SETMODE: + if (!suser() || dev->start) + return -EACCES; + hi->data.modename[sizeof(hi->data.modename)-1] = '\0'; + return baycom_setmode(bc, hi->data.modename); + + case HDLCDRVCTL_MODELIST: + strcpy(hi->data.modename, "ser12,ser3,ser24"); + if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + return -EFAULT; + return 0; + + case HDLCDRVCTL_MODEMPARMASK: + return HDLCDRV_PARMASK_IOBASE | HDLCDRV_PARMASK_IRQ; + + } + + if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi))) + return -EFAULT; + switch (bi.cmd) { + default: + return -ENOIOCTLCMD; + +#ifdef BAYCOM_DEBUG + case BAYCOMCTL_GETDEBUG: + bi.data.dbg.debug1 = bc->hdrv.ptt_keyed; + bi.data.dbg.debug2 = bc->debug_vals.last_intcnt; + bi.data.dbg.debug3 = bc->debug_vals.last_pllcorr; + break; +#endif /* BAYCOM_DEBUG */ + + } + if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi))) + return -EFAULT; + return 0; + +} + +/* --------------------------------------------------------------------- */ + +__initfunc(int baycom_ser_fdx_init(void)) +{ + int i, j, found = 0; + char set_hw = 1; + struct baycom_state *bc; + char ifname[HDLCDRV_IFNAMELEN]; + + + printk(bc_drvinfo); + /* + * register net devices + */ + for (i = 0; i < NR_PORTS; i++) { + struct device *dev = baycom_device+i; + sprintf(ifname, "bcsf%d", i); + + if (!baycom_ports[i].mode) + set_hw = 0; + if (!set_hw) + baycom_ports[i].iobase = baycom_ports[i].irq = 0; + j = hdlcdrv_register_hdlcdrv(dev, &ser12_ops, + sizeof(struct baycom_state), + ifname, baycom_ports[i].iobase, + baycom_ports[i].irq, 0); + if (!j) { + bc = (struct baycom_state *)dev->priv; + if (set_hw && baycom_setmode(bc, baycom_ports[i].mode)) + set_hw = 0; + bc->baud = baycom_ports[i].baud; + found++; + } else { + printk(KERN_WARNING "%s: 
cannot register net device\n", + bc_drvname); + } + } + if (!found) + return -ENXIO; + return 0; +} + +/* --------------------------------------------------------------------- */ + +#ifdef MODULE + +/* + * command line settable parameters + */ +static char *mode[NR_PORTS] = { "ser12*", }; +static int iobase[NR_PORTS] = { 0x3f8, }; +static int irq[NR_PORTS] = { 4, }; +static int baud[NR_PORTS] = { [0 ... NR_PORTS-1] = 1200 }; + +#if LINUX_VERSION_CODE >= 0x20115 + +MODULE_PARM(mode, "1-" __MODULE_STRING(NR_PORTS) "s"); +MODULE_PARM_DESC(mode, "baycom operating mode; * for software DCD"); +MODULE_PARM(iobase, "1-" __MODULE_STRING(NR_PORTS) "i"); +MODULE_PARM_DESC(iobase, "baycom io base address"); +MODULE_PARM(irq, "1-" __MODULE_STRING(NR_PORTS) "i"); +MODULE_PARM_DESC(irq, "baycom irq number"); +MODULE_PARM(baud, "1-" __MODULE_STRING(NR_PORTS) "i"); +MODULE_PARM_DESC(baud, "baycom baud rate (300 to 4800)"); + +MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu"); +MODULE_DESCRIPTION("Baycom ser12 full duplex amateur radio modem driver"); + +#endif + +__initfunc(int init_module(void)) +{ + int i; + + for (i = 0; (i < NR_PORTS) && (mode[i]); i++) { + baycom_ports[i].mode = mode[i]; + baycom_ports[i].iobase = iobase[i]; + baycom_ports[i].irq = irq[i]; + baycom_ports[i].baud = baud[i]; + } + if (i < NR_PORTS-1) + baycom_ports[i+1].mode = NULL; + return baycom_ser_fdx_init(); +} + +/* --------------------------------------------------------------------- */ + +void cleanup_module(void) +{ + int i; + + for(i = 0; i < NR_PORTS; i++) { + struct device *dev = baycom_device+i; + struct baycom_state *bc = (struct baycom_state *)dev->priv; + + if (bc) { + if (bc->hdrv.magic != HDLCDRV_MAGIC) + printk(KERN_ERR "baycom: invalid magic in " + "cleanup_module\n"); + else + hdlcdrv_unregister_hdlcdrv(dev); + } + } +} + +#else /* MODULE */ +/* --------------------------------------------------------------------- */ +/* + * format: baycom_ser_=io,irq,mode + * 
mode: [*] + * * indicates software DCD + */ + +__initfunc(void baycom_ser_fdx_setup(char *str, int *ints)) +{ + int i; + + for (i = 0; (i < NR_PORTS) && (baycom_ports[i].mode); i++); + if ((i >= NR_PORTS) || (ints[0] < 2)) { + printk(KERN_INFO "%s: too many or invalid interface " + "specifications\n", bc_drvname); + return; + } + baycom_ports[i].mode = str; + baycom_ports[i].iobase = ints[1]; + baycom_ports[i].irq = ints[2]; + if (ints[0] >= 3) + baycom_ports[i].baud = ints[3]; + else + baycom_ports[i].baud = 1200; + if (i < NR_PORTS-1) + baycom_ports[i+1].mode = NULL; +} + +#endif /* MODULE */ +/* --------------------------------------------------------------------- */ diff -u --recursive --new-file v2.1.67/linux/drivers/net/baycom_ser_hdx.c linux/drivers/net/baycom_ser_hdx.c --- v2.1.67/linux/drivers/net/baycom_ser_hdx.c Wed Dec 31 16:00:00 1969 +++ linux/drivers/net/baycom_ser_hdx.c Sun Nov 30 10:30:19 1997 @@ -0,0 +1,792 @@ +/*****************************************************************************/ + +/* + * baycom_ser_hdx.c -- baycom ser12 halfduplex radio modem driver. + * + * Copyright (C) 1997 Thomas Sailer (sailer@ife.ee.ethz.ch) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Please note that the GPL allows you to use the driver, NOT the radio.
+ * In order to use the radio, you need a license from the communications + * authority of your country. + * + * + * Supported modems + * + * ser12: This is a very simple 1200 baud AFSK modem. The modem consists only + * of a modulator/demodulator chip, usually a TI TCM3105. The computer + * is responsible for regenerating the receiver bit clock, as well as + * for handling the HDLC protocol. The modem connects to a serial port, + * hence the name. Since the serial port is not used as an async serial + * port, the kernel driver for serial ports cannot be used, and this + * driver only supports standard serial hardware (8250, 16450, 16550A) + * + * + * Command line options (insmod command line) + * + * mode * enables software DCD. + * iobase base address of the port; common values are 0x3f8, 0x2f8, 0x3e8, 0x2e8 + * irq interrupt line of the port; common values are 4,3 + * + * + * History: + * 0.1 26.06.96 Adapted from baycom.c and made network driver interface + * 18.10.96 Changed to new user space access routines (copy_{to,from}_user) + * 0.3 26.04.97 init code/data tagged + * 0.4 08.07.97 alternative ser12 decoding algorithm (uses delta CTS ints) + * 0.5 11.11.97 ser12/par96 split into separate files + */ + +/*****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* --------------------------------------------------------------------- */ + +/* + * currently this module is supposed to support both module styles, i.e. + * the old one present up to about 2.1.9, and the new one functioning + * starting with 2.1.21. The reason is I have a kit allowing to compile + * this module also under 2.0.x which was requested by several people. 
+ * This will go in 2.2 + */ +#include + +#if LINUX_VERSION_CODE >= 0x20100 +#include +#else +#include +#include + +#undef put_user +#undef get_user + +#define put_user(x,ptr) ({ __put_user((unsigned long)(x),(ptr),sizeof(*(ptr))); 0; }) +#define get_user(x,ptr) ({ x = ((__typeof__(*(ptr)))__get_user((ptr),sizeof(*(ptr)))); 0; }) + +extern inline int copy_from_user(void *to, const void *from, unsigned long n) +{ + int i = verify_area(VERIFY_READ, from, n); + if (i) + return i; + memcpy_fromfs(to, from, n); + return 0; +} + +extern inline int copy_to_user(void *to, const void *from, unsigned long n) +{ + int i = verify_area(VERIFY_WRITE, to, n); + if (i) + return i; + memcpy_tofs(to, from, n); + return 0; +} +#endif + +#if LINUX_VERSION_CODE >= 0x20123 +#include +#else +#define __init +#define __initdata +#define __initfunc(x) x +#endif + +/* --------------------------------------------------------------------- */ + +#define BAYCOM_DEBUG + +/* + * modem options; bit mask + */ +#define BAYCOM_OPTIONS_SOFTDCD 1 + +/* --------------------------------------------------------------------- */ + +static const char bc_drvname[] = "baycom_ser_hdx"; +static const char bc_drvinfo[] = KERN_INFO "baycom_ser_hdx: (C) 1997 Thomas Sailer, HB9JNX/AE4WA\n" +KERN_INFO "baycom_ser_hdx: version 0.5 compiled " __TIME__ " " __DATE__ "\n"; + +/* --------------------------------------------------------------------- */ + +#define NR_PORTS 4 + +static struct device baycom_device[NR_PORTS]; + +static struct { + char *mode; + int iobase, irq; +} baycom_ports[NR_PORTS] = { { NULL, 0, 0 }, }; + +/* --------------------------------------------------------------------- */ + +#define RBR(iobase) (iobase+0) +#define THR(iobase) (iobase+0) +#define IER(iobase) (iobase+1) +#define IIR(iobase) (iobase+2) +#define FCR(iobase) (iobase+2) +#define LCR(iobase) (iobase+3) +#define MCR(iobase) (iobase+4) +#define LSR(iobase) (iobase+5) +#define MSR(iobase) (iobase+6) +#define SCR(iobase) (iobase+7) +#define 
DLL(iobase) (iobase+0) +#define DLM(iobase) (iobase+1) + +#define SER12_EXTENT 8 + +/* ---------------------------------------------------------------------- */ +/* + * Information that need to be kept for each board. + */ + +struct baycom_state { + struct hdlcdrv_state hdrv; + + unsigned int options; + + struct modem_state { + short arb_divider; + unsigned char flags; + unsigned int shreg; + struct modem_state_ser12 { + unsigned char tx_bit; + int dcd_sum0, dcd_sum1, dcd_sum2; + unsigned char last_sample; + unsigned char last_rxbit; + unsigned int dcd_shreg; + unsigned int dcd_time; + unsigned int bit_pll; + unsigned char interm_sample; + } ser12; + } modem; + +#ifdef BAYCOM_DEBUG + struct debug_vals { + unsigned long last_jiffies; + unsigned cur_intcnt; + unsigned last_intcnt; + int cur_pllcorr; + int last_pllcorr; + } debug_vals; +#endif /* BAYCOM_DEBUG */ +}; + +/* --------------------------------------------------------------------- */ + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? 
(a) : (b)) + +/* --------------------------------------------------------------------- */ + +static void inline baycom_int_freq(struct baycom_state *bc) +{ +#ifdef BAYCOM_DEBUG + unsigned long cur_jiffies = jiffies; + /* + * measure the interrupt frequency + */ + bc->debug_vals.cur_intcnt++; + if ((cur_jiffies - bc->debug_vals.last_jiffies) >= HZ) { + bc->debug_vals.last_jiffies = cur_jiffies; + bc->debug_vals.last_intcnt = bc->debug_vals.cur_intcnt; + bc->debug_vals.cur_intcnt = 0; + bc->debug_vals.last_pllcorr = bc->debug_vals.cur_pllcorr; + bc->debug_vals.cur_pllcorr = 0; + } +#endif /* BAYCOM_DEBUG */ +} + +/* --------------------------------------------------------------------- */ +/* + * ===================== SER12 specific routines ========================= + */ + +static void inline ser12_set_divisor(struct device *dev, + unsigned char divisor) +{ + outb(0x81, LCR(dev->base_addr)); /* DLAB = 1 */ + outb(divisor, DLL(dev->base_addr)); + outb(0, DLM(dev->base_addr)); + outb(0x01, LCR(dev->base_addr)); /* word length = 6 */ + /* + * make sure the next interrupt is generated; + * 0 must be used to power the modem; the modem draws its + * power from the TxD line + */ + outb(0x00, THR(dev->base_addr)); + /* + * it is important not to set the divider while transmitting; + * this reportedly makes some UARTs generating interrupts + * in the hundredthousands per second region + * Reported by: Ignacio.Arenaza@studi.epfl.ch (Ignacio Arenaza Nuno) + */ +} + +/* --------------------------------------------------------------------- */ + +/* + * must call the TX arbitrator every 10ms + */ +#define SER12_ARB_DIVIDER(bc) ((bc->options & BAYCOM_OPTIONS_SOFTDCD) ? \ + 36 : 24) +#define SER12_DCD_INTERVAL(bc) ((bc->options & BAYCOM_OPTIONS_SOFTDCD) ? \ + 240 : 12) + +static inline void ser12_tx(struct device *dev, struct baycom_state *bc) +{ + /* one interrupt per channel bit */ + ser12_set_divisor(dev, 12); + /* + * first output the last bit (!) 
then call HDLC transmitter, + * since this may take quite long + */ + outb(0x0e | (!!bc->modem.ser12.tx_bit), MCR(dev->base_addr)); + if (bc->modem.shreg <= 1) + bc->modem.shreg = 0x10000 | hdlcdrv_getbits(&bc->hdrv); + bc->modem.ser12.tx_bit = !(bc->modem.ser12.tx_bit ^ + (bc->modem.shreg & 1)); + bc->modem.shreg >>= 1; +} + +/* --------------------------------------------------------------------- */ + +static inline void ser12_rx(struct device *dev, struct baycom_state *bc) +{ + unsigned char cur_s; + /* + * do demodulator + */ + cur_s = inb(MSR(dev->base_addr)) & 0x10; /* the CTS line */ + hdlcdrv_channelbit(&bc->hdrv, cur_s); + bc->modem.ser12.dcd_shreg = (bc->modem.ser12.dcd_shreg << 1) | + (cur_s != bc->modem.ser12.last_sample); + bc->modem.ser12.last_sample = cur_s; + if(bc->modem.ser12.dcd_shreg & 1) { + if (bc->options & BAYCOM_OPTIONS_SOFTDCD) { + unsigned int dcdspos, dcdsneg; + + dcdspos = dcdsneg = 0; + dcdspos += ((bc->modem.ser12.dcd_shreg >> 1) & 1); + if (!(bc->modem.ser12.dcd_shreg & 0x7ffffffe)) + dcdspos += 2; + dcdsneg += ((bc->modem.ser12.dcd_shreg >> 2) & 1); + dcdsneg += ((bc->modem.ser12.dcd_shreg >> 3) & 1); + dcdsneg += ((bc->modem.ser12.dcd_shreg >> 4) & 1); + + bc->modem.ser12.dcd_sum0 += 16*dcdspos - dcdsneg; + } else + bc->modem.ser12.dcd_sum0--; + } + if(!bc->modem.ser12.dcd_time) { + hdlcdrv_setdcd(&bc->hdrv, (bc->modem.ser12.dcd_sum0 + + bc->modem.ser12.dcd_sum1 + + bc->modem.ser12.dcd_sum2) < 0); + bc->modem.ser12.dcd_sum2 = bc->modem.ser12.dcd_sum1; + bc->modem.ser12.dcd_sum1 = bc->modem.ser12.dcd_sum0; + /* offset to ensure DCD off on silent input */ + bc->modem.ser12.dcd_sum0 = 2; + bc->modem.ser12.dcd_time = SER12_DCD_INTERVAL(bc); + } + bc->modem.ser12.dcd_time--; + if (bc->options & BAYCOM_OPTIONS_SOFTDCD) { + /* + * PLL code for the improved software DCD algorithm + */ + if (bc->modem.ser12.interm_sample) { + /* + * intermediate sample; set timing correction to normal + */ + ser12_set_divisor(dev, 4); + } else { + /* + * do 
PLL correction and call HDLC receiver + */ + switch (bc->modem.ser12.dcd_shreg & 7) { + case 1: /* transition too late */ + ser12_set_divisor(dev, 5); +#ifdef BAYCOM_DEBUG + bc->debug_vals.cur_pllcorr++; +#endif /* BAYCOM_DEBUG */ + break; + case 4: /* transition too early */ + ser12_set_divisor(dev, 3); +#ifdef BAYCOM_DEBUG + bc->debug_vals.cur_pllcorr--; +#endif /* BAYCOM_DEBUG */ + break; + default: + ser12_set_divisor(dev, 4); + break; + } + bc->modem.shreg >>= 1; + if (bc->modem.ser12.last_sample == + bc->modem.ser12.last_rxbit) + bc->modem.shreg |= 0x10000; + bc->modem.ser12.last_rxbit = + bc->modem.ser12.last_sample; + } + if (++bc->modem.ser12.interm_sample >= 3) + bc->modem.ser12.interm_sample = 0; + /* + * DCD stuff + */ + if (bc->modem.ser12.dcd_shreg & 1) { + unsigned int dcdspos, dcdsneg; + + dcdspos = dcdsneg = 0; + dcdspos += ((bc->modem.ser12.dcd_shreg >> 1) & 1); + dcdspos += (!(bc->modem.ser12.dcd_shreg & 0x7ffffffe)) + << 1; + dcdsneg += ((bc->modem.ser12.dcd_shreg >> 2) & 1); + dcdsneg += ((bc->modem.ser12.dcd_shreg >> 3) & 1); + dcdsneg += ((bc->modem.ser12.dcd_shreg >> 4) & 1); + + bc->modem.ser12.dcd_sum0 += 16*dcdspos - dcdsneg; + } + } else { + /* + * PLL algorithm for the hardware squelch DCD algorithm + */ + if (bc->modem.ser12.interm_sample) { + /* + * intermediate sample; set timing correction to normal + */ + ser12_set_divisor(dev, 6); + } else { + /* + * do PLL correction and call HDLC receiver + */ + switch (bc->modem.ser12.dcd_shreg & 3) { + case 1: /* transition too late */ + ser12_set_divisor(dev, 7); +#ifdef BAYCOM_DEBUG + bc->debug_vals.cur_pllcorr++; +#endif /* BAYCOM_DEBUG */ + break; + case 2: /* transition too early */ + ser12_set_divisor(dev, 5); +#ifdef BAYCOM_DEBUG + bc->debug_vals.cur_pllcorr--; +#endif /* BAYCOM_DEBUG */ + break; + default: + ser12_set_divisor(dev, 6); + break; + } + bc->modem.shreg >>= 1; + if (bc->modem.ser12.last_sample == + bc->modem.ser12.last_rxbit) + bc->modem.shreg |= 0x10000; + 
bc->modem.ser12.last_rxbit = + bc->modem.ser12.last_sample; + } + bc->modem.ser12.interm_sample = !bc->modem.ser12.interm_sample; + /* + * DCD stuff + */ + bc->modem.ser12.dcd_sum0 -= (bc->modem.ser12.dcd_shreg & 1); + } + outb(0x0d, MCR(dev->base_addr)); /* transmitter off */ + if (bc->modem.shreg & 1) { + hdlcdrv_putbits(&bc->hdrv, bc->modem.shreg >> 1); + bc->modem.shreg = 0x10000; + } + if(!bc->modem.ser12.dcd_time) { + hdlcdrv_setdcd(&bc->hdrv, (bc->modem.ser12.dcd_sum0 + + bc->modem.ser12.dcd_sum1 + + bc->modem.ser12.dcd_sum2) < 0); + bc->modem.ser12.dcd_sum2 = bc->modem.ser12.dcd_sum1; + bc->modem.ser12.dcd_sum1 = bc->modem.ser12.dcd_sum0; + /* offset to ensure DCD off on silent input */ + bc->modem.ser12.dcd_sum0 = 2; + bc->modem.ser12.dcd_time = SER12_DCD_INTERVAL(bc); + } + bc->modem.ser12.dcd_time--; +} + +/* --------------------------------------------------------------------- */ + +static void ser12_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct device *dev = (struct device *)dev_id; + struct baycom_state *bc = (struct baycom_state *)dev->priv; + + if (!dev || !bc || bc->hdrv.magic != HDLCDRV_MAGIC) + return; + + baycom_int_freq(bc); + /* + * check if transmitter active + */ + if (hdlcdrv_ptt(&bc->hdrv)) + ser12_tx(dev, bc); + else { + ser12_rx(dev, bc); + if (--bc->modem.arb_divider <= 0) { + bc->modem.arb_divider = SER12_ARB_DIVIDER(bc); + sti(); + hdlcdrv_arbitrate(dev, &bc->hdrv); + } + } + sti(); + hdlcdrv_transmitter(dev, &bc->hdrv); + hdlcdrv_receiver(dev, &bc->hdrv); +} + +/* --------------------------------------------------------------------- */ + +enum uart { c_uart_unknown, c_uart_8250, + c_uart_16450, c_uart_16550, c_uart_16550A}; +static const char *uart_str[] = { + "unknown", "8250", "16450", "16550", "16550A" +}; + +static enum uart ser12_check_uart(unsigned int iobase) +{ + unsigned char b1,b2,b3; + enum uart u; + enum uart uart_tab[] = + { c_uart_16450, c_uart_unknown, c_uart_16550, c_uart_16550A }; + + b1 = 
inb(MCR(iobase)); + outb(b1 | 0x10, MCR(iobase)); /* loopback mode */ + b2 = inb(MSR(iobase)); + outb(0x1a, MCR(iobase)); + b3 = inb(MSR(iobase)) & 0xf0; + outb(b1, MCR(iobase)); /* restore old values */ + outb(b2, MSR(iobase)); + if (b3 != 0x90) + return c_uart_unknown; + inb(RBR(iobase)); + inb(RBR(iobase)); + outb(0x01, FCR(iobase)); /* enable FIFOs */ + u = uart_tab[(inb(IIR(iobase)) >> 6) & 3]; + if (u == c_uart_16450) { + outb(0x5a, SCR(iobase)); + b1 = inb(SCR(iobase)); + outb(0xa5, SCR(iobase)); + b2 = inb(SCR(iobase)); + if ((b1 != 0x5a) || (b2 != 0xa5)) + u = c_uart_8250; + } + return u; +} + +/* --------------------------------------------------------------------- */ + +static int ser12_open(struct device *dev) +{ + struct baycom_state *bc = (struct baycom_state *)dev->priv; + enum uart u; + + if (!dev || !bc) + return -ENXIO; + if (!dev->base_addr || dev->base_addr > 0x1000-SER12_EXTENT || + dev->irq < 2 || dev->irq > 15) + return -ENXIO; + if (check_region(dev->base_addr, SER12_EXTENT)) + return -EACCES; + memset(&bc->modem, 0, sizeof(bc->modem)); + bc->hdrv.par.bitrate = 1200; + if ((u = ser12_check_uart(dev->base_addr)) == c_uart_unknown) + return -EIO; + outb(0, FCR(dev->base_addr)); /* disable FIFOs */ + outb(0x0d, MCR(dev->base_addr)); + outb(0x0d, MCR(dev->base_addr)); + outb(0, IER(dev->base_addr)); + if (request_irq(dev->irq, ser12_interrupt, SA_INTERRUPT, + "baycom_ser12", dev)) + return -EBUSY; + request_region(dev->base_addr, SER12_EXTENT, "baycom_ser12"); + /* + * enable transmitter empty interrupt + */ + outb(2, IER(dev->base_addr)); + /* + * set the SIO to 6 Bits/character and 19200 or 28800 baud, so that + * we get exactly (hopefully) 2 or 3 interrupts per radio symbol, + * depending on the usage of the software DCD routine + */ + ser12_set_divisor(dev, (bc->options & BAYCOM_OPTIONS_SOFTDCD) ? 
4 : 6); + printk(KERN_INFO "%s: ser12 at iobase 0x%lx irq %u options " + "0x%x uart %s\n", bc_drvname, dev->base_addr, dev->irq, + bc->options, uart_str[u]); + MOD_INC_USE_COUNT; + return 0; +} + +/* --------------------------------------------------------------------- */ + +static int ser12_close(struct device *dev) +{ + struct baycom_state *bc = (struct baycom_state *)dev->priv; + + if (!dev || !bc) + return -EINVAL; + /* + * disable interrupts + */ + outb(0, IER(dev->base_addr)); + outb(1, MCR(dev->base_addr)); + free_irq(dev->irq, dev); + release_region(dev->base_addr, SER12_EXTENT); + printk(KERN_INFO "%s: close ser12 at iobase 0x%lx irq %u\n", + bc_drvname, dev->base_addr, dev->irq); + MOD_DEC_USE_COUNT; + return 0; +} + +/* --------------------------------------------------------------------- */ +/* + * ===================== hdlcdrv driver interface ========================= + */ + +/* --------------------------------------------------------------------- */ + +static int baycom_ioctl(struct device *dev, struct ifreq *ifr, + struct hdlcdrv_ioctl *hi, int cmd); + +/* --------------------------------------------------------------------- */ + +static struct hdlcdrv_ops ser12_ops = { + bc_drvname, + bc_drvinfo, + ser12_open, + ser12_close, + baycom_ioctl +}; + +/* --------------------------------------------------------------------- */ + +static int baycom_setmode(struct baycom_state *bc, const char *modestr) +{ + bc->options = !!strchr(modestr, '*'); + return 0; +} + +/* --------------------------------------------------------------------- */ + +static int baycom_ioctl(struct device *dev, struct ifreq *ifr, + struct hdlcdrv_ioctl *hi, int cmd) +{ + struct baycom_state *bc; + struct baycom_ioctl bi; + int cmd2; + + if (!dev || !dev->priv || + ((struct baycom_state *)dev->priv)->hdrv.magic != HDLCDRV_MAGIC) { + printk(KERN_ERR "bc_ioctl: invalid device struct\n"); + return -EINVAL; + } + bc = (struct baycom_state *)dev->priv; + + if (cmd != SIOCDEVPRIVATE) + 
return -ENOIOCTLCMD; + if (get_user(cmd2, (int *)ifr->ifr_data)) + return -EFAULT; + switch (hi->cmd) { + default: + break; + + case HDLCDRVCTL_GETMODE: + strcpy(hi->data.modename, "ser12"); + if (bc->options & 1) + strcat(hi->data.modename, "*"); + if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + return -EFAULT; + return 0; + + case HDLCDRVCTL_SETMODE: + if (!suser() || dev->start) + return -EACCES; + hi->data.modename[sizeof(hi->data.modename)-1] = '\0'; + return baycom_setmode(bc, hi->data.modename); + + case HDLCDRVCTL_MODELIST: + strcpy(hi->data.modename, "ser12"); + if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl))) + return -EFAULT; + return 0; + + case HDLCDRVCTL_MODEMPARMASK: + return HDLCDRV_PARMASK_IOBASE | HDLCDRV_PARMASK_IRQ; + + } + + if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi))) + return -EFAULT; + switch (bi.cmd) { + default: + return -ENOIOCTLCMD; + +#ifdef BAYCOM_DEBUG + case BAYCOMCTL_GETDEBUG: + bi.data.dbg.debug1 = bc->hdrv.ptt_keyed; + bi.data.dbg.debug2 = bc->debug_vals.last_intcnt; + bi.data.dbg.debug3 = bc->debug_vals.last_pllcorr; + break; +#endif /* BAYCOM_DEBUG */ + + } + if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi))) + return -EFAULT; + return 0; + +} + +/* --------------------------------------------------------------------- */ + +__initfunc(int baycom_ser_hdx_init(void)) +{ + int i, j, found = 0; + char set_hw = 1; + struct baycom_state *bc; + char ifname[HDLCDRV_IFNAMELEN]; + + + printk(bc_drvinfo); + /* + * register net devices + */ + for (i = 0; i < NR_PORTS; i++) { + struct device *dev = baycom_device+i; + sprintf(ifname, "bcsh%d", i); + + if (!baycom_ports[i].mode) + set_hw = 0; + if (!set_hw) + baycom_ports[i].iobase = baycom_ports[i].irq = 0; + j = hdlcdrv_register_hdlcdrv(dev, &ser12_ops, + sizeof(struct baycom_state), + ifname, baycom_ports[i].iobase, + baycom_ports[i].irq, 0); + if (!j) { + bc = (struct baycom_state *)dev->priv; + if (set_hw && baycom_setmode(bc, 
baycom_ports[i].mode)) + set_hw = 0; + found++; + } else { + printk(KERN_WARNING "%s: cannot register net device\n", + bc_drvname); + } + } + if (!found) + return -ENXIO; + return 0; +} + +/* --------------------------------------------------------------------- */ + +#ifdef MODULE + +/* + * command line settable parameters + */ +static char *mode[NR_PORTS] = { "ser12*", }; +static int iobase[NR_PORTS] = { 0x3f8, }; +static int irq[NR_PORTS] = { 4, }; + +#if LINUX_VERSION_CODE >= 0x20115 + +MODULE_PARM(mode, "1-" __MODULE_STRING(NR_PORTS) "s"); +MODULE_PARM_DESC(mode, "baycom operating mode; * for software DCD"); +MODULE_PARM(iobase, "1-" __MODULE_STRING(NR_PORTS) "i"); +MODULE_PARM_DESC(iobase, "baycom io base address"); +MODULE_PARM(irq, "1-" __MODULE_STRING(NR_PORTS) "i"); +MODULE_PARM_DESC(irq, "baycom irq number"); + +MODULE_AUTHOR("Thomas M. Sailer, sailer@ife.ee.ethz.ch, hb9jnx@hb9w.che.eu"); +MODULE_DESCRIPTION("Baycom ser12 half duplex amateur radio modem driver"); + +#endif + +__initfunc(int init_module(void)) +{ + int i; + + for (i = 0; (i < NR_PORTS) && (mode[i]); i++) { + baycom_ports[i].mode = mode[i]; + baycom_ports[i].iobase = iobase[i]; + baycom_ports[i].irq = irq[i]; + } + if (i < NR_PORTS-1) + baycom_ports[i+1].mode = NULL; + return baycom_ser_hdx_init(); +} + +/* --------------------------------------------------------------------- */ + +void cleanup_module(void) +{ + int i; + + for(i = 0; i < NR_PORTS; i++) { + struct device *dev = baycom_device+i; + struct baycom_state *bc = (struct baycom_state *)dev->priv; + + if (bc) { + if (bc->hdrv.magic != HDLCDRV_MAGIC) + printk(KERN_ERR "baycom: invalid magic in " + "cleanup_module\n"); + else + hdlcdrv_unregister_hdlcdrv(dev); + } + } +} + +#else /* MODULE */ +/* --------------------------------------------------------------------- */ +/* + * format: baycom_ser_=io,irq,mode + * mode: [*] + * * indicates software DCD + */ + +__initfunc(void baycom_ser_hdx_setup(char *str, int *ints)) +{ + int i; + + for
(i = 0; (i < NR_PORTS) && (baycom_ports[i].mode); i++); + if ((i >= NR_PORTS) || (ints[0] < 2)) { + printk(KERN_INFO "%s: too many or invalid interface " + "specifications\n", bc_drvname); + return; + } + baycom_ports[i].mode = str; + baycom_ports[i].iobase = ints[1]; + baycom_ports[i].irq = ints[2]; + if (i < NR_PORTS-1) + baycom_ports[i+1].mode = NULL; +} + +#endif /* MODULE */ +/* --------------------------------------------------------------------- */ diff -u --recursive --new-file v2.1.67/linux/drivers/net/de600.c linux/drivers/net/de600.c --- v2.1.67/linux/drivers/net/de600.c Mon Nov 3 13:04:26 1997 +++ linux/drivers/net/de600.c Sun Nov 30 12:21:45 1997 @@ -39,8 +39,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * **************************************************************/ -/* Add another "; SLOW_DOWN_IO" here if your adapter won't work OK: */ -#define DE600_SLOW_DOWN SLOW_DOWN_IO; SLOW_DOWN_IO; SLOW_DOWN_IO +/* Add more time here if your adapter won't work OK: */ +#define DE600_SLOW_DOWN udelay(delay_time) /* * If you still have trouble reading/writing to the adapter, @@ -104,6 +104,7 @@ #include #include #include +#include #include #include @@ -112,6 +113,9 @@ static unsigned int de600_debug = DE600_DEBUG; MODULE_PARM(de600_debug, "i"); + +static unsigned int delay_time = 10; +MODULE_PARM(delay_time, "i"); #ifdef FAKE_SMALL_MAX static unsigned long de600_rspace(struct sock *sk); diff -u --recursive --new-file v2.1.67/linux/drivers/net/e2100.c linux/drivers/net/e2100.c --- v2.1.67/linux/drivers/net/e2100.c Mon Nov 3 13:04:26 1997 +++ linux/drivers/net/e2100.c Sun Nov 30 12:21:45 1997 @@ -150,7 +150,7 @@ /* Verify by making certain that there is a 8390 at there. 
*/ outb(E8390_NODMA + E8390_STOP, ioaddr); - SLOW_DOWN_IO; + udelay(1); /* we want to delay one I/O cycle - which is 2MHz */ status = inb(ioaddr); if (status != 0x21 && status != 0x23) return ENODEV; diff -u --recursive --new-file v2.1.67/linux/drivers/net/eepro.c linux/drivers/net/eepro.c --- v2.1.67/linux/drivers/net/eepro.c Mon Nov 3 13:04:26 1997 +++ linux/drivers/net/eepro.c Sun Nov 30 12:21:45 1997 @@ -102,6 +102,7 @@ #include #include #include +#include #include #include @@ -652,8 +653,8 @@ outb(SEL_RESET_CMD, ioaddr); /* We are supposed to wait for 2 us after a SEL_RESET */ - SLOW_DOWN_IO; - SLOW_DOWN_IO; + + udelay(2); lp->tx_start = lp->tx_end = XMT_LOWER_LIMIT << 8; /* or = RCV_RAM */ lp->tx_last = 0; @@ -695,8 +696,7 @@ /* Try to restart the adaptor. */ outb(SEL_RESET_CMD, ioaddr); /* We are supposed to wait for 2 us after a SEL_RESET */ - SLOW_DOWN_IO; - SLOW_DOWN_IO; + udelay(2); /* Do I also need to flush the transmit buffers here? YES? */ lp->tx_start = lp->tx_end = rcv_ram; @@ -824,9 +824,9 @@ /* Update the statistics here. What statistics? */ /* We are supposed to wait for 200 us after a RESET */ - SLOW_DOWN_IO; - SLOW_DOWN_IO; /* May not be enough? */ + udelay(200); + MOD_DEC_USE_COUNT; return 0; } @@ -933,8 +933,7 @@ /* Acknowledge that the MC setup is done */ do { /* We should be doing this in the eepro_interrupt()! */ - SLOW_DOWN_IO; - SLOW_DOWN_IO; + udelay(2); if (inb(ioaddr + STATUS_REG) & 0x08) { i = inb(ioaddr); @@ -962,7 +961,7 @@ /* IMPORTANT - the 82595 will be set to Bank 0 after the eeprom is read */ /* The delay between EEPROM clock transitions. 
*/ -#define eeprom_delay() { int _i = 40; while (--_i > 0) { __SLOW_DOWN_IO; }} +#define eeprom_delay() { udelay(40); } #define EE_READ_CMD (6 << 6) int diff -u --recursive --new-file v2.1.67/linux/drivers/net/eepro100.c linux/drivers/net/eepro100.c --- v2.1.67/linux/drivers/net/eepro100.c Sat Nov 29 11:25:10 1997 +++ linux/drivers/net/eepro100.c Sun Nov 30 12:21:45 1997 @@ -61,6 +61,7 @@ #include #include #include +#include #include /* Processor type for cache alignment. */ #include #include diff -u --recursive --new-file v2.1.67/linux/drivers/net/eexpress.h linux/drivers/net/eexpress.h --- v2.1.67/linux/drivers/net/eexpress.h Sat Sep 28 12:06:12 1996 +++ linux/drivers/net/eexpress.h Sun Nov 30 12:21:45 1997 @@ -37,7 +37,7 @@ #define ASIC_RST 0x40 #define i586_RST 0x80 -#define eeprom_delay() { int _i = 40; while (--_i>0) { __SLOW_DOWN_IO; }} +#define eeprom_delay() { udelay(40); } /* * i82586 Memory Configuration diff -u --recursive --new-file v2.1.67/linux/drivers/net/hdlcdrv.c linux/drivers/net/hdlcdrv.c --- v2.1.67/linux/drivers/net/hdlcdrv.c Sat Nov 29 11:25:10 1997 +++ linux/drivers/net/hdlcdrv.c Sun Nov 30 10:30:19 1997 @@ -143,9 +143,9 @@ * io regions, irqs and dma channels */ -static char ax25_bcast[7] = +static char ax25_bcast[AX25_ADDR_LEN] = {'Q' << 1, 'S' << 1, 'T' << 1, ' ' << 1, ' ' << 1, ' ' << 1, '0' << 1}; -static char ax25_test[7] = +static char ax25_nocall[AX25_ADDR_LEN] = {'L' << 1, 'I' << 1, 'N' << 1, 'U' << 1, 'X' << 1, ' ' << 1, '1' << 1}; /* --------------------------------------------------------------------- */ @@ -890,11 +890,11 @@ dev->set_mac_address = hdlcdrv_set_mac_address; dev->type = ARPHRD_AX25; /* AF_AX25 device */ - dev->hard_header_len = 73; /* We do digipeaters now */ - dev->mtu = 1500; /* eth_mtu is the default */ - dev->addr_len = 7; /* sizeof an ax.25 address */ - memcpy(dev->broadcast, ax25_bcast, 7); - memcpy(dev->dev_addr, ax25_test, 7); + dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN; + dev->mtu = 
AX25_DEF_PACLEN; /* eth_mtu is the default */ + dev->addr_len = AX25_ADDR_LEN; /* sizeof an ax.25 address */ + memcpy(dev->broadcast, ax25_bcast, AX25_ADDR_LEN); + memcpy(dev->dev_addr, ax25_nocall, AX25_ADDR_LEN); /* New style flags */ dev->flags = 0; diff -u --recursive --new-file v2.1.67/linux/drivers/net/hp-plus.c linux/drivers/net/hp-plus.c --- v2.1.67/linux/drivers/net/hp-plus.c Mon Nov 3 13:04:26 1997 +++ linux/drivers/net/hp-plus.c Sun Nov 30 12:21:45 1997 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -266,7 +267,7 @@ /* Reset the 8390 and HP chip. */ option_reg = inw(ioaddr + HPP_OPTION); outw(option_reg & ~(NICReset + ChipReset), ioaddr + HPP_OPTION); - SLOW_DOWN_IO; SLOW_DOWN_IO; + udelay(5); /* Unreset the board and enable interrupts. */ outw(option_reg | (EnableIRQ + NICReset + ChipReset), ioaddr + HPP_OPTION); @@ -307,12 +308,11 @@ outw(option_reg & ~(NICReset + ChipReset), ioaddr + HPP_OPTION); /* Pause a few cycles for the hardware reset to take place. */ - SLOW_DOWN_IO; - SLOW_DOWN_IO; + udelay(5); ei_status.txing = 0; outw(option_reg | (EnableIRQ + NICReset + ChipReset), ioaddr + HPP_OPTION); - SLOW_DOWN_IO; SLOW_DOWN_IO; + udelay(5); if ((inb_p(ioaddr+NIC_OFFSET+EN0_ISR) & ENISR_RESET) == 0) diff -u --recursive --new-file v2.1.67/linux/drivers/net/hp.c linux/drivers/net/hp.c --- v2.1.67/linux/drivers/net/hp.c Mon Nov 3 13:04:26 1997 +++ linux/drivers/net/hp.c Sun Nov 30 12:21:45 1997 @@ -233,11 +233,10 @@ outb_p(0x00, hp_base + HP_CONFIGURE); ei_status.txing = 0; /* Pause just a few cycles for the hardware reset to take place. 
*/ - SLOW_DOWN_IO; - SLOW_DOWN_IO; + udelay(5); outb_p(saved_config, hp_base + HP_CONFIGURE); - SLOW_DOWN_IO; SLOW_DOWN_IO; + udelay(5); if ((inb_p(hp_base+NIC_OFFSET+EN0_ISR) & ENISR_RESET) == 0) printk("%s: hp_reset_8390() did not complete.\n", dev->name); diff -u --recursive --new-file v2.1.67/linux/drivers/net/net_init.c linux/drivers/net/net_init.c --- v2.1.67/linux/drivers/net/net_init.c Mon Nov 3 13:04:26 1997 +++ linux/drivers/net/net_init.c Sun Nov 30 14:00:38 1997 @@ -38,8 +38,8 @@ #include #include #include -#include #include +#include /* The network devices currently exist only in the socket namespace, so these entries are unused. The only ones that make sense are @@ -112,7 +112,7 @@ new_device = 1; } - found: /* From the double loop above. */ +found: /* From the double loop above. */ if (dev->name && ((dev->name[0] == '\0') || (dev->name[0] == ' '))) { @@ -126,14 +126,9 @@ ether_setup(dev); /* Hmmm, should this be called here? */ - if (new_device) { - /* Append the device to the device queue. */ - struct device **old_devp = &dev_base; - while ((*old_devp)->next) - old_devp = & (*old_devp)->next; - (*old_devp)->next = dev; - dev->next = 0; - } + if (new_device) + register_netdevice(dev); + return dev; } @@ -173,8 +168,6 @@ int i; /* Fill in the fields of the device structure with ethernet-generic values. This should be in a common file instead of per-driver. */ - - dev_init_buffers(dev); /* register boot-defined "eth" devices */ if (dev->name && (strncmp(dev->name, "eth", 3) == 0)) { @@ -195,6 +188,7 @@ dev->set_mac_address = eth_mac_addr; dev->hard_header_cache = eth_header_cache; dev->header_cache_update= eth_header_cache_update; + dev->hard_header_parse = eth_header_parse; dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; @@ -206,11 +200,8 @@ /* New-style flags. 
*/ dev->flags = IFF_BROADCAST|IFF_MULTICAST; - dev->family = AF_INET; - dev->pa_addr = 0; - dev->pa_brdaddr = 0; - dev->pa_mask = 0; - dev->pa_alen = 4; + + dev_init_buffers(dev); } #ifdef CONFIG_FDDI @@ -222,8 +213,6 @@ * This should be in a common file instead of per-driver. */ - dev_init_buffers(dev); - dev->change_mtu = fddi_change_mtu; dev->hard_header = fddi_header; dev->rebuild_header = fddi_rebuild_header; @@ -238,11 +227,9 @@ /* New-style flags */ dev->flags = IFF_BROADCAST | IFF_MULTICAST; - dev->family = AF_INET; - dev->pa_addr = 0; - dev->pa_brdaddr = 0; - dev->pa_mask = 0; - dev->pa_alen = 4; + + dev_init_buffers(dev); + return; } @@ -264,8 +251,6 @@ void ltalk_setup(struct device *dev) { /* Fill in the fields of the device structure with localtalk-generic values. */ - - dev_init_buffers(dev); dev->change_mtu = ltalk_change_mtu; dev->hard_header = NULL; @@ -283,11 +268,8 @@ dev->broadcast[0] = 0xFF; dev->flags = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP; - dev->family = AF_APPLETALK; - dev->pa_addr = 0; - dev->pa_brdaddr = 0; - dev->pa_mask = 0; - dev->pa_alen = 1; + + dev_init_buffers(dev); } #endif @@ -309,134 +291,61 @@ return 0; } -int register_netdev(struct device *dev) +static int etherdev_get_index(struct device *dev) { - struct device *d = dev_base; - unsigned long flags; int i=MAX_ETH_CARDS; - save_flags(flags); - cli(); - - if (dev) { - if (dev->name && - ((dev->name[0] == '\0') || (dev->name[0] == ' '))) { - for (i = 0; i < MAX_ETH_CARDS; ++i) - if (ethdev_index[i] == NULL) { - sprintf(dev->name, "eth%d", i); - printk("loading device '%s'...\n", dev->name); - ethdev_index[i] = dev; - break; - } - } - - if (dev->init) { - sti(); /* device probes assume interrupts enabled */ - if (dev->init(dev) != 0) { - if (i < MAX_ETH_CARDS) ethdev_index[i] = NULL; - restore_flags(flags); - return -EIO; - } - cli(); - } - - /* Add device to end of chain */ - if (dev_base) { - while (d->next) - d = d->next; - d->next = dev; + for (i = 0; i < MAX_ETH_CARDS; ++i) 
{ + if (ethdev_index[i] == NULL) { + sprintf(dev->name, "eth%d", i); + printk("loading device '%s'...\n", dev->name); + ethdev_index[i] = dev; + return i; } - else - dev_base = dev; - dev->next = NULL; - dev->ifindex = dev_new_index(); } - restore_flags(flags); - return 0; + return -1; } -void unregister_netdev(struct device *dev) +static void etherdev_put_index(struct device *dev) { - struct device *d = dev_base; - unsigned long flags; int i; - - save_flags(flags); - cli(); - - if (dev == NULL) - { - printk("was NULL\n"); - restore_flags(flags); - return; - } - /* else */ - if (dev->start) - printk("ERROR '%s' busy and not MOD_IN_USE.\n", dev->name); - - /* - * must jump over main_device+aliases - * avoid alias devices unregistration so that only - * net_alias module manages them - */ -#ifdef CONFIG_NET_ALIAS - if (dev_base == dev) - dev_base = net_alias_nextdev(dev); - else - { - while(d && (net_alias_nextdev(d) != dev)) /* skip aliases */ - d = net_alias_nextdev(d); - - if (d && (net_alias_nextdev(d) == dev)) - { - /* - * Critical: Bypass by consider devices as blocks (maindev+aliases) - */ - net_alias_nextdev_set(d, net_alias_nextdev(dev)); - } -#else - if (dev_base == dev) - dev_base = dev->next; - else - { - while (d && (d->next != dev)) - d = d->next; - - if (d && (d->next == dev)) - { - d->next = dev->next; - } -#endif - else - { - printk("unregister_netdev: '%s' not found\n", dev->name); - restore_flags(flags); - return; - } - } - for (i = 0; i < MAX_ETH_CARDS; ++i) - { - if (ethdev_index[i] == dev) - { + for (i = 0; i < MAX_ETH_CARDS; ++i) { + if (ethdev_index[i] == dev) { ethdev_index[i] = NULL; break; } } +} + +int register_netdev(struct device *dev) +{ + int i=-1; - restore_flags(flags); + rtnl_lock(); - /* - * You can i.e use a interfaces in a route though it is not up. - * We call close_dev (which is changed: it will down a device even if - * dev->flags==0 (but it will not call dev->stop if IFF_UP - * is not set). 
- * This will call notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev), - * dev_mc_discard(dev), .... - */ - - dev_close(dev); + if (dev->name && + (dev->name[0] == '\0' || dev->name[0] == ' ')) + i = etherdev_get_index(dev); + + if (register_netdevice(dev)) { + if (i >= 0) + etherdev_put_index(dev); + rtnl_unlock(); + return -EIO; + } + rtnl_unlock(); + return 0; +} + +void unregister_netdev(struct device *dev) +{ + rtnl_lock(); + unregister_netdevice(dev); + etherdev_put_index(dev); + rtnl_unlock(); } + #ifdef CONFIG_TR /* The list of used and available "tr" slots */ #define MAX_TR_CARDS 16 @@ -488,15 +397,6 @@ break; } - if (new_device) { - /* Append the device to the device queue. */ - struct device **old_devp = &dev_base; - - while ((*old_devp)->next) - old_devp = & (*old_devp)->next; - (*old_devp)->next = dev; - dev->next = 0; - } dev->hard_header = tr_header; dev->rebuild_header = tr_rebuild_header; @@ -511,11 +411,9 @@ /* New-style flags. */ dev->flags = IFF_BROADCAST; - dev->family = AF_INET; - dev->pa_addr = 0; - dev->pa_brdaddr = 0; - dev->pa_mask = 0; - dev->pa_alen = 4; + + if (new_device) + register_netdevice(dev); return dev; } @@ -553,99 +451,21 @@ int register_trdev(struct device *dev) { - unsigned long flags; - dev_init_buffers(dev); - save_flags(flags); - - if (dev && dev->init) { - sti(); /* device probes assume interrupts enabled */ - if (dev->init(dev) != 0) { - unregister_trdev(dev); - restore_flags(flags); - return -EIO; - } - cli(); - + if (dev->init && dev->init(dev) != 0) { + unregister_trdev(dev); + return -EIO; } - restore_flags(flags); return 0; } void unregister_trdev(struct device *dev) { - struct device *d = dev_base; - unsigned long flags; - - save_flags(flags); - cli(); - - if (dev == NULL) - { - printk("was NULL\n"); - restore_flags(flags); - return; - } - /* else */ - if (dev->start) - printk("ERROR '%s' busy and not MOD_IN_USE.\n", dev->name); - - /* - * must jump over main_device+aliases - * avoid alias devices unregistration 
so that only - * net_alias module manages them - */ -#ifdef CONFIG_NET_ALIAS - if (dev_base == dev) - dev_base = net_alias_nextdev(dev); - else - { - while(d && (net_alias_nextdev(d) != dev)) /* skip aliases */ - d = net_alias_nextdev(d); - - if (d && (net_alias_nextdev(d) == dev)) - { - /* - * Critical: Bypass by consider devices as blocks (maindev+aliases) - */ - net_alias_nextdev_set(d, net_alias_nextdev(dev)); - } -#else - if (dev_base == dev) - dev_base = dev->next; - else - { - while (d && (d->next != dev)) - d = d->next; - - if (d && (d->next == dev)) - { - d->next = dev->next; - } -#endif - else - { - printk("unregister_trdev: '%s' not found\n", dev->name); - restore_flags(flags); - return; - } - } - + rtnl_lock(); + unregister_netdevice(dev); + rtnl_unlock(); tr_freedev(dev); - - restore_flags(flags); - - /* - * You can i.e use a interfaces in a route though it is not up. - * We call close_dev (which is changed: it will down a device even if - * dev->flags==0 (but it will not call dev->stop if IFF_UP - * is not set). - * This will call notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev), - * dev_mc_discard(dev), .... - */ - - dev_close(dev); } #endif @@ -655,6 +475,5 @@ * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/net/inet -Wall -Wstrict-prototypes -O6 -m486 -c net_init.c" * version-control: t * kept-new-versions: 5 - * tab-width: 4 * End: */ diff -u --recursive --new-file v2.1.67/linux/drivers/net/plip.c linux/drivers/net/plip.c --- v2.1.67/linux/drivers/net/plip.c Sat Nov 29 11:25:10 1997 +++ linux/drivers/net/plip.c Sun Nov 30 14:00:38 1997 @@ -1,5 +1,3 @@ -#warning This wont work until we merge the networking changes -#if 0 /* $Id: plip.c,v 1.3.6.2 1997/04/16 15:07:56 phil Exp $ */ /* PLIP: A parallel port "network" driver for Linux. */ /* This driver is for parallel port with 5-bit cable (LapLink (R) cable). 
*/ @@ -1238,4 +1236,3 @@ * compile-command: "gcc -DMODULE -DMODVERSIONS -D__KERNEL__ -Wall -Wstrict-prototypes -O2 -g -fomit-frame-pointer -pipe -m486 -c plip.c" * End: */ -#endif diff -u --recursive --new-file v2.1.67/linux/drivers/net/ppp.c linux/drivers/net/ppp.c --- v2.1.67/linux/drivers/net/ppp.c Sat Nov 29 11:25:10 1997 +++ linux/drivers/net/ppp.c Sun Nov 30 14:00:38 1997 @@ -83,6 +83,7 @@ #include #include #include +#include #include #include #include @@ -682,10 +683,12 @@ if (tty != NULL && tty->disc_data == ppp) tty->disc_data = NULL; /* Break the tty->ppp link */ + rtnl_lock(); /* Strong layering violation. */ - if (dev && dev->flags & IFF_UP) { - dev_close (dev); /* close the device properly */ - } + if (dev && dev->flags & IFF_UP) { + dev_close (dev); /* close the device properly */ + } + rtnl_unlock(); ppp_free_buf (ppp->rbuf); ppp_free_buf (ppp->wbuf); @@ -3017,8 +3020,8 @@ */ if (!ppp->inuse) { dev_kfree_skb (skb, FREE_WRITE); - printk("I am dying to know, are you still alive?\n"); -#ifdef main_got_it_is_something + printk(KERN_WARNING "ppp: I am dying to know, are you still alive?\n"); +#if 0 dev_close (dev); #endif return 0; diff -u --recursive --new-file v2.1.67/linux/drivers/net/scc.c linux/drivers/net/scc.c --- v2.1.67/linux/drivers/net/scc.c Sat Nov 29 11:25:10 1997 +++ linux/drivers/net/scc.c Sat Nov 29 16:29:37 1997 @@ -1,4 +1,4 @@ -#define RCS_ID "$Id: scc.c,v 1.69 1997/04/06 19:22:45 jreuter Exp jreuter $" +#define RCS_ID "$Id: scc.c,v 1.71 1997/11/29 19:59:20 jreuter Exp jreuter $" #define VERSION "3.0" #define BANNER "Z8530 SCC driver version "VERSION".dl1bke (experimental) by DL1BKE\n" @@ -89,6 +89,7 @@ 970108 - Fixed the remaining problems. 970402 - Hopefully fixed the problems with the new *_timer() routines, added calibration code. + 971012 - made SCC_DELAY a CONFIG option, added CONFIG_SCC_TRXECHO Thanks to all who contributed to this driver with ideas and bug reports! 
@@ -113,14 +114,14 @@ vy 73, Joerg Reuter ampr-net: dl1bke@db0pra.ampr.org AX-25 : DL1BKE @ DB0ACH.#NRW.DEU.EU - Internet: jreuter@lykos.oche.de + Internet: jreuter@poboxes.com + www : http://www.rat.de/jr */ /* ----------------------------------------------------------------------- */ -#undef SCC_DELAY /* perhaps your ISA bus is a *bit* too fast? */ -#undef SCC_LDELAY 1 /* slow it even a bit more down */ -#undef DONT_CHECK /* don't look if the SCCs you specified are available */ +#undef SCC_LDELAY 1 /* slow it even a bit more down */ +#undef DONT_CHECK /* don't look if the SCCs you specified are available */ #define MAXSCC 4 /* number of max. supported chips */ #define BUFSIZE 384 /* must not exceed 4096 */ @@ -224,7 +225,7 @@ static int Nchips = 0; static io_port Vector_Latch = 0; -MODULE_AUTHOR("Joerg Reuter "); +MODULE_AUTHOR("Joerg Reuter "); MODULE_DESCRIPTION("Network Device Driver for Z8530 based HDLC cards for Amateur Packet Radio"); MODULE_SUPPORTED_DEVICE("scc"); @@ -936,8 +937,10 @@ { /* force simplex operation */ if (tx) { +#ifdef CONFIG_SCC_TRXECHO cl(scc, R3, RxENABLE|ENT_HM); /* switch off receiver */ cl(scc, R15, DCDIE); /* No DCD changes, please */ +#endif set_brg(scc, time_const); /* reprogram baudrate generator */ /* DPLL -> Rx clk, BRG -> Tx CLK, TRxC mode output, TRxC = BRG */ @@ -951,29 +954,34 @@ /* DPLL -> Rx clk, DPLL -> Tx CLK, TRxC mode output, TRxC = DPLL */ wr(scc, R11, RCDPLL|TCDPLL|TRxCOI|TRxCDP); - +#ifdef CONFIG_SCC_TRXECHO or(scc,R3,RxENABLE|ENT_HM); or(scc,R15, DCDIE); +#endif } } else { if (tx) { +#ifdef CONFIG_SCC_TRXECHO if (scc->kiss.fulldup == KISS_DUPLEX_HALF) { cl(scc, R3, RxENABLE); cl(scc, R15, DCDIE); } +#endif or(scc,R5,RTS|TxENAB); /* enable tx */ } else { cl(scc,R5,RTS|TxENAB); /* disable tx */ - + +#ifdef CONFIG_SCC_TRXECHO if (scc->kiss.fulldup == KISS_DUPLEX_HALF) { or(scc, R3, RxENABLE|ENT_HM); or(scc, R15, DCDIE); } +#endif } } @@ -2198,7 +2206,7 @@ result = scc_init(); if (result == 0) - printk(KERN_INFO 
"Copyright 1993,1997 Joerg Reuter DL1BKE (jreuter@lykos.tng.oche.de)\n"); + printk(KERN_INFO "Copyright 1993,1997 Joerg Reuter DL1BKE (jreuter@poboxes.com)\n"); return result; } diff -u --recursive --new-file v2.1.67/linux/drivers/net/slip.c linux/drivers/net/slip.c --- v2.1.67/linux/drivers/net/slip.c Sat Nov 29 11:25:10 1997 +++ linux/drivers/net/slip.c Sun Nov 30 14:00:38 1997 @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include @@ -733,6 +734,7 @@ return; } + rtnl_lock(); if (sl->dev->flags & IFF_UP) { /* STRONG layering violation! --ANK */ @@ -749,6 +751,8 @@ (void)del_timer (&sl->outfill_timer); #endif sl_free(sl); + unregister_netdevice(sl->dev); + rtnl_unlock(); MOD_DEC_USE_COUNT; } diff -u --recursive --new-file v2.1.67/linux/drivers/net/strip.c linux/drivers/net/strip.c --- v2.1.67/linux/drivers/net/strip.c Sat Nov 29 11:25:10 1997 +++ linux/drivers/net/strip.c Sun Nov 30 14:00:38 1997 @@ -1,5 +1,3 @@ -#warning "will not compile until the networking is merged" -#if 0 /* * Copyright 1996 The Board of Trustees of The Leland Stanford * Junior University. All Rights Reserved. @@ -2782,4 +2780,3 @@ printk(KERN_INFO "STRIP: Module Unloaded\n"); } #endif /* MODULE */ -#endif diff -u --recursive --new-file v2.1.67/linux/drivers/net/tlan.c linux/drivers/net/tlan.c --- v2.1.67/linux/drivers/net/tlan.c Sat Nov 29 11:25:10 1997 +++ linux/drivers/net/tlan.c Sun Nov 30 12:21:45 1997 @@ -244,12 +244,11 @@ priv->nextDevice = TLanDevices; TLanDevices = dev; TLanDevicesInstalled++; - printk("TLAN: %s irq=%2d io=%04x, %s\n", dev->name, (int) irq, io_base, TLan -DeviceList[dl_ix].deviceName ); + printk("TLAN: %s irq=%2d io=%04x, %s\n", dev->name, (int) irq, io_base, TLanDeviceList[dl_ix].deviceName ); } } - // printk( "TLAN: Found %d device(s).\n", TLanDevicesInstalled ); + /* printk( "TLAN: Found %d device(s).\n", TLanDevicesInstalled ); */ return ( ( TLanDevicesInstalled >= 0 ) ? 
0 : -ENODEV ); @@ -399,8 +398,7 @@ * **************************************************************/ -int TLan_PciProbe( u8 *pci_bus, u8 *pci_dfn, u8 *pci_irq, u8 *pci_rev, u32 *pci_ -io_base, u32 *dl_ix ) +int TLan_PciProbe( u8 *pci_bus, u8 *pci_dfn, u8 *pci_irq, u8 *pci_rev, u32 *pci_io_base, u32 *dl_ix ) { static int dl_index = 0; static int pci_index = 0; @@ -438,10 +436,8 @@ pcibios_read_config_byte ( *pci_bus, *pci_dfn, PCI_REVISION_ID, pci_rev); pcibios_read_config_byte ( *pci_bus, *pci_dfn, PCI_INTERRUPT_LINE, pci_irq); pcibios_read_config_word ( *pci_bus, *pci_dfn, PCI_COMMAND, &pci_command); - pcibios_read_config_dword( *pci_bus, *pci_dfn, PCI_BASE_ADDRESS_0, pci_io_ba -se); - pcibios_read_config_byte ( *pci_bus, *pci_dfn, PCI_LATENCY_TIMER, &pci_laten -cy); + pcibios_read_config_dword( *pci_bus, *pci_dfn, PCI_BASE_ADDRESS_0, pci_io_base); + pcibios_read_config_byte ( *pci_bus, *pci_dfn, PCI_LATENCY_TIMER, &pci_latency); if (pci_latency < 0x10) { pcibios_write_config_byte( *pci_bus, *pci_dfn, PCI_LATENCY_TIMER, 0xff); @@ -452,8 +448,7 @@ pcibios_read_config_dword( *pci_bus, *pci_dfn, reg, pci_io_base); if ((pci_command & PCI_COMMAND_IO) && (*pci_io_base & 0x3)) { *pci_io_base &= PCI_BASE_ADDRESS_IO_MASK; - TLAN_DBG( TLAN_DEBUG_GNRL, "TLAN: IO mapping is available at %x.\n", *pc -i_io_base); + TLAN_DBG( TLAN_DEBUG_GNRL, "TLAN: IO mapping is available at %x.\n", *pci_io_base); break; } else { *pci_io_base = 0; @@ -592,11 +587,9 @@ TLanPrivateInfo *priv = (TLanPrivateInfo *) dev->priv; priv->tlanRev = TLan_DioRead8( dev->base_addr, TLAN_DEF_REVISION ); - err = request_irq( dev->irq, TLan_HandleInterrupt, SA_SHIRQ, TLanSignature, dev - ); + err = request_irq( dev->irq, TLan_HandleInterrupt, SA_SHIRQ, TLanSignature, dev); if ( err ) { - printk( "TLAN: Cannot open %s because IRQ %d is already in use.\n", dev->name -, dev->irq ); + printk( "TLAN: Cannot open %s because IRQ %d is already in use.\n", dev->name , dev->irq ); return -EAGAIN; } @@ -631,8 +624,7 @@ 
outl( TLAN_HC_GO | TLAN_HC_RT, dev->base_addr + TLAN_HOST_CMD ); } - TLAN_DBG( TLAN_DEBUG_GNRL, "TLAN: Device %s opened. Revision = %x\n", dev->na -me, priv->tlanRev ); + TLAN_DBG( TLAN_DEBUG_GNRL, "TLAN: Device %s opened. Revision = %x\n", dev->name, priv->tlanRev ); return 0; @@ -677,8 +669,7 @@ tail_list = priv->txList + priv->txTail; if ( tail_list->cStat != TLAN_CSTAT_UNUSED ) { - TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: %s is busy (Head=%d Tail=%d)\n", dev -->name, priv->txHead, priv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: %s is busy (Head=%d Tail=%d)\n", dev->name, priv->txHead, priv->txTail ); dev->tbusy = 1; priv->txBusyCount++; return 1; @@ -698,22 +689,19 @@ tail_list->buffer[1].count = 0; tail_list->buffer[1].address = 0; } - // are we transferring? + /* are we transferring? */ cli(); tail_list->cStat = TLAN_CSTAT_READY; if ( ! priv->txInProgress ) { priv->txInProgress = 1; outw( 0x4, dev->base_addr + TLAN_HOST_INT ); - TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: Starting TX on buffer %d\n", priv->t -xTail ); + TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: Starting TX on buffer %d\n", priv->txTail ); outl( virt_to_bus( tail_list ), dev->base_addr + TLAN_CH_PARM ); outl( TLAN_HC_GO | TLAN_HC_ACK, dev->base_addr + TLAN_HOST_CMD ); } else { - TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: Adding buffer %d to TX channel\n", p -riv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: Adding buffer %d to TX channel\n", priv->txTail ); if ( priv->txTail == 0 ) - ( priv->txList + ( TLAN_NUM_TX_LISTS - 1 ) )->forward = virt_to_bus( tail_lis -t ); + ( priv->txList + ( TLAN_NUM_TX_LISTS - 1 ) )->forward = virt_to_bus( tail_list ); else ( priv->txList + ( priv->txTail - 1 ) )->forward = virt_to_bus( tail_list ); } @@ -764,14 +752,13 @@ dev = (struct device *) dev_id; if ( dev->interrupt ) - printk( "TLAN: Re-entering interrupt handler for %s: %d.\n" , dev->name, dev -->interrupt ); + printk( "TLAN: Re-entering interrupt handler for %s: %d.\n" , dev->name, 
dev->interrupt ); dev->interrupt++; cli(); host_int = inw( dev->base_addr + TLAN_HOST_INT ); - outw( host_int, dev->base_addr + TLAN_HOST_INT ); // Deactivate Ints + outw( host_int, dev->base_addr + TLAN_HOST_INT ); /* Deactivate Ints */ type = ( host_int & TLAN_HI_IT_MASK ) >> 2; @@ -852,10 +839,8 @@ /* Should only read stats if open ? */ TLan_ReadAndClearStats( dev, TLAN_RECORD ); - TLAN_DBG( TLAN_DEBUG_RX, "TLAN RECEIVE: %s EOC count = %d\n", dev->name, priv- ->rxEocCount ); - TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: %s Busy count = %d\n", dev->name, pri -v->txBusyCount ); + TLAN_DBG( TLAN_DEBUG_RX, "TLAN RECEIVE: %s EOC count = %d\n", dev->name, priv->rxEocCount ); + TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: %s Busy count = %d\n", dev->name, priv->txBusyCount ); if ( debug & TLAN_DEBUG_GNRL ) { TLan_PrintDio( dev->base_addr ); TLan_PhyPrint( dev ); @@ -971,7 +956,7 @@ u32 TLan_HandleInvalid( struct device *dev, u16 host_int ) { host_int = 0; - // printk( "TLAN: Invalid interrupt on %s.\n", dev->name ); + /* printk( "TLAN: Invalid interrupt on %s.\n", dev->name ); */ return 0; } /* TLan_HandleInvalid */ @@ -1008,8 +993,7 @@ TLanList *head_list; u32 ack = 1; - TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: Handling TX EOF (Head=%d Tail=%d)\n", - priv->txHead, priv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: Handling TX EOF (Head=%d Tail=%d)\n", priv->txHead, priv->txTail ); host_int = 0; head_list = priv->txList + priv->txHead; if ( head_list->cStat & TLAN_CSTAT_EOC ) @@ -1017,7 +1001,7 @@ if ( ! 
head_list->cStat & TLAN_CSTAT_FRM_CMP ) { printk( "TLAN: Received interrupt for uncompleted TX frame.\n" ); } - // printk( "Ack %d CSTAT=%hx\n", priv->txHead, head_list->cStat ); + /* printk( "Ack %d CSTAT=%hx\n", priv->txHead, head_list->cStat ); */ #if LINUX_KERNEL_VERSION > 0x20100 priv->stats->tx_bytes += head_list->frameSize; @@ -1029,8 +1013,7 @@ if ( priv->txHead >= TLAN_NUM_TX_LISTS ) priv->txHead = 0; if ( eoc ) { - TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: Handling TX EOC (Head=%d Tail=%d)\n" -, priv->txHead, priv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: Handling TX EOC (Head=%d Tail=%d)\n", priv->txHead, priv->txTail ); head_list = priv->txList + priv->txHead; if ( ( head_list->cStat & TLAN_CSTAT_READY ) == TLAN_CSTAT_READY ) { outl( virt_to_bus( head_list ), dev->base_addr + TLAN_CH_PARM ); @@ -1042,14 +1025,14 @@ TLan_DioWrite8( dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT ); if ( priv->phyFlags & TLAN_PHY_ACTIVITY ) { if ( priv->timerSetAt == 0 ) { - // printk("TxEOF Starting timer...\n"); + /* printk("TxEOF Starting timer...\n"); */ priv->timerSetAt = jiffies; priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY; priv->timerType = TLAN_TIMER_ACT; add_timer( &priv->timer ); } else if ( priv->timerType == TLAN_TIMER_ACT ) { priv->timerSetAt = jiffies; - // printk("TxEOF continuing timer...\n"); + /* printk("TxEOF continuing timer...\n"); */ } } @@ -1125,8 +1108,7 @@ TLanList *tail_list; void *t; - TLAN_DBG( TLAN_DEBUG_RX, "TLAN RECEIVE: Handling RX EOF (Head=%d Tail=%d)\n", -priv->rxHead, priv->rxTail ); + TLAN_DBG( TLAN_DEBUG_RX, "TLAN RECEIVE: Handling RX EOF (Head=%d Tail=%d)\n", priv->rxHead, priv->rxTail ); host_int = 0; head_list = priv->rxList + priv->rxHead; tail_list = priv->rxList + priv->rxTail; @@ -1143,7 +1125,7 @@ skb->dev = dev; skb_reserve( skb, 2 ); t = (void *) skb_put( skb, head_list->frameSize ); - // printk( " %hd %p %p\n", head_list->frameSize, skb->data, t ); + /* printk( " %hd %p %p\n", 
head_list->frameSize, skb->data, t ); */ #if LINUX_KERNEL_VERSION > 0x20100 priv->stats->rx_bytes += head_list->frameSize; @@ -1165,8 +1147,7 @@ if ( priv->rxTail >= TLAN_NUM_RX_LISTS ) priv->rxTail = 0; if ( eoc ) { - TLAN_DBG( TLAN_DEBUG_RX, "TLAN RECEIVE: Handling RX EOC (Head=%d Tail=%d)\n", - priv->rxHead, priv->rxTail ); + TLAN_DBG( TLAN_DEBUG_RX, "TLAN RECEIVE: Handling RX EOC (Head=%d Tail=%d)\n", priv->rxHead, priv->rxTail ); head_list = priv->rxList + priv->rxHead; outl( virt_to_bus( head_list ), dev->base_addr + TLAN_CH_PARM ); ack |= TLAN_HC_GO | TLAN_HC_RT; @@ -1175,13 +1156,13 @@ TLan_DioWrite8( dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT ); if ( priv->phyFlags & TLAN_PHY_ACTIVITY ) { if ( priv->timerSetAt == 0 ) { - // printk("RxEOF Starting timer...\n"); + /* printk("RxEOF Starting timer...\n"); */ priv->timerSetAt = jiffies; priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY; priv->timerType = TLAN_TIMER_ACT; add_timer( &priv->timer ); } else if ( priv->timerType == TLAN_TIMER_ACT ) { - // printk("RxEOF tarting continuing timer...\n"); + /* printk("RxEOF tarting continuing timer...\n"); */ priv->timerSetAt = jiffies; } } @@ -1250,8 +1231,7 @@ host_int = 0; if ( priv->tlanRev < 0x30 ) { - TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: Handling TX EOC (Head=%d Tail=%d) -- - IRQ\n", priv->txHead, priv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, "TLAN TRANSMIT: Handling TX EOC (Head=%d Tail=%d) -- IRQ\n", priv->txHead, priv->txTail ); head_list = priv->txList + priv->txHead; if ( ( head_list->cStat & TLAN_CSTAT_READY ) == TLAN_CSTAT_READY ) { outl( virt_to_bus( head_list ), dev->base_addr + TLAN_CH_PARM ); @@ -1312,7 +1292,7 @@ priv->timerType = TLAN_TIMER_LINK; add_timer( &priv->timer ); } else { - //printk( " RX GO---->\n" ); + /*printk( " RX GO---->\n" ); */ outl( virt_to_bus( priv->rxList ), dev->base_addr + TLAN_CH_PARM ); outl( TLAN_HC_GO | TLAN_HC_RT, dev->base_addr + TLAN_HOST_CMD ); } @@ -1328,8 +1308,7 @@ TLan_PhyPrint( dev ); } } - 
TLAN_DBG( TLAN_DEBUG_GNRL, "TLAN: Status Check! %s Net_Sts=%x\n", dev->name, -(unsigned) net_sts ); + TLAN_DBG( TLAN_DEBUG_GNRL, "TLAN: Status Check! %s Net_Sts=%x\n", dev->name, (unsigned) net_sts ); } return ack; @@ -1367,8 +1346,7 @@ host_int = 0; if ( priv->tlanRev < 0x30 ) { - TLAN_DBG( TLAN_DEBUG_RX, "TLAN RECEIVE: Handling RX EOC (Head=%d Tail=%d) -- -IRQ\n", priv->rxHead, priv->rxTail ); + TLAN_DBG( TLAN_DEBUG_RX, "TLAN RECEIVE: Handling RX EOC (Head=%d Tail=%d) -- IRQ\n", priv->rxHead, priv->rxTail ); head_list = priv->rxList + priv->rxHead; outl( virt_to_bus( head_list ), dev->base_addr + TLAN_CH_PARM ); ack |= TLAN_HC_GO | TLAN_HC_RT; @@ -1427,8 +1405,7 @@ u16 gen_sts; TLanPrivateInfo *priv = (TLanPrivateInfo *) dev->priv; - // printk( "TLAN: %s Entered Timer, type = %d\n", dev->name, priv->timerType ) -; + /* printk( "TLAN: %s Entered Timer, type = %d\n", dev->name, priv->timerType ); */ switch ( priv->timerType ) { case TLAN_TIMER_LINK: @@ -1497,8 +1474,7 @@ for ( i = 0; i < TLAN_NUM_TX_LISTS; i++ ) { list = priv->txList + i; list->cStat = TLAN_CSTAT_UNUSED; - list->buffer[0].address = virt_to_bus( priv->txBuffer + ( i * TLAN_MAX_FRAME_S -IZE ) ); + list->buffer[0].address = virt_to_bus( priv->txBuffer + ( i * TLAN_MAX_FRAME_SIZE ) ); list->buffer[2].count = 0; list->buffer[2].address = 0; } @@ -1510,8 +1486,7 @@ list->cStat = TLAN_CSTAT_READY; list->frameSize = TLAN_MAX_FRAME_SIZE; list->buffer[0].count = TLAN_MAX_FRAME_SIZE | TLAN_LAST_BUFFER; - list->buffer[0].address = virt_to_bus( priv->rxBuffer + ( i * TLAN_MAX_FRAME_S -IZE ) ); + list->buffer[0].address = virt_to_bus( priv->rxBuffer + ( i * TLAN_MAX_FRAME_SIZE ) ); list->buffer[1].count = 0; list->buffer[1].address = 0; if ( i < TLAN_NUM_RX_LISTS - 1 ) @@ -1544,8 +1519,7 @@ u32 data0, data1; int i; - printk( "TLAN: Contents of internal registers for io base 0x%04hx.\n", io_bas -e ); + printk( "TLAN: Contents of internal registers for io base 0x%04hx.\n", io_base ); printk( "TLAN: Off. 
+0 +4\n" ); for ( i = 0; i < 0x4C; i+= 8 ) { data0 = TLan_DioRead32( io_base, i ); @@ -1583,10 +1557,9 @@ printk( "TLAN: Forward = 0x%08x\n", list->forward ); printk( "TLAN: CSTAT = 0x%04hx\n", list->cStat ); printk( "TLAN: Frame Size = 0x%04hx\n", list->frameSize ); - // for ( i = 0; i < 10; i++ ) { + /* for ( i = 0; i < 10; i++ ) { */ for ( i = 0; i < 2; i++ ) { - printk( "TLAN: Buffer[%d].count, addr = 0x%08x, 0x%08x\n", i, list->buffe -r[i].count, list->buffer[i].address ); + printk( "TLAN: Buffer[%d].count, addr = 0x%08x, 0x%08x\n", i, list->buffer[i].count, list->buffer[i].address ); } } /* TLan_PrintList */ @@ -1695,41 +1668,41 @@ u32 data; u8 data8; -// 1. Assert reset bit. +/* 1. Assert reset bit. */ data = inl(dev->base_addr + TLAN_HOST_CMD); data |= TLAN_HC_AD_RST; outl(data, dev->base_addr + TLAN_HOST_CMD); -// 2. Turn off interrupts. ( Probably isn't necessary ) +/* 2. Turn off interrupts. ( Probably isn't necessary ) */ data = inl(dev->base_addr + TLAN_HOST_CMD); data |= TLAN_HC_INT_OFF; outl(data, dev->base_addr + TLAN_HOST_CMD); -// 3. Clear AREGs and HASHs. +/* 3. Clear AREGs and HASHs. */ for ( i = TLAN_AREG_0; i <= TLAN_HASH_2; i += 4 ) { TLan_DioWrite32( dev->base_addr, (u16) i, 0 ); } -// 4. Setup NetConfig register. +/* 4. Setup NetConfig register. */ data = TLAN_NET_CFG_1FRAG | TLAN_NET_CFG_1CHAN | TLAN_NET_CFG_PHY_EN; TLan_DioWrite16( dev->base_addr, TLAN_NET_CONFIG, (u16) data ); -// 5. Load Ld_Tmr and Ld_Thr in HOST_CMD. +/* 5. Load Ld_Tmr and Ld_Thr in HOST_CMD. */ outl( TLAN_HC_LD_TMR | 0x0, dev->base_addr + TLAN_HOST_CMD ); outl( TLAN_HC_LD_THR | 0x1, dev->base_addr + TLAN_HOST_CMD ); -// 6. Unreset the MII by setting NMRST (in NetSio) to 1. +/* 6. Unreset the MII by setting NMRST (in NetSio) to 1. */ outw( TLAN_NET_SIO, dev->base_addr + TLAN_DIO_ADR ); addr = dev->base_addr + TLAN_DIO_DATA + TLAN_NET_SIO; TLan_SetBit( TLAN_NET_SIO_NMRST, addr ); -// 7. Setup the remaining registers. +/* 7. Setup the remaining registers. 
*/ if ( priv->tlanRev >= 0x30 ) { data8 = TLAN_ID_TX_EOC | TLAN_ID_RX_EOC; @@ -1947,17 +1920,18 @@ u16 vendor; u16 device; - priv->phyCheck = &TLan_PhyNop; // Make sure these aren't ever NULL + priv->phyCheck = &TLan_PhyNop; /* Make sure these aren't ever NULL */ priv->phyService = &TLan_PhyNop; vendor = TLanDeviceList[priv->pciEntry].vendorId; device = TLanDeviceList[priv->pciEntry].deviceId; - // This is a bit uglier than I'd like, but the 0xF130 device must - // NOT be assigned a valid PHY as it uses an unmanaged, bit-rate - // PHY. It is simplest just to use another goto, rather than - // nesting the two for loops in the if statement. - + /* + * This is a bit uglier than I'd like, but the 0xF130 device must + * NOT be assigned a valid PHY as it uses an unmanaged, bit-rate + * PHY. It is simplest just to use another goto, rather than + * nesting the two for loops in the if statement. + */ if ( ( vendor == PCI_VENDOR_ID_COMPAQ ) && ( device == PCI_DEVICE_ID_NETFLEX_3P ) ) { entry = 0; @@ -2085,8 +2059,7 @@ TLan_MiiReadReg( io, phy, MII_GEN_CTL, &gen_ctl ); if ( gen_ctl & MII_GC_PDOWN ) { TLan_MiiSync( io ); - TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_PDOWN | MII_GC_LOOPBK | MII_GC_ -ISOLATE ); + TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_PDOWN | MII_GC_LOOPBK | MII_GC_ISOLATE ); TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_LOOPBK ); udelay(50000); TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_RESET | MII_GC_LOOPBK ); @@ -2097,8 +2070,8 @@ while ( value & MII_GC_RESET ) TLan_MiiReadReg( io, phy, MII_GEN_CTL, &value ); - // TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_LOOPBK | MII_GC_DUPLEX ); - // TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_DUPLEX ); + /* TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_LOOPBK | MII_GC_DUPLEX ); */ + /* TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_DUPLEX ); */ TLan_MiiWriteReg( io, phy, MII_GEN_CTL, 0 ); udelay(500000); @@ -2110,9 +2083,9 @@ value &= ~TLAN_TC_AUISEL; TLan_MiiWriteReg( io, phy, TLAN_TLPHY_CTL, 
value ); - // Read Possible Latched Link Status + /* Read Possible Latched Link Status */ TLan_MiiReadReg( io, phy, MII_GEN_STS, &value ); - // Read Real Link Status + /* Read Real Link Status */ TLan_MiiReadReg( io, phy, MII_GEN_STS, &value ); if ( ( value & MII_GS_LINK ) || aui ) { priv->phyOnline = 1; @@ -2122,7 +2095,7 @@ TLan_DioWrite8( io, TLAN_LED_REG, 0 ); } - // Enable Interrupts + /* Enable Interrupts */ TLan_MiiReadReg( io, phy, TLAN_TLPHY_CTL, &value ); value |= TLAN_TC_INTEN; TLan_MiiWriteReg( io, phy, TLAN_TLPHY_CTL, value ); @@ -2223,11 +2196,9 @@ TLan_MiiReadReg( io, phy, MII_GEN_CTL, &gen_ctl ); if ( gen_ctl & MII_GC_PDOWN ) { TLan_MiiSync( io ); - TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_PDOWN | MII_GC_LOOPBK | MII_GC_ -ISOLATE ); + TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_PDOWN | MII_GC_LOOPBK | MII_GC_ISOLATE ); TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_LOOPBK ); - for ( i = 0; i < 500000; i++ ) - SLOW_DOWN_IO; + udelay(500000); TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_RESET | MII_GC_LOOPBK ); TLan_MiiSync( io ); } @@ -2236,8 +2207,8 @@ while ( value & MII_GC_RESET ) TLan_MiiReadReg( io, phy, MII_GEN_CTL, &value ); - // TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_LOOPBK | MII_GC_DUPLEX ); - // TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_DUPLEX ); + /* TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_LOOPBK | MII_GC_DUPLEX ); */ + /* TLan_MiiWriteReg( io, phy, MII_GEN_CTL, MII_GC_DUPLEX ); */ TLan_MiiWriteReg( io, phy, MII_GEN_CTL, 0 ); TLan_MiiReadReg( io, phy, MII_AN_ADV, &value ); value &= ~0x0140; @@ -2245,13 +2216,11 @@ TLan_MiiWriteReg( io, phy, MII_GEN_CTL, 0x1000 ); TLan_MiiWriteReg( io, phy, MII_GEN_CTL, 0x1200 ); - for ( i = 0; i < 50000; i++ ) - SLOW_DOWN_IO; - -/* - // Read Possible Latched Link Status + udelay(50000); +#if 0 + /* Read Possible Latched Link Status */ TLan_MiiReadReg( io, phy, MII_GEN_STS, &value ); - // Read Real Link Status + /* Read Real Link Status */ TLan_MiiReadReg( io, phy, 
MII_GEN_STS, &value ); if ( value & MII_GS_LINK ) { priv->phyOnline = 1; @@ -2261,15 +2230,15 @@ TLan_DioWrite8( io, TLAN_LED_REG, 0 ); } - // Enable Interrupts + /* Enable Interrupts */ TLan_MiiReadReg( io, phy, TLAN_TLPHY_CTL, &value ); value |= TLAN_TC_INTEN; TLan_MiiWriteReg( io, phy, TLAN_TLPHY_CTL, value ); -*/ +#endif sio = TLan_DioRead8( dev->base_addr, TLAN_NET_SIO ); sio &= ~TLAN_NET_SIO_MINTEN; TLan_DioWrite8( dev->base_addr, TLAN_NET_SIO, sio ); -// priv->phyOnline = 1; +/* priv->phyOnline = 1; */ return 0; @@ -2592,7 +2561,7 @@ outw( TLAN_NET_SIO, io_base + TLAN_DIO_ADR ); sio = io_base + TLAN_DIO_DATA + TLAN_NET_SIO; - // Assume clock is low, tx is enabled; + /* Assume clock is low, tx is enabled; */ for ( place = 0x80; place != 0; place >>= 1 ) { if ( place & data ) TLan_SetBit( TLAN_NET_SIO_EDATA, sio ); @@ -2608,8 +2577,7 @@ TLan_SetBit( TLAN_NET_SIO_ETXEN, sio ); if ( ( ! err ) && stop ) { - TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); // STOP, raise data while clock is h -igh + TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); /* STOP, raise data while clock is high */ TLan_SetBit( TLAN_NET_SIO_ECLOK, sio ); TLan_SetBit( TLAN_NET_SIO_EDATA, sio ); } @@ -2654,7 +2622,7 @@ sio = io_base + TLAN_DIO_DATA + TLAN_NET_SIO; *data = 0; - // Assume clock is low, tx is enabled; + /* Assume clock is low, tx is enabled; */ TLan_ClearBit( TLAN_NET_SIO_ETXEN, sio ); for ( place = 0x80; place; place >>= 1 ) { TLan_SetBit( TLAN_NET_SIO_ECLOK, sio ); @@ -2665,15 +2633,14 @@ TLan_SetBit( TLAN_NET_SIO_ETXEN, sio ); if ( ! stop ) { - TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); // Ack = 0 + TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); /* Ack = 0 */ TLan_SetBit( TLAN_NET_SIO_ECLOK, sio ); TLan_ClearBit( TLAN_NET_SIO_ECLOK, sio ); } else { - TLan_SetBit( TLAN_NET_SIO_EDATA, sio ); // No ack = 1 (?) + TLan_SetBit( TLAN_NET_SIO_EDATA, sio ); /* No ack = 1 (?) 
*/ TLan_SetBit( TLAN_NET_SIO_ECLOK, sio ); TLan_ClearBit( TLAN_NET_SIO_ECLOK, sio ); - TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); // STOP, raise data while clock is h -igh + TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); /* STOP, raise data while clock is high */ TLan_SetBit( TLAN_NET_SIO_ECLOK, sio ); TLan_SetBit( TLAN_NET_SIO_EDATA, sio ); } diff -u --recursive --new-file v2.1.67/linux/drivers/net/tlan.h linux/drivers/net/tlan.h --- v2.1.67/linux/drivers/net/tlan.h Sat Nov 29 11:25:10 1997 +++ linux/drivers/net/tlan.h Sun Nov 30 12:21:45 1997 @@ -483,26 +483,19 @@ { return ( ( a && ! b ) || ( ! a && b ) ); } -#define XOR8( a, b, c, d, e, f, g, h ) xor( a, xor( b, xor( c, xor( d, xor( e, x -or( f, xor( g, h ) ) ) ) ) ) ) +#define XOR8( a, b, c, d, e, f, g, h ) xor( a, xor( b, xor( c, xor( d, xor( e, xor( f, xor( g, h ) ) ) ) ) ) ) #define DA( a, bit ) ( ( (u8) a[bit/8] ) & ( (u8) ( 1 << bit%8 ) ) ) inline u32 TLan_HashFunc( u8 *a ) { u32 hash; - hash = XOR8( DA(a,0), DA(a, 6), DA(a,12), DA(a,18), DA(a,24), DA(a,30), DA(a,3 -6), DA(a,42) ); - hash |= XOR8( DA(a,1), DA(a, 7), DA(a,13), DA(a,19), DA(a,25), DA(a,31), DA(a,3 -7), DA(a,43) ) << 1; - hash |= XOR8( DA(a,2), DA(a, 8), DA(a,14), DA(a,20), DA(a,26), DA(a,32), DA(a,3 -8), DA(a,44) ) << 2; - hash |= XOR8( DA(a,3), DA(a, 9), DA(a,15), DA(a,21), DA(a,27), DA(a,33), DA(a,3 -9), DA(a,45) ) << 3; - hash |= XOR8( DA(a,4), DA(a,10), DA(a,16), DA(a,22), DA(a,28), DA(a,34), DA(a,4 -0), DA(a,46) ) << 4; - hash |= XOR8( DA(a,5), DA(a,11), DA(a,17), DA(a,23), DA(a,29), DA(a,35), DA(a,4 -1), DA(a,47) ) << 5; + hash = XOR8( DA(a,0), DA(a, 6), DA(a,12), DA(a,18), DA(a,24), DA(a,30), DA(a,36), DA(a,42) ); + hash |= XOR8( DA(a,1), DA(a, 7), DA(a,13), DA(a,19), DA(a,25), DA(a,31), DA(a,37), DA(a,43) ) << 1; + hash |= XOR8( DA(a,2), DA(a, 8), DA(a,14), DA(a,20), DA(a,26), DA(a,32), DA(a,38), DA(a,44) ) << 2; + hash |= XOR8( DA(a,3), DA(a, 9), DA(a,15), DA(a,21), DA(a,27), DA(a,33), DA(a,39), DA(a,45) ) << 3; + hash |= XOR8( DA(a,4), 
DA(a,10), DA(a,16), DA(a,22), DA(a,28), DA(a,34), DA(a,40), DA(a,46) ) << 4; + hash |= XOR8( DA(a,5), DA(a,11), DA(a,17), DA(a,23), DA(a,29), DA(a,35), DA(a,41), DA(a,47) ) << 5; return hash; diff -u --recursive --new-file v2.1.67/linux/drivers/pci/pci.c linux/drivers/pci/pci.c --- v2.1.67/linux/drivers/pci/pci.c Wed Nov 26 16:24:02 1997 +++ linux/drivers/pci/pci.c Sun Nov 30 13:48:48 1997 @@ -162,7 +162,7 @@ DEVICE( MOTOROLA, MOTOROLA_MPC105,"MPC105 Eagle"), DEVICE( MOTOROLA, MOTOROLA_MPC106,"MPC106 Grackle"), DEVICE( MOTOROLA, MOTOROLA_RAVEN, "Raven"), - DEVICE( PROMISE, PROMISE_IDE_UDMA,"IDE Ultra DMA/33"), + DEVICE( PROMISE, PROMISE_20246, "IDE UltraDMA/33"), DEVICE( PROMISE, PROMISE_5300, "DC5030"), DEVICE( N9, N9_I128, "Imagine 128"), DEVICE( N9, N9_I128_2, "Imagine 128v2"), diff -u --recursive --new-file v2.1.67/linux/drivers/scsi/BusLogic.h linux/drivers/scsi/BusLogic.h --- v2.1.67/linux/drivers/scsi/BusLogic.h Sat Nov 29 11:25:10 1997 +++ linux/drivers/scsi/BusLogic.h Sun Nov 30 14:11:23 1997 @@ -1526,15 +1526,11 @@ static inline void BusLogic_Delay(int Seconds) { + int Milliseconds = 1000 * Seconds; unsigned long ProcessorFlags; save_flags(ProcessorFlags); sti(); - while (--Seconds >= 0) { - int i = 1000; - do { - udelay(1000); - } while (--i); - } + while (--Milliseconds >= 0) udelay(1000); restore_flags(ProcessorFlags); } diff -u --recursive --new-file v2.1.67/linux/drivers/scsi/Config.in linux/drivers/scsi/Config.in --- v2.1.67/linux/drivers/scsi/Config.in Mon Nov 17 18:47:21 1997 +++ linux/drivers/scsi/Config.in Sun Nov 30 11:34:27 1997 @@ -85,7 +85,7 @@ if [ "$CONFIG_MCA" = "y" ]; then dep_tristate 'IBMMCA SCSI support' CONFIG_SCSI_IBMMCA $CONFIG_SCSI if [ "$CONFIG_SCSI_IBMMCA" != "n" ]; then - bool ' reset SCSI-devices while booting' SCSI_IBMMCA_DEV_RESET + bool ' reset SCSI-devices while booting' CONFIG_SCSI_IBMMCA_DEV_RESET fi fi if [ "$CONFIG_PARPORT" != "n" ]; then diff -u --recursive --new-file v2.1.67/linux/drivers/scsi/ibmmca.c 
linux/drivers/scsi/ibmmca.c --- v2.1.67/linux/drivers/scsi/ibmmca.c Mon Oct 20 10:36:52 1997 +++ linux/drivers/scsi/ibmmca.c Sun Nov 30 11:34:27 1997 @@ -199,11 +199,10 @@ 5) Magneto-Optical drives and medium-changers are also recognized, now. Therefore, we have a completely gapfree recognition of all SCSI- device-types, that are known by Linux up to kernel 2.1.31. - 6) The flag SCSI_IBMMCA_DEV_RESET has been inserted. If it is set within - the configuration, each connected SCSI-device will get a reset command - during boottime. This can be necessary for some special SCSI-devices. - This flag should be included in Config.in. - (See also the new Config.in file.) + 6) The flag CONFIG_SCSI_IBMMCA_DEV_RESET has been inserted. If it is set + within the configuration, each connected SCSI-device will get a reset + command during boottime. This can be necessary for some special + SCSI-devices. (See also the new Config.in file.) Probable next improvement: bad disk handler. - Michael Lang @@ -1164,7 +1163,7 @@ if (device_exists (shpnt, ldn, &ld[ldn].block_length, &ld[ldn].device_type)) { -#ifdef SCSI_IBMMCA_DEV_RESET +#ifdef CONFIG_SCSI_IBMMCA_DEV_RESET int ticks; printk("(resetting)"); ticks = IM_RESET_DELAY*HZ; diff -u --recursive --new-file v2.1.67/linux/drivers/sound/Makefile linux/drivers/sound/Makefile --- v2.1.67/linux/drivers/sound/Makefile Sat Nov 29 11:25:10 1997 +++ linux/drivers/sound/Makefile Sun Nov 30 13:07:12 1997 @@ -34,7 +34,7 @@ else ifeq (.defines,$(wildcard .defines)) -#include .defines +include .defines include .objects endif @@ -109,10 +109,10 @@ endif endif -ifeq ($(CONFIG_ADLIB),y) +ifeq ($(CONFIG_MSS),y) LX_OBJS += ad1848.o else - ifeq ($(CONFIG_ADLIB),m) + ifeq ($(CONFIG_MSS),m) MX_OBJS += ad1848.o endif endif diff -u --recursive --new-file v2.1.67/linux/drivers/sound/dmabuf.c linux/drivers/sound/dmabuf.c --- v2.1.67/linux/drivers/sound/dmabuf.c Sat Nov 29 11:25:11 1997 +++ linux/drivers/sound/dmabuf.c Sun Nov 30 10:30:18 1997 @@ -1539,10 +1539,11 
@@ { /* This routine is called when driver is being unloaded */ #ifdef RUNTIME_DMA_ALLOC - sound_free_dmap (dev, audio_devs[dev]->dmap_out, - audio_devs[dev]->dmap_out->dma); + if (audio_devs[dev]) + sound_free_dmap (dev, audio_devs[dev]->dmap_out, + audio_devs[dev]->dmap_out->dma); - if (audio_devs[dev]->flags & DMA_DUPLEX) + if (audio_devs[dev] && audio_devs[dev]->flags & DMA_DUPLEX) sound_free_dmap (dev, audio_devs[dev]->dmap_in, audio_devs[dev]->dmap_in->dma); #endif diff -u --recursive --new-file v2.1.67/linux/drivers/sound/lowlevel/awe_wave.c linux/drivers/sound/lowlevel/awe_wave.c --- v2.1.67/linux/drivers/sound/lowlevel/awe_wave.c Sat Nov 29 11:25:11 1997 +++ linux/drivers/sound/lowlevel/awe_wave.c Sun Nov 30 13:07:12 1997 @@ -551,7 +551,7 @@ INIT_TABLE(samples, max_samples, AWE_MAX_SAMPLES, awe_sample_list); INIT_TABLE(infos, max_infos, AWE_MAX_INFOS, awe_voice_list); - if (my_dev=sound_alloc_synthdev()) + if ((my_dev=sound_alloc_synthdev())!=-1) printk(KERN_WARNING "AWE32 Error: too many synthesizers\n"); else { voice_alloc = &awe_operations.alloc; @@ -560,7 +560,7 @@ } #ifdef CONFIG_AWE32_MIXER - if (my_mixerdev=sound_alloc_mixerdev()) { + if ((my_mixerdev=sound_alloc_mixerdev())!=-1) { mixer_devs[my_mixerdev] = &awe_mixer_operations; } #endif diff -u --recursive --new-file v2.1.67/linux/drivers/sound/pss.c linux/drivers/sound/pss.c --- v2.1.67/linux/drivers/sound/pss.c Sat Nov 29 11:25:11 1997 +++ linux/drivers/sound/pss.c Sun Nov 30 10:30:18 1997 @@ -116,17 +116,15 @@ if (devc->base != 0x230 && devc->base != 0x250) /* Some cards use these */ return 0; - if (check_region(devc->base, 16)) - { - printk("PSS: I/O port conflict\n"); - return 0; - } + if (check_region(devc->base, 16)) { + printk(KERN_ERR "PSS: I/O port conflict\n"); + return 0; + } id = inw(REG(PSS_ID)); - if ((id >> 8) != 'E') - { - /* printk( "No PSS signature detected at 0x%x (0x%x)\n", devc->base, id); */ - return 0; - } + if ((id >> 8) != 'E') { + printk(KERN_ERR "No PSS signature 
detected at 0x%x (0x%x)\n", devc->base, id); + return 0; + } return 1; } @@ -855,14 +853,28 @@ #ifdef MODULE -int io = -1; -int irq = -1; -int dma = -1; +int pss_io = 0x220; -int pssmpu, pssmss; -struct address_info cfg; +int mss_io = 0x530; +int mss_irq = 11; +int mss_dma = 1; + +int mpu_io = 0x330; +int mpu_irq = -1; + +struct address_info cfgpss = { 0 /* pss_io */, 0, -1, -1 }; +struct address_info cfgmpu = { 0 /* mpu_io */, 0 /* mpu_irq */, 0, -1 }; +struct address_info cfgmss = { 0 /* mss_io */, 0 /* mss_irq */, 0 /* mss_dma */, -1 }; + +MODULE_PARM(pss_io, "i"); +MODULE_PARM(mss_io, "i"); +MODULE_PARM(mss_irq, "i"); +MODULE_PARM(mss_dma, "i"); +MODULE_PARM(mpu_io, "i"); +MODULE_PARM(mpu_irq, "i"); static int fw_load = 0; +static int pssmpu = 0, pssmss = 0; /* * Load a PSS sound card module @@ -871,34 +883,39 @@ int init_module(void) { - if (io == -1 || irq == -1 || dma == -1) - { +#if 0 + if (pss_io == -1 || irq == -1 || dma == -1) { printk("pss: dma, irq and io must be set.\n"); return -EINVAL; - } - cfg.io_base = io; - cfg.irq = irq; + } +#endif + cfgpss.io_base = pss_io; - if (!pss_synth) - { - fw_load = 1; - pss_synthLen = mod_firmware_load("/etc/sound/pss_synth", (void *) &pss_synth); - } - if (probe_pss(&cfg)) + cfgmss.io_base = mss_io; + cfgmss.irq = mss_irq; + cfgmss.dma = mss_dma; + + cfgmpu.io_base = mpu_io; + cfgmpu.irq = mpu_irq; + + if (!pss_synth) { + fw_load = 1; + pss_synthLen = mod_firmware_load("/etc/sound/pss_synth", (void *) &pss_synth); + } + if (!probe_pss(&cfgpss)) return -ENODEV; + attach_pss(&cfgpss); /* * Attach stuff */ - if (probe_pss_mpu(&cfg)) - { - pssmpu = 1; - attach_pss_mpu(&cfg); - } - if (probe_pss_mss(&cfg)) - { - pssmss = 1; - attach_pss_mss(&cfg); - } + if (probe_pss_mpu(&cfgmpu)) { + pssmpu = 1; + attach_pss_mpu(&cfgmpu); + } + if (probe_pss_mss(&cfgmss)) { + pssmss = 1; + attach_pss_mss(&cfgmss); + } SOUND_LOCK; return 0; } @@ -909,10 +926,10 @@ if (fw_load && pss_synth) kfree(pss_synth); if (pssmss) - 
unload_pss_mss(&cfg); + unload_pss_mss(&cfgmss); if (pssmpu) - unload_pss_mpu(&cfg); - unload_pss(&cfg); + unload_pss_mpu(&cfgmpu); + unload_pss(&cfgpss); SOUND_LOCK_END; } #endif diff -u --recursive --new-file v2.1.67/linux/fs/autofs/waitq.c linux/fs/autofs/waitq.c --- v2.1.67/linux/fs/autofs/waitq.c Sat Oct 25 02:44:17 1997 +++ linux/fs/autofs/waitq.c Sun Nov 30 10:59:02 1997 @@ -42,30 +42,34 @@ static int autofs_write(struct file *file, const void *addr, int bytes) { - unsigned long fs; - unsigned long old_signal; + unsigned long fs, sigpipe, flags; const char *data = (const char *)addr; - int written = 0; + ssize_t wr = 0; /** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/ + sigpipe = sigismember(¤t->signal, SIGPIPE); + /* Save pointer to user space and point back to kernel space */ fs = get_fs(); set_fs(KERNEL_DS); - old_signal = current->signal; - - while ( bytes && (written = file->f_op->write(file,data,bytes,&file->f_pos)) > 0 ) { - data += written; - bytes -= written; + while (bytes && + (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) { + data += wr; + bytes -= wr; } - if ( written == -EPIPE && !(old_signal & (1 << (SIGPIPE-1))) ) { - /* Keep the currently executing process from receiving a - SIGPIPE unless it was already supposed to get one */ - current->signal &= ~(1 << (SIGPIPE-1)); - } set_fs(fs); + + /* Keep the currently executing process from receiving a + SIGPIPE unless it was already supposed to get one */ + if (wr == -EPIPE && !sigpipe) { + spin_lock_irqsave(¤t->sigmask_lock, flags); + sigdelset(¤t->signal, SIGPIPE); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + } return (bytes > 0); } diff -u --recursive --new-file v2.1.67/linux/fs/buffer.c linux/fs/buffer.c --- v2.1.67/linux/fs/buffer.c Wed Nov 26 16:24:03 1997 +++ linux/fs/buffer.c Sun Nov 30 10:59:02 1997 @@ -1944,7 +1944,10 @@ /* If there are still a lot of dirty buffers around, skip the sleep and flush some more */ 
if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) { - current->signal = 0; + spin_lock_irq(¤t->sigmask_lock); + flush_signals(current); + spin_unlock_irq(¤t->sigmask_lock); + interruptible_sleep_on(&bdflush_wait); } } diff -u --recursive --new-file v2.1.67/linux/fs/dcache.c linux/fs/dcache.c --- v2.1.67/linux/fs/dcache.c Tue Nov 18 17:22:08 1997 +++ linux/fs/dcache.c Sun Nov 30 10:59:02 1997 @@ -129,7 +129,7 @@ return; } - printk("Negative d_count (%d) for %s/%s\n", + printk(KERN_CRIT "Negative d_count (%d) for %s/%s\n", count, dentry->d_parent->d_name.name, dentry->d_name.name); @@ -378,7 +378,7 @@ if (!list_empty(&dentry->d_subdirs)) { this_parent = dentry; #ifdef DCACHE_DEBUG -printk("select_parent: descending to %s/%s, found=%d\n", +printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n", dentry->d_parent->d_name.name, dentry->d_name.name, found); #endif goto repeat; @@ -391,7 +391,7 @@ next = this_parent->d_child.next; this_parent = this_parent->d_parent; #ifdef DCACHE_DEBUG -printk("select_parent: ascending to %s/%s, found=%d\n", +printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n", this_parent->d_parent->d_name.name, this_parent->d_name.name, found); #endif goto resume; @@ -439,7 +439,7 @@ goal = 50; count = select_dcache(32, goal); #ifdef DCACHE_DEBUG -printk("check_dcache_memory: goal=%d, count=%d\n", goal, count); +printk(KERN_DEBUG "check_dcache_memory: goal=%d, count=%d\n", goal, count); #endif if (count) { prune_dcache(count); @@ -678,7 +678,7 @@ void d_move(struct dentry * dentry, struct dentry * target) { if (!dentry->d_inode) - printk("VFS: moving negative dcache entry\n"); + printk(KERN_WARNING "VFS: moving negative dcache entry\n"); /* Move the dentry to the target hash queue */ list_del(&dentry->d_hash); diff -u --recursive --new-file v2.1.67/linux/fs/exec.c linux/fs/exec.c --- v2.1.67/linux/fs/exec.c Sat Oct 25 02:44:17 1997 +++ linux/fs/exec.c Sun Nov 30 10:59:02 1997 @@ -459,18 
+459,10 @@ * so that a new one can be started */ -static inline void flush_old_signals(struct signal_struct *sig) +static inline void flush_old_signals(struct task_struct *t) { - int i; - struct sigaction * sa = sig->action; - - for (i=32 ; i != 0 ; i--) { - sa->sa_mask = 0; - sa->sa_flags = 0; - if (sa->sa_handler != SIG_IGN) - sa->sa_handler = NULL; - sa++; - } + flush_signals(t); + flush_signal_handlers(t); } static inline void flush_old_files(struct files_struct * files) @@ -531,7 +523,7 @@ permission(bprm->dentry->d_inode,MAY_READ)) current->dumpable = 0; - flush_old_signals(current->sig); + flush_old_signals(current); flush_old_files(current->files); return 0; diff -u --recursive --new-file v2.1.67/linux/fs/fcntl.c linux/fs/fcntl.c --- v2.1.67/linux/fs/fcntl.c Wed Sep 24 20:05:48 1997 +++ linux/fs/fcntl.c Sun Nov 30 10:59:02 1997 @@ -178,7 +178,7 @@ (euid ^ p->suid) && (euid ^ p->uid) && (uid ^ p->suid) && (uid ^ p->uid)) continue; - p->signal |= 1 << (SIGIO-1); + send_sig(SIGIO, p, 1); if (p->state == TASK_INTERRUPTIBLE && signal_pending(p)) wake_up_process(p); } diff -u --recursive --new-file v2.1.67/linux/fs/lockd/clntproc.c linux/fs/lockd/clntproc.c --- v2.1.67/linux/fs/lockd/clntproc.c Wed Oct 15 16:04:23 1997 +++ linux/fs/lockd/clntproc.c Sun Nov 30 10:59:02 1997 @@ -90,7 +90,8 @@ struct nfs_server *nfssrv = NFS_SERVER(inode); struct nlm_host *host; struct nlm_rqst reqst, *call = &reqst; - unsigned long oldmask; + sigset_t oldset; + unsigned long flags; int status; /* Always use NLM version 1 over UDP for now... */ @@ -114,16 +115,21 @@ } /* Keep the old signal mask */ - oldmask = current->blocked; + spin_lock_irqsave(¤t->sigmask_lock, flags); + oldset = current->blocked; /* If we're cleaning up locks because the process is exiting, * perform the RPC call asynchronously. 
*/ if (cmd == F_SETLK && fl->fl_type == F_UNLCK - && (current->flags & PF_EXITING)) { - current->blocked = ~0UL; /* Mask all signals */ + && (current->flags & PF_EXITING)) { + sigfillset(¤t->blocked); /* Mask all signals */ + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + call = nlmclnt_alloc_call(); call->a_flags = RPC_TASK_ASYNC; } else { + spin_unlock_irqrestore(¤t->sigmask_lock, flags); call->a_flags = 0; } call->a_host = host; @@ -145,7 +151,10 @@ if (status < 0 && (call->a_flags & RPC_TASK_ASYNC)) rpc_free(call); - current->blocked = oldmask; + spin_lock_irqsave(¤t->sigmask_lock, flags); + current->blocked = oldset; + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); done: dprintk("lockd: clnt proc returns %d\n", status); @@ -454,11 +463,16 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) { struct nlm_rqst *req; - unsigned long oldmask = current->blocked; + unsigned long flags; + sigset_t oldset; int status; /* Block all signals while setting up call */ - current->blocked = ~0UL; + spin_lock_irqsave(¤t->sigmask_lock, flags); + oldset = current->blocked; + sigfillset(¤t->blocked); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); do { req = (struct nlm_rqst *) rpc_allocate(RPC_TASK_ASYNC, @@ -474,7 +488,11 @@ if (status < 0) rpc_free(req); - current->blocked = oldmask; + spin_lock_irqsave(¤t->sigmask_lock, flags); + current->blocked = oldset; + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + return status; } diff -u --recursive --new-file v2.1.67/linux/fs/lockd/svc.c linux/fs/lockd/svc.c --- v2.1.67/linux/fs/lockd/svc.c Wed Oct 15 16:04:23 1997 +++ linux/fs/lockd/svc.c Sun Nov 30 10:59:02 1997 @@ -37,8 +37,7 @@ #define NLMDBG_FACILITY NLMDBG_SVC #define LOCKD_BUFSIZE (1024 + NLMSSVC_XDRSIZE) -#define BLOCKABLE_SIGS (~(_S(SIGKILL) | _S(SIGSTOP))) -#define _S(sig) (1 << ((sig) - 1)) +#define BLOCKABLE_SIGS 
(~(sigmask(SIGKILL) | sigmask(SIGSTOP))) extern struct svc_program nlmsvc_program; struct nlmsvc_binding * nlmsvc_ops = NULL; @@ -65,7 +64,6 @@ lockd(struct svc_rqst *rqstp) { struct svc_serv *serv = rqstp->rq_server; - sigset_t oldsigmask; int err = 0; /* Lock module and set up kernel thread */ @@ -118,8 +116,11 @@ */ while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) { - if (signalled()) - current->signal = 0; + if (signalled()) { + spin_lock_irq(¤t->sigmask_lock); + flush_signals(current); + spin_unlock_irq(¤t->sigmask_lock); + } /* * Retry any blocked locks that have been notified by @@ -162,10 +163,17 @@ } /* Process request with all signals blocked. */ - oldsigmask = current->blocked; - current->blocked = BLOCKABLE_SIGS; + spin_lock_irq(¤t->sigmask_lock); + siginitsetinv(¤t->blocked, ~BLOCKABLE_SIGS); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + svc_process(serv, rqstp); - current->blocked = oldsigmask; + + spin_lock_irq(¤t->sigmask_lock); + sigemptyset(¤t->blocked); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); /* Unlock export hash tables */ if (nlmsvc_ops) diff -u --recursive --new-file v2.1.67/linux/fs/ncpfs/sock.c linux/fs/ncpfs/sock.c --- v2.1.67/linux/fs/ncpfs/sock.c Mon Nov 17 18:47:22 1997 +++ linux/fs/ncpfs/sock.c Sun Nov 30 10:59:02 1997 @@ -75,8 +75,6 @@ #define NCP_SLACK_SPACE 1024 -#define _S(nr) (1<<((nr)-1)) - static int do_ncp_rpc_call(struct ncp_server *server, int size) { struct file *file; @@ -93,7 +91,8 @@ int major_timeout_seen; int acknowledge_seen; int n; - unsigned long old_mask; + sigset_t old_set; + unsigned long mask, flags; /* We have to check the result, so store the complete header */ struct ncp_request_header request = @@ -115,17 +114,25 @@ retrans = server->m.retry_count; major_timeout_seen = 0; acknowledge_seen = 0; - old_mask = current->blocked; - current->blocked |= ~(_S(SIGKILL) -#if 0 - | _S(SIGSTOP) -#endif - | ((server->m.flags & NCP_MOUNT_INTR) - ? 
((current->sig->action[SIGINT - 1].sa_handler == SIG_DFL - ? _S(SIGINT) : 0) - | (current->sig->action[SIGQUIT - 1].sa_handler == SIG_DFL - ? _S(SIGQUIT) : 0)) - : 0)); + + spin_lock_irqsave(¤t->sigmask_lock, flags); + old_set = current->blocked; + mask = sigmask(SIGKILL) | sigmask(SIGSTOP); + if (server->m.flags & NCP_MOUNT_INTR) { + /* FIXME: This doesn't seem right at all. So, like, + we can't handle SIGINT and get whatever to stop? + What if we've blocked it ourselves? What about + alarms? Why, in fact, are we mucking with the + sigmask at all? -- r~ */ + if (current->sig->action[SIGINT - 1].sa_handler == SIG_DFL) + mask |= sigmask(SIGINT); + if (current->sig->action[SIGQUIT - 1].sa_handler == SIG_DFL) + mask |= sigmask(SIGQUIT); + } + siginitmaskinv(¤t->blocked, mask); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + fs = get_fs(); set_fs(get_ds()); for (n = 0, timeout = init_timeout;; n++, timeout <<= 1) { @@ -269,7 +276,12 @@ printk(KERN_ERR "NCP: result=%d\n", result); result = -EIO; } + + spin_lock_irqsave(¤t->sigmask_lock, flags); current->blocked = old_mask; + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + set_fs(fs); return result; } diff -u --recursive --new-file v2.1.67/linux/fs/nfsd/nfssvc.c linux/fs/nfsd/nfssvc.c --- v2.1.67/linux/fs/nfsd/nfssvc.c Wed Apr 23 19:01:27 1997 +++ linux/fs/nfsd/nfssvc.c Sun Nov 30 12:50:53 1997 @@ -35,9 +35,9 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC #define NFSD_BUFSIZE (1024 + NFSSVC_MAXBLKSIZE) -#define BLOCKABLE_SIGS (~(_S(SIGKILL) | _S(SIGSTOP))) -#define SHUTDOWN_SIGS (_S(SIGKILL)|_S(SIGINT)|_S(SIGTERM)) -#define _S(sig) (1 << ((sig) - 1)) + +#define BLOCKABLE_SIGS (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#define SHUTDOWN_SIGS (sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGTERM)) extern struct svc_program nfsd_program; static void nfsd(struct svc_rqst *rqstp); @@ -96,7 +96,6 @@ nfsd(struct svc_rqst *rqstp) { struct svc_serv *serv = rqstp->rq_server; - 
sigset_t oldsigmask; int oldumask, err; lock_kernel(); @@ -108,7 +107,7 @@ sprintf(current->comm, "nfsd"); oldumask = current->fs->umask; /* Set umask to 0. */ - current->blocked |= ~SHUTDOWN_SIGS; + siginitsetinv(¤t->blocked, SHUTDOWN_SIGS); current->fs->umask = 0; nfssvc_boot = xtime; /* record boot time */ lockd_up(); /* start lockd */ @@ -142,10 +141,17 @@ serv->sv_stats->rpcbadclnt++; } else { /* Process request with all signals blocked. */ - oldsigmask = current->blocked; - current->blocked = BLOCKABLE_SIGS; + spin_lock_irq(¤t->sigmask_lock); + siginitsetinv(¤t->blocked, ~BLOCKABLE_SIGS); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + svc_process(serv, rqstp); - current->blocked = oldsigmask; + + spin_lock_irq(¤t->sigmask_lock); + siginitsetinv(¤t->blocked, SHUTDOWN_SIGS); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); } /* Unlock export hash tables */ @@ -157,8 +163,9 @@ } else { unsigned int signo; - for (signo = 0; signo < 32; signo++) - if (current->signal & current->blocked & (1<signal, signo) && + !sigismember(¤t->signal, signo)) break; printk(KERN_WARNING "nfsd: terminating on signal %d\n", signo); } diff -u --recursive --new-file v2.1.67/linux/fs/proc/array.c linux/fs/proc/array.c --- v2.1.67/linux/fs/proc/array.c Sat Oct 25 02:44:18 1997 +++ linux/fs/proc/array.c Sun Nov 30 10:59:02 1997 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -643,37 +644,58 @@ return buffer; } -static inline char * task_sig(struct task_struct *p, char *buffer) +char * render_sigset_t(sigset_t *set, char *buffer) { - buffer += sprintf(buffer, - "SigPnd:\t%08lx\n" - "SigBlk:\t%08lx\n", - p->signal, p->blocked); + int i = _NSIG, x; + do { + i -= 4, x = 0; + if (sigismember(set, i+1)) x |= 1; + if (sigismember(set, i+2)) x |= 2; + if (sigismember(set, i+3)) x |= 4; + if (sigismember(set, i+4)) x |= 8; + *buffer++ = (x < 10 ? 
'0' : 'a' - 10) + x; + } while (i >= 4); + *buffer = 0; + return buffer; +} + +static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, + sigset_t *catch) +{ + struct k_sigaction *k; + int i; + + sigemptyset(ign); + sigemptyset(catch); + if (p->sig) { - struct sigaction * action = p->sig->action; - unsigned long sig_ign = 0, sig_caught = 0; - unsigned long bit = 1; - int i; - - for (i = 0; i < 32; i++) { - switch((unsigned long) action->sa_handler) { - case 0: - break; - case 1: - sig_ign |= bit; - break; - default: - sig_caught |= bit; - } - bit <<= 1; - action++; + k = p->sig->action; + for (i = 1; i <= _NSIG; ++i, ++k) { + if (k->sa.sa_handler == SIG_IGN) + sigaddset(ign, i); + else if (k->sa.sa_handler != SIG_DFL) + sigaddset(catch, i); } - - buffer += sprintf(buffer, - "SigIgn:\t%08lx\n" - "SigCgt:\t%08lx\n", - sig_ign, sig_caught); } +} + +static inline char * task_sig(struct task_struct *p, char *buffer) +{ + sigset_t ign, catch; + + buffer += sprintf(buffer, "SigPnd:\t"); + buffer = render_sigset_t(&p->signal, buffer); + buffer += sprintf(buffer, "SigBlk:\t"); + buffer = render_sigset_t(&p->blocked, buffer); + *buffer++ = '\n'; + + collect_sigign_sigcatch(p, &ign, &catch); + buffer += sprintf(buffer, "SigIgn:\t"); + buffer = render_sigset_t(&ign, buffer); + buffer += sprintf(buffer, "SigCat:\t"); + buffer = render_sigset_t(&catch, buffer); + *buffer++ = '\n'; + return buffer; } @@ -694,10 +716,14 @@ static int get_stat(int pid, char * buffer) { struct task_struct *tsk = find_task_by_pid(pid); - unsigned long sigignore=0, sigcatch=0, wchan; - unsigned long vsize, eip, esp; + unsigned long vsize, eip, esp, wchan; long priority, nice; - int i,tty_pgrp; + int tty_pgrp; + sigset_t sigign, sigcatch; + char signal_str[sizeof(sigset_t)*2+1]; + char blocked_str[sizeof(sigset_t)*2+1]; + char sigign_str[sizeof(sigset_t)*2+1]; + char sigcatch_str[sizeof(sigset_t)*2+1]; char state; if (!tsk) @@ -716,22 +742,15 @@ eip = KSTK_EIP(tsk); esp = 
KSTK_ESP(tsk); } + wchan = get_wchan(tsk); - if (tsk->sig) { - unsigned long bit = 1; - for(i=0; i<32; ++i) { - switch((unsigned long) tsk->sig->action[i].sa_handler) { - case 0: - break; - case 1: - sigignore |= bit; - break; - default: - sigcatch |= bit; - } - bit <<= 1; - } - } + + collect_sigign_sigcatch(tsk, &sigign, &sigcatch); + render_sigset_t(&tsk->signal, signal_str); + render_sigset_t(&tsk->blocked, blocked_str); + render_sigset_t(&sigign, sigign_str); + render_sigset_t(&sigcatch, sigcatch_str); + if (tsk->tty) tty_pgrp = tsk->tty->pgrp; else @@ -746,7 +765,7 @@ return sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \ -%lu %lu %lu %lu %lu %lu %lu %lu\n", +%lu %s %s %s %s %lu %lu %lu\n", pid, tsk->comm, state, @@ -777,10 +796,10 @@ tsk->mm ? tsk->mm->start_stack : 0, esp, eip, - tsk->signal, - tsk->blocked, - sigignore, - sigcatch, + signal_str, + blocked_str, + sigign_str, + sigcatch_str, wchan, tsk->nswap, tsk->cnswap); diff -u --recursive --new-file v2.1.67/linux/fs/smbfs/sock.c linux/fs/smbfs/sock.c --- v2.1.67/linux/fs/smbfs/sock.c Wed Nov 26 16:24:03 1997 +++ linux/fs/smbfs/sock.c Sun Nov 30 10:59:03 1997 @@ -26,8 +26,6 @@ #define SMBFS_PARANOIA 1 /* #define SMBFS_DEBUG_VERBOSE 1 */ -#define _S(nr) (1<<((nr)-1)) - static int _recvfrom(struct socket *socket, unsigned char *ubuf, int size, unsigned flags) @@ -599,8 +597,8 @@ int smb_request(struct smb_sb_info *server) { - unsigned long old_mask; - unsigned long fs; + unsigned long fs, flags, sigpipe; + sigset_t old_set; int len, result; unsigned char *buffer; @@ -619,8 +617,13 @@ len = smb_len(buffer) + 4; pr_debug("smb_request: len = %d cmd = 0x%X\n", len, buffer[8]); - old_mask = current->blocked; - current->blocked |= ~(_S(SIGKILL) | _S(SIGSTOP)); + spin_lock_irqsave(¤t->sigmask_lock, flags); + sigpipe = sigismember(¤t->signal, SIGPIPE); + old_set = current->blocked; + siginitsetinv(¤t->blocked, 
sigmask(SIGKILL)|sigmask(SIGSTOP)); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + fs = get_fs(); set_fs(get_ds()); @@ -629,9 +632,15 @@ { result = smb_receive(server); } + /* read/write errors are handled by errno */ - current->signal &= ~_S(SIGPIPE); - current->blocked = old_mask; + spin_lock_irqsave(¤t->sigmask_lock, flags); + if (result == -EPIPE && !sigpipe) + sigdelset(¤t->signal, SIGPIPE); + current->blocked = old_set; + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + set_fs(fs); if (result >= 0) @@ -758,8 +767,8 @@ int *lrdata, unsigned char **rdata, int *lrparam, unsigned char **rparam) { - unsigned long old_mask; - unsigned long fs; + sigset_t old_set; + unsigned long fs, flags, sigpipe; int result; pr_debug("smb_trans2_request: com=%d, ld=%d, lp=%d\n", @@ -778,8 +787,13 @@ if ((result = smb_dont_catch_keepalive(server)) != 0) goto bad_conn; - old_mask = current->blocked; - current->blocked |= ~(_S(SIGKILL) | _S(SIGSTOP)); + spin_lock_irqsave(¤t->sigmask_lock, flags); + sigpipe = sigismember(¤t->signal, SIGPIPE); + old_set = current->blocked; + siginitsetinv(¤t->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + fs = get_fs(); set_fs(get_ds()); @@ -790,9 +804,15 @@ result = smb_receive_trans2(server, lrdata, rdata, lrparam, rparam); } + /* read/write errors are handled by errno */ - current->signal &= ~_S(SIGPIPE); - current->blocked = old_mask; + spin_lock_irqsave(¤t->sigmask_lock, flags); + if (result == -EPIPE && !sigpipe) + sigdelset(¤t->signal, SIGPIPE); + current->blocked = old_set; + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + set_fs(fs); if (result >= 0) diff -u --recursive --new-file v2.1.67/linux/include/asm-alpha/processor.h linux/include/asm-alpha/processor.h --- v2.1.67/linux/include/asm-alpha/processor.h Mon Jun 16 16:35:59 1997 +++ 
linux/include/asm-alpha/processor.h Sun Nov 30 10:59:03 1997 @@ -44,6 +44,7 @@ /* bit 1..5: IEEE_TRAP_ENABLE bits (see fpu.h) */ /* bit 6..8: UAC bits (see sysinfo.h) */ /* bit 17..21: IEEE_STATUS_MASK bits (see fpu.h) */ + /* bit 63: die_if_kernel recursion lock */ unsigned long flags; /* perform syscall argument validation (get/set_fs) */ unsigned long fs; diff -u --recursive --new-file v2.1.67/linux/include/asm-alpha/sigcontext.h linux/include/asm-alpha/sigcontext.h --- v2.1.67/linux/include/asm-alpha/sigcontext.h Mon Sep 30 07:43:29 1996 +++ linux/include/asm-alpha/sigcontext.h Sun Nov 30 10:59:03 1997 @@ -3,7 +3,7 @@ struct sigcontext { /* - * what should we have here? I'd probably better use the same + * What should we have here? I'd probably better use the same * stack layout as OSF/1, just in case we ever want to try * running their binaries.. * @@ -28,7 +28,15 @@ unsigned long sc_fp_trap_pc; unsigned long sc_fp_trigger_sum; unsigned long sc_fp_trigger_inst; - unsigned long sc_retcode[2]; +}; + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + old_sigset_t uc_osf_sigmask; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ }; #endif diff -u --recursive --new-file v2.1.67/linux/include/asm-alpha/siginfo.h linux/include/asm-alpha/siginfo.h --- v2.1.67/linux/include/asm-alpha/siginfo.h Wed Dec 31 16:00:00 1969 +++ linux/include/asm-alpha/siginfo.h Sun Nov 30 10:59:03 1997 @@ -0,0 +1,195 @@ +#ifndef _ALPHA_SIGINFO_H +#define _ALPHA_SIGINFO_H + +#include + +/* This structure matches OSF/1 for binary compatibility. 
*/ + +typedef union sigval { + int sival_int; + void *sival_ptr; +} sigval_t; + +#define SI_MAX_SIZE 128 +#define SI_PAD_SIZE ((SI_MAX_SIZE/sizeof(int)) - 4) + +typedef struct siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE]; + + /* kill() */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + unsigned int _timer1; + unsigned int _timer2; + } _timer; + + /* POSIX.1b signals */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + pid_t _pid; /* which child */ + int _status; /* exit code */ + clock_t _utime; + clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + void *_addr; /* faulting insn/memory ref. */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} siginfo_t; + +/* + * How these fields are to be accessed. + */ +#define si_pid _sifields._kill._pid +#define si_uid _sifields._kill._uid +#define si_status _sifields._sigchld._status +#define si_utime _sifields._sigchld._utime +#define si_stime _sifields._sigchld._stime +#define si_value _sifields._rt._sigval +#define si_int _sifields._rt._sigval.sival_int +#define si_ptr _sifields._rt._sigval.sival_ptr +#define si_addr _sifields._sigfault._addr +#define si_band _sifields._sigpoll._band +#define si_fd _sifields._sigpoll._fd + +/* + * si_code values + * Digital reserves positive values for kernel-generated signals. 
+ */
+#define SI_USER		0		/* sent by kill, sigsend, raise */
+#define SI_KERNEL	0x80		/* sent by the kernel from somewhere */
+#define SI_QUEUE	-1		/* sent by sigqueue */
+#define SI_TIMER	-2		/* sent by timer expiration */
+#define SI_MESGQ	-3		/* sent by real time mesq state change */
+#define SI_ASYNCIO	-4		/* sent by AIO completion */
+
+#define SI_FROMUSER(siptr)	((siptr)->si_code <= 0)
+#define SI_FROMKERNEL(siptr)	((siptr)->si_code > 0)
+
+/*
+ * SIGILL si_codes
+ */
+#define ILL_ILLOPC	1	/* illegal opcode */
+#define ILL_ILLOPN	2	/* illegal operand */
+#define ILL_ILLADR	3	/* illegal addressing mode */
+#define ILL_ILLTRP	4	/* illegal trap */
+#define ILL_PRVOPC	5	/* privileged opcode */
+#define ILL_PRVREG	6	/* privileged register */
+#define ILL_COPROC	7	/* coprocessor error */
+#define ILL_BADSTK	8	/* internal stack error */
+#define NSIGILL		8
+
+/*
+ * SIGFPE si_codes
+ */
+#define FPE_INTDIV	1	/* integer divide by zero */
+#define FPE_INTOVF	2	/* integer overflow */
+#define FPE_FLTDIV	3	/* floating point divide by zero */
+#define FPE_FLTOVF	4	/* floating point overflow */
+#define FPE_FLTUND	5	/* floating point underflow */
+#define FPE_FLTRES	6	/* floating point inexact result */
+#define FPE_FLTINV	7	/* floating point invalid operation */
+#define FPE_FLTSUB	8	/* subscript out of range */
+#define NSIGFPE		8
+
+/*
+ * SIGSEGV si_codes
+ */
+#define SEGV_MAPERR	1	/* address not mapped to object */
+#define SEGV_ACCERR	2	/* invalid permissions for mapped object */
+#define NSIGSEGV	2
+
+/*
+ * SIGBUS si_codes
+ */
+#define BUS_ADRALN	1	/* invalid address alignment */
+#define BUS_ADRERR	2	/* non-existent physical address */
+#define BUS_OBJERR	3	/* object specific hardware error */
+#define NSIGBUS		3
+
+/*
+ * SIGTRAP si_codes
+ */
+#define TRAP_BRKPT	1	/* process breakpoint */
+#define TRAP_TRACE	2	/* process trace trap */
+#define NSIGTRAP	2
+
+/*
+ * SIGCHLD si_codes
+ */
+#define CLD_EXITED	1	/* child has exited */
+#define CLD_KILLED	2	/* child was killed 
*/
+#define CLD_DUMPED	3	/* child terminated abnormally */
+#define CLD_TRAPPED	4	/* traced child has trapped */
+#define CLD_STOPPED	5	/* child has stopped */
+#define CLD_CONTINUED	6	/* stopped child has continued */
+#define NSIGCHLD	6
+
+/*
+ * SIGPOLL si_codes
+ */
+#define POLL_IN		1	/* data input available */
+#define POLL_OUT	2	/* output buffers available */
+#define POLL_MSG	3	/* input message available */
+#define POLL_ERR	4	/* i/o error */
+#define POLL_PRI	5	/* high priority input available */
+#define POLL_HUP	6	/* device disconnected */
+#define NSIGPOLL	6
+
+/*
+ * sigevent definitions
+ *
+ * It seems likely that SIGEV_THREAD will have to be handled from
+ * userspace, libpthread transmuting it to SIGEV_SIGNAL, which the
+ * thread manager then catches and does the appropriate nonsense.
+ * However, everything is written out here so as to not get lost.
+ */
+#define SIGEV_SIGNAL	0	/* notify via signal */
+#define SIGEV_NONE	1	/* other notification: meaningless */
+#define SIGEV_THREAD	2	/* deliver via thread creation */
+
+#define SIGEV_MAX_SIZE	64
+#define SIGEV_PAD_SIZE	((SIGEV_MAX_SIZE/sizeof(int)) - 4)
+
+typedef struct sigevent {
+	sigval_t sigev_value;
+	int sigev_signo;
+	int sigev_notify;
+	union {
+		int _pad[SIGEV_PAD_SIZE];
+
+		struct {
+			void (*_function)(sigval_t);
+			void *_attribute;	/* really pthread_attr_t */
+		} _sigev_thread;
+	} _sigev_un;
+} sigevent_t;
+
+#define sigev_notify_function	_sigev_un._sigev_thread._function
+#define sigev_notify_attributes	_sigev_un._sigev_thread._attribute
+
+#endif
diff -u --recursive --new-file v2.1.67/linux/include/asm-alpha/signal.h linux/include/asm-alpha/signal.h
--- v2.1.67/linux/include/asm-alpha/signal.h	Thu Feb 29 21:50:56 1996
+++ linux/include/asm-alpha/signal.h	Sun Nov 30 10:59:03 1997
@@ -1,10 +1,23 @@
 #ifndef _ASMAXP_SIGNAL_H
 #define _ASMAXP_SIGNAL_H
 
-typedef unsigned long sigset_t;		/* at least 32 bits */
+#include <linux/types.h>
 
-#define _NSIG 32
-#define NSIG _NSIG
+/* Avoid too many header ordering 
problems. */ +struct siginfo; + +/* Digital Unix defines 64 signals. Most things should be clean enough + to redefine this at will, if care is taken to make libc match. */ + +#define _NSIG 64 +#define _NSIG_BPW 64 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; /* * Linux/AXP has different signal numbers that Linux/i386: I'm trying @@ -46,23 +59,36 @@ #define SIGPWR SIGINFO #define SIGIOT SIGABRT +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX (_NSIG-1) + /* - * sa_flags values: SA_STACK is not currently supported, but will allow the - * usage of signal stacks by using the (now obsolete) sa_restorer field in - * the sigaction structure as a stack pointer. This is now possible due to - * the changes in signal handling. LBT 010493. + * SA_FLAGS values: + * + * SA_ONSTACK is not currently supported, but will allow sigaltstack(2). * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_SHIRQ flag is for shared interrupt support on PCI and EISA. + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. 
*/ -#define SA_NOCLDSTOP 0x00000004 -#define SA_STACK 0x00000001 +#define SA_ONSTACK 0x00000001 #define SA_RESTART 0x00000002 -#define SA_INTERRUPT 0x20000000 -#define SA_NOMASK 0x00000008 -#define SA_ONESHOT 0x00000010 -#define SA_SHIRQ 0x00000020 +#define SA_NOCLDSTOP 0x00000004 +#define SA_NODEFER 0x00000008 +#define SA_RESETHAND 0x00000010 +#define SA_NOCLDWAIT 0x00000020 /* not supported yet */ +#define SA_SIGINFO 0x00000040 + +#define SA_ONESHOT SA_RESETHAND +#define SA_NOMASK SA_NODEFER +#define SA_INTERRUPT 0x20000000 /* dummy -- ignored */ #ifdef __KERNEL__ /* @@ -70,12 +96,13 @@ * irq handling routines. * * SA_INTERRUPT is also used by the irq handling routines. + * SA_SHIRQ is for shared interrupt support on PCI and EISA. */ -#define SA_PROBE SA_ONESHOT -#define SA_SAMPLE_RANDOM SA_RESTART +#define SA_PROBE SA_ONESHOT +#define SA_SAMPLE_RANDOM SA_RESTART +#define SA_SHIRQ 0x40000000 #endif - #define SIG_BLOCK 1 /* for blocking signals */ #define SIG_UNBLOCK 2 /* for unblocking signals */ #define SIG_SETMASK 3 /* for setting the signal mask */ @@ -87,11 +114,28 @@ #define SIG_IGN ((__sighandler_t)1) /* ignore signal */ #define SIG_ERR ((__sighandler_t)-1) /* error return from signal */ +struct osf_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + int sa_flags; +}; + struct sigaction { __sighandler_t sa_handler; - sigset_t sa_mask; - unsigned int sa_flags; + unsigned long sa_flags; + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; + void (*ka_restorer)(void); }; + +typedef struct sigaltstack { + void *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; #ifdef __KERNEL__ #include diff -u --recursive --new-file v2.1.67/linux/include/asm-alpha/system.h linux/include/asm-alpha/system.h --- v2.1.67/linux/include/asm-alpha/system.h Tue May 13 22:41:17 1997 +++ linux/include/asm-alpha/system.h Sun Nov 30 10:59:03 1997 @@ -52,6 +52,8 @@ extern unsigned long rdusp(void); extern unsigned long 
rdmces (void); extern void wrmces (unsigned long); +extern unsigned long whami(void); +extern void wripir(unsigned long); #define halt() __asm__ __volatile__ ("call_pal %0" : : "i" (PAL_halt) : "memory") @@ -119,11 +121,13 @@ #define __cli() setipl(7) #define __sti() setipl(0) #define __save_flags(flags) do { (flags) = getipl(); } while (0) +#define __save_and_cli(flags) do { (flags) = swpipl(7); } while (0) #define __restore_flags(flags) setipl(flags) #define cli() setipl(7) #define sti() setipl(0) #define save_flags(flags) do { (flags) = getipl(); } while (0) +#define save_and_cli(flags) do { (flags) = swpipl(7); } while (0) #define restore_flags(flags) setipl(flags) /* diff -u --recursive --new-file v2.1.67/linux/include/asm-alpha/uaccess.h linux/include/asm-alpha/uaccess.h --- v2.1.67/linux/include/asm-alpha/uaccess.h Sun Sep 7 13:10:43 1997 +++ linux/include/asm-alpha/uaccess.h Sun Nov 30 10:59:03 1997 @@ -412,7 +412,19 @@ return retval; \ }) -extern void __clear_user(void); +extern void __do_clear_user(void); + +#define __clear_user(to,n) \ +({ \ + register void * __cl_to __asm__("$6") = (to); \ + register long __cl_len __asm__("$0") = (n); \ + __asm__ __volatile__( \ + "jsr $28,(%2),__do_clear_user" \ + : "=r"(__cl_len), "=r"(__cl_to) \ + : "r"(__do_clear_user), "0"(__cl_len), "1"(__cl_to) \ + : "$1","$2","$3","$4","$5","$28","memory"); \ + __cl_len; \ +}) #define clear_user(to,n) \ ({ \ @@ -420,14 +432,13 @@ register long __cl_len __asm__("$0") = (n); \ if (__access_ok(((long)__cl_to),__cl_len,__access_mask)) { \ __asm__ __volatile__( \ - "jsr $28,(%2),__clear_user" \ + "jsr $28,(%2),__do_clear_user" \ : "=r"(__cl_len), "=r"(__cl_to) \ - : "r"(__clear_user), "0"(__cl_len), "1"(__cl_to)\ + : "r"(__do_clear_user), "0"(__cl_len), "1"(__cl_to)\ : "$1","$2","$3","$4","$5","$28","memory"); \ } \ __cl_len; \ }) - /* Returns: -EFAULT if exception before terminator, N if the entire buffer filled, else strlen. 
*/ diff -u --recursive --new-file v2.1.67/linux/include/asm-alpha/unistd.h linux/include/asm-alpha/unistd.h --- v2.1.67/linux/include/asm-alpha/unistd.h Sat Oct 25 02:44:18 1997 +++ linux/include/asm-alpha/unistd.h Sun Nov 30 10:59:03 1997 @@ -156,7 +156,7 @@ #define __NR_osf_pid_block 153 /* not implemented */ #define __NR_osf_pid_unblock 154 /* not implemented */ -#define __NR_sigaction 156 +#define __NR_osf_sigaction 156 #define __NR_osf_sigwaitprim 157 /* not implemented */ #define __NR_osf_nfssvc 158 /* not implemented */ #define __NR_osf_getdirentries 159 @@ -288,6 +288,14 @@ #define __NR_prctl 348 #define __NR_pread 349 #define __NR_pwrite 350 +#define __NR_rt_sigreturn 351 +#define __NR_rt_sigaction 352 +#define __NR_rt_sigprocmask 353 +#define __NR_rt_sigpending 354 +#define __NR_rt_sigtimedwait 355 +#define __NR_rt_sigqueueinfo 356 +#define __NR_rt_sigsuspend 357 + #if defined(__LIBRARY__) && defined(__GNUC__) diff -u --recursive --new-file v2.1.67/linux/include/asm-i386/delay.h linux/include/asm-i386/delay.h --- v2.1.67/linux/include/asm-i386/delay.h Sat Nov 29 11:25:12 1997 +++ linux/include/asm-i386/delay.h Sat Nov 29 12:56:42 1997 @@ -12,7 +12,7 @@ extern void __delay(unsigned long loops); #define udelay(n) (__builtin_constant_p(n) ? 
\ - __const_udelay((n) * 0x10c6) : \ + __const_udelay((n) * 0x10c6ul) : \ __udelay(n)) #endif /* defined(_I386_DELAY_H) */ diff -u --recursive --new-file v2.1.67/linux/include/asm-i386/io.h linux/include/asm-i386/io.h --- v2.1.67/linux/include/asm-i386/io.h Wed Nov 12 13:34:27 1997 +++ linux/include/asm-i386/io.h Sun Nov 30 14:10:42 1997 @@ -36,9 +36,9 @@ #endif #ifdef REALLY_SLOW_IO -#define SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO #else -#define SLOW_DOWN_IO __SLOW_DOWN_IO +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO #endif /* @@ -52,7 +52,7 @@ #define __OUT(s,s1,x) \ __OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ +__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ #define __IN1(s) \ extern inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; @@ -62,7 +62,7 @@ #define __IN(s,s1,i...) 
\ __IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ #define __INS(s) \ extern inline void ins##s(unsigned short port, void * addr, unsigned long count) \ diff -u --recursive --new-file v2.1.67/linux/include/asm-i386/sigcontext.h linux/include/asm-i386/sigcontext.h --- v2.1.67/linux/include/asm-i386/sigcontext.h Mon Sep 30 07:43:21 1996 +++ linux/include/asm-i386/sigcontext.h Sun Nov 30 10:59:03 1997 @@ -51,4 +51,12 @@ unsigned long cr2; }; +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + #endif diff -u --recursive --new-file v2.1.67/linux/include/asm-i386/siginfo.h linux/include/asm-i386/siginfo.h --- v2.1.67/linux/include/asm-i386/siginfo.h Wed Dec 31 16:00:00 1969 +++ linux/include/asm-i386/siginfo.h Sun Nov 30 10:59:03 1997 @@ -0,0 +1,195 @@ +#ifndef _I386_SIGINFO_H +#define _I386_SIGINFO_H + +#include + +/* XXX: This structure was copied from the Alpha; is there an iBCS version? 
*/ + +typedef union sigval { + int sival_int; + void *sival_ptr; +} sigval_t; + +#define SI_MAX_SIZE 128 +#define SI_PAD_SIZE ((SI_MAX_SIZE/sizeof(int)) - 3) + +typedef struct siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE]; + + /* kill() */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + unsigned int _timer1; + unsigned int _timer2; + } _timer; + + /* POSIX.1b signals */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + pid_t _pid; /* which child */ + int _status; /* exit code */ + clock_t _utime; + clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + void *_addr; /* faulting insn/memory ref. */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} siginfo_t; + +/* + * How these fields are to be accessed. + */ +#define si_pid _sifields._kill._pid +#define si_uid _sifields._kill._uid +#define si_status _sifields._sigchld._status +#define si_utime _sifields._sigchld._utime +#define si_stime _sifields._sigchld._stime +#define si_value _sifields._rt._sigval +#define si_int _sifields._rt._sigval.sival_int +#define si_ptr _sifields._rt._sigval.sival_ptr +#define si_addr _sifields._sigfault._addr +#define si_band _sifields._sigpoll._band +#define si_fd _sifields._sigpoll._fd + +/* + * si_code values + * Digital reserves positive values for kernel-generated signals. 
+ */
+#define SI_USER		0		/* sent by kill, sigsend, raise */
+#define SI_KERNEL	0x80		/* sent by the kernel from somewhere */
+#define SI_QUEUE	-1		/* sent by sigqueue */
+#define SI_TIMER	-2		/* sent by timer expiration */
+#define SI_MESGQ	-3		/* sent by real time mesq state change */
+#define SI_ASYNCIO	-4		/* sent by AIO completion */
+
+#define SI_FROMUSER(siptr)	((siptr)->si_code <= 0)
+#define SI_FROMKERNEL(siptr)	((siptr)->si_code > 0)
+
+/*
+ * SIGILL si_codes
+ */
+#define ILL_ILLOPC	1	/* illegal opcode */
+#define ILL_ILLOPN	2	/* illegal operand */
+#define ILL_ILLADR	3	/* illegal addressing mode */
+#define ILL_ILLTRP	4	/* illegal trap */
+#define ILL_PRVOPC	5	/* privileged opcode */
+#define ILL_PRVREG	6	/* privileged register */
+#define ILL_COPROC	7	/* coprocessor error */
+#define ILL_BADSTK	8	/* internal stack error */
+#define NSIGILL		8
+
+/*
+ * SIGFPE si_codes
+ */
+#define FPE_INTDIV	1	/* integer divide by zero */
+#define FPE_INTOVF	2	/* integer overflow */
+#define FPE_FLTDIV	3	/* floating point divide by zero */
+#define FPE_FLTOVF	4	/* floating point overflow */
+#define FPE_FLTUND	5	/* floating point underflow */
+#define FPE_FLTRES	6	/* floating point inexact result */
+#define FPE_FLTINV	7	/* floating point invalid operation */
+#define FPE_FLTSUB	8	/* subscript out of range */
+#define NSIGFPE		8
+
+/*
+ * SIGSEGV si_codes
+ */
+#define SEGV_MAPERR	1	/* address not mapped to object */
+#define SEGV_ACCERR	2	/* invalid permissions for mapped object */
+#define NSIGSEGV	2
+
+/*
+ * SIGBUS si_codes
+ */
+#define BUS_ADRALN	1	/* invalid address alignment */
+#define BUS_ADRERR	2	/* non-existent physical address */
+#define BUS_OBJERR	3	/* object specific hardware error */
+#define NSIGBUS		3
+
+/*
+ * SIGTRAP si_codes
+ */
+#define TRAP_BRKPT	1	/* process breakpoint */
+#define TRAP_TRACE	2	/* process trace trap */
+#define NSIGTRAP	2
+
+/*
+ * SIGCHLD si_codes
+ */
+#define CLD_EXITED	1	/* child has exited */
+#define CLD_KILLED	2	/* child was killed 
*/
+#define CLD_DUMPED	3	/* child terminated abnormally */
+#define CLD_TRAPPED	4	/* traced child has trapped */
+#define CLD_STOPPED	5	/* child has stopped */
+#define CLD_CONTINUED	6	/* stopped child has continued */
+#define NSIGCHLD	6
+
+/*
+ * SIGPOLL si_codes
+ */
+#define POLL_IN		1	/* data input available */
+#define POLL_OUT	2	/* output buffers available */
+#define POLL_MSG	3	/* input message available */
+#define POLL_ERR	4	/* i/o error */
+#define POLL_PRI	5	/* high priority input available */
+#define POLL_HUP	6	/* device disconnected */
+#define NSIGPOLL	6
+
+/*
+ * sigevent definitions
+ *
+ * It seems likely that SIGEV_THREAD will have to be handled from
+ * userspace, libpthread transmuting it to SIGEV_SIGNAL, which the
+ * thread manager then catches and does the appropriate nonsense.
+ * However, everything is written out here so as to not get lost.
+ */
+#define SIGEV_SIGNAL	0	/* notify via signal */
+#define SIGEV_NONE	1	/* other notification: meaningless */
+#define SIGEV_THREAD	2	/* deliver via thread creation */
+
+#define SIGEV_MAX_SIZE	64
+#define SIGEV_PAD_SIZE	((SIGEV_MAX_SIZE/sizeof(int)) - 3)
+
+typedef struct sigevent {
+	sigval_t sigev_value;
+	int sigev_signo;
+	int sigev_notify;
+	union {
+		int _pad[SIGEV_PAD_SIZE];
+
+		struct {
+			void (*_function)(sigval_t);
+			void *_attribute;	/* really pthread_attr_t */
+		} _sigev_thread;
+	} _sigev_un;
+} sigevent_t;
+
+#define sigev_notify_function	_sigev_un._sigev_thread._function
+#define sigev_notify_attributes	_sigev_un._sigev_thread._attribute
+
+#endif
diff -u --recursive --new-file v2.1.67/linux/include/asm-i386/signal.h linux/include/asm-i386/signal.h
--- v2.1.67/linux/include/asm-i386/signal.h	Mon Sep 30 07:47:39 1996
+++ linux/include/asm-i386/signal.h	Sun Nov 30 10:59:03 1997
@@ -1,10 +1,23 @@
 #ifndef _ASMi386_SIGNAL_H
 #define _ASMi386_SIGNAL_H
 
-typedef unsigned long sigset_t;		/* at least 32 bits */
+#include <linux/types.h>
 
-#define _NSIG 32
-#define NSIG _NSIG
+/* Avoid too many header ordering 
problems. */ +struct siginfo; + +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +#define _NSIG 64 +#define _NSIG_BPW 32 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; #define SIGHUP 1 #define SIGINT 2 @@ -43,22 +56,37 @@ #define SIGPWR 30 #define SIGUNUSED 31 +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX (_NSIG-1) + /* - * sa_flags values: SA_STACK is not currently supported, but will allow the - * usage of signal stacks by using the (now obsolete) sa_restorer field in - * the sigaction structure as a stack pointer. This is now possible due to - * the changes in signal handling. LBT 010493. + * SA_FLAGS values: + * + * SA_ONSTACK is not currently supported, but will allow sigaltstack(2). * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_SHIRQ flag is for shared interrupt support on PCI and EISA. + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. 
*/ -#define SA_NOCLDSTOP 1 -#define SA_SHIRQ 0x04000000 -#define SA_STACK 0x08000000 +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 #define SA_RESTART 0x10000000 -#define SA_INTERRUPT 0x20000000 -#define SA_NOMASK 0x40000000 -#define SA_ONESHOT 0x80000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND +#define SA_INTERRUPT 0x20000000 /* dummy -- ignored */ + +#define SA_RESTORER 0x04000000 #ifdef __KERNEL__ /* @@ -66,12 +94,13 @@ * irq handling routines. * * SA_INTERRUPT is also used by the irq handling routines. + * SA_SHIRQ is for shared interrupt support on PCI and EISA. */ -#define SA_PROBE SA_ONESHOT -#define SA_SAMPLE_RANDOM SA_RESTART +#define SA_PROBE SA_ONESHOT +#define SA_SAMPLE_RANDOM SA_RESTART +#define SA_SHIRQ 0x04000000 #endif - #define SIG_BLOCK 0 /* for blocking signals */ #define SIG_UNBLOCK 1 /* for unblocking signals */ #define SIG_SETMASK 2 /* for setting the signal mask */ @@ -83,15 +112,71 @@ #define SIG_IGN ((__sighandler_t)1) /* ignore signal */ #define SIG_ERR ((__sighandler_t)-1) /* error return from signal */ +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + struct sigaction { __sighandler_t sa_handler; - sigset_t sa_mask; unsigned long sa_flags; void (*sa_restorer)(void); + sigset_t sa_mask; /* mask last for extensibility */ }; +struct k_sigaction { + struct sigaction sa; +}; + +typedef struct sigaltstack { + void *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + #ifdef __KERNEL__ #include #endif + +#define __HAVE_ARCH_SIG_BITOPS + +extern __inline__ void sigaddset(sigset_t *set, int _sig) +{ + __asm__("btsl %1,%0" : "=m"(*set) : "ir"(_sig - 1) : "cc"); +} + +extern __inline__ void sigdelset(sigset_t *set, int _sig) +{ + __asm__("btrl %1,%0" : "=m"(*set) : 
"ir"(_sig - 1) : "cc"); +} + +extern __inline__ int __const_sigismember(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW)); +} + +extern __inline__ int __gen_sigismember(sigset_t *set, int _sig) +{ + int ret; + __asm__("btl %2,%1\n\tsbbl %0,%0" + : "=r"(ret) : "m"(*set), "ir"(_sig-1) : "cc"); + return ret; +} + +#define sigismember(set,sig) \ + (__builtin_constant_p(sig) ? \ + __const_sigismember((set),(sig)) : \ + __gen_sigismember((set),(sig))) + +#define sigmask(sig) (1UL << ((sig) - 1)) + +extern __inline__ int sigfindinword(unsigned long word) +{ + __asm__("bsfl %1,%0" : "=r"(word) : "rm"(word) : "cc"); + return word; +} #endif diff -u --recursive --new-file v2.1.67/linux/include/asm-i386/unistd.h linux/include/asm-i386/unistd.h --- v2.1.67/linux/include/asm-i386/unistd.h Sat Nov 29 11:25:12 1997 +++ linux/include/asm-i386/unistd.h Sun Nov 30 10:59:03 1997 @@ -178,6 +178,13 @@ #define __NR_setresgid 170 #define __NR_getresgid 171 #define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 /* user-visible error numbers are in the range -1 - -122: see */ diff -u --recursive --new-file v2.1.67/linux/include/linux/acct.h linux/include/linux/acct.h --- v2.1.67/linux/include/linux/acct.h Sun Mar 10 23:39:34 1996 +++ linux/include/linux/acct.h Sun Nov 30 15:00:46 1997 @@ -1,29 +1,86 @@ -#ifndef __LINUX_ACCT_H -#define __LINUX_ACCT_H +/* + * BSD Process Accounting for Linux - Definitions + * + * Author: Marco van Wieringen (mvw@planets.elm.net) + * + * This header file contains the definitions needed to implement + * BSD-style process accounting. The kernel accounting code and all + * user-level programs that try to do something useful with the + * process accounting log must include this file. 
+ * + * Copyright (C) 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V. + * + */ -#define ACCT_COMM 16 +#ifndef _LINUX_ACCT_H +#define _LINUX_ACCT_H + +#include + +/* + * comp_t is a 16-bit "floating" point number with a 3-bit base 8 + * exponent and a 13-bit fraction. See linux/kernel/acct.c for the + * specific encoding system used. + */ + +typedef __u16 comp_t; + +/* + * accounting file record + * + * This structure contains all of the information written out to the + * process accounting file whenever a process exits. + */ + +#define ACCT_COMM 16 struct acct { - char ac_comm[ACCT_COMM]; /* Accounting command name */ - time_t ac_utime; /* Accounting user time */ - time_t ac_stime; /* Accounting system time */ - time_t ac_etime; /* Accounting elapsed time */ - time_t ac_btime; /* Beginning time */ - uid_t ac_uid; /* Accounting user ID */ - gid_t ac_gid; /* Accounting group ID */ - dev_t ac_tty; /* controlling tty */ - char ac_flag; /* Accounting flag */ - long ac_minflt; /* Accounting minor pagefaults */ - long ac_majflt; /* Accounting major pagefaults */ - long ac_exitcode; /* Accounting process exitcode */ + char ac_flag; /* Accounting Flags */ +/* + * No binary format break with 2.0 - but when we hit 32bit uid we'll + * have to bite one + */ + __u16 ac_uid; /* Accounting Real User ID */ + __u16 ac_gid; /* Accounting Real Group ID */ + __u16 ac_tty; /* Accounting Control Terminal */ + __u32 ac_btime; /* Accounting Process Creation Time */ + comp_t ac_utime; /* Accounting User Time */ + comp_t ac_stime; /* Accounting System Time */ + comp_t ac_etime; /* Accounting Elapsed Time */ + comp_t ac_mem; /* Accounting Average Memory Usage */ + comp_t ac_io; /* Accounting Chars Transferred */ + comp_t ac_rw; /* Accounting Blocks Read or Written */ + comp_t ac_minflt; /* Accounting Minor Pagefaults */ + comp_t ac_majflt; /* Accounting Major Pagefaults */ + comp_t ac_swaps; /* Accounting Number of Swaps */ + __u32 ac_exitcode; /* Accounting Exitcode */ + char 
ac_comm[ACCT_COMM + 1]; /* Accounting Command Name */ + char ac_pad[10]; /* Accounting Padding Bytes */ }; -#define AFORK 0001 /* has executed fork, but no exec */ -#define ASU 0002 /* used super-user privileges */ -#define ACORE 0004 /* dumped core */ -#define AXSIG 0010 /* killed by a signal */ +/* + * accounting flags + */ + /* bit set when the process ... */ +#define AFORK 0x01 /* ... executed fork, but did not exec */ +#define ASU 0x02 /* ... used super-user privileges */ +#define ACOMPAT 0x04 /* ... used compatibility mode (VAX only not used) */ +#define ACORE 0x08 /* ... dumped core */ +#define AXSIG 0x10 /* ... was killed by a signal */ -#define AHZ 100 +#define AHZ 100 +#ifdef __KERNEL__ + +#include + +#ifdef CONFIG_BSD_PROCESS_ACCT +extern int acct_process(long exitcode); +#else +#define acct_process(x) do { } while (0) #endif + +#endif /* __KERNEL */ + +#endif /* _LINUX_ACCT_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/baycom.h linux/include/linux/baycom.h --- v2.1.67/linux/include/linux/baycom.h Fri Dec 20 03:17:18 1996 +++ linux/include/linux/baycom.h Sun Nov 30 10:30:19 1997 @@ -1,7 +1,7 @@ /* * The Linux BAYCOM driver for the Baycom serial 1200 baud modem * and the parallel 9600 baud modem - * (C) 1996 by Thomas Sailer, HB9JNX + * (C) 1997 by Thomas Sailer, HB9JNX/AE4WA */ #ifndef _BAYCOM_H diff -u --recursive --new-file v2.1.67/linux/include/linux/etherdevice.h linux/include/linux/etherdevice.h --- v2.1.67/linux/include/linux/etherdevice.h Mon Jul 7 16:02:45 1997 +++ linux/include/linux/etherdevice.h Sun Nov 30 14:11:18 1997 @@ -38,6 +38,8 @@ extern int eth_header_cache(struct dst_entry *dst, struct neighbour *neigh, struct hh_cache *hh); +extern int eth_header_parse(struct sk_buff *skb, + unsigned char *haddr); extern struct device * init_etherdev(struct device *, int); #ifdef CONFIG_IP_ROUTER diff -u --recursive --new-file v2.1.67/linux/include/linux/hdreg.h linux/include/linux/hdreg.h --- v2.1.67/linux/include/linux/hdreg.h Mon 
Jul 7 16:02:01 1997 +++ linux/include/linux/hdreg.h Sun Nov 30 14:10:40 1997 @@ -73,7 +73,8 @@ #define ABRT_ERR 0x04 /* Command aborted */ #define ID_ERR 0x10 /* ID field not found */ #define ECC_ERR 0x40 /* Uncorrectable ECC error */ -#define BBD_ERR 0x80 /* block marked bad */ +#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ +#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ struct hd_geometry { unsigned char heads; @@ -149,11 +150,28 @@ unsigned short eide_dma_time; /* recommended mword dma cycle time (ns) */ unsigned short eide_pio; /* min cycle time (ns), no IORDY */ unsigned short eide_pio_iordy; /* min cycle time (ns), with IORDY */ - unsigned short reserved69; /* reserved (word 69) */ - unsigned short reserved70; /* reserved (word 70) */ - /* unsigned short reservedxx[57];*/ /* reserved (words 71-127) */ - /* unsigned short vendor7 [32];*/ /* vendor unique (words 128-159) */ - /* unsigned short reservedyy[96];*/ /* reserved (words 160-255) */ + unsigned short word69; + unsigned short word70; + /* HDIO_GET_IDENTITY currently returns only words 0 through 70 */ + unsigned short word71; + unsigned short word72; + unsigned short word73; + unsigned short word74; + unsigned short word75; + unsigned short word76; + unsigned short word77; + unsigned short word78; + unsigned short word79; + unsigned short word80; + unsigned short word81; + unsigned short word82; + unsigned short word83; + unsigned short word84; + unsigned short word85; + unsigned short word86; + unsigned short word87; + unsigned short dma_ultra; + unsigned short reserved[167]; }; /* diff -u --recursive --new-file v2.1.67/linux/include/linux/hfmodem.h linux/include/linux/hfmodem.h --- v2.1.67/linux/include/linux/hfmodem.h Mon Aug 11 14:47:05 1997 +++ linux/include/linux/hfmodem.h Sun Nov 30 10:30:19 1997 @@ -100,6 +100,7 @@ /* --------------------------------------------------------------------- */ #ifdef __KERNEL__ +#include #define DMA_MODE_AUTOINIT 0x10 @@ -133,6 
+134,7 @@ unsigned int pariobase; unsigned int midiiobase; unsigned int flags; + struct pardevice *pardev; } ptt_out; struct { diff -u --recursive --new-file v2.1.67/linux/include/linux/if_tunnel.h linux/include/linux/if_tunnel.h --- v2.1.67/linux/include/linux/if_tunnel.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/if_tunnel.h Sun Nov 30 14:00:38 1997 @@ -0,0 +1,29 @@ +#ifndef _IF_TUNNEL_H_ +#define _IF_TUNNEL_H_ + +#define SIOCGETTUNNEL (SIOCDEVPRIVATE + 0) +#define SIOCADDTUNNEL (SIOCDEVPRIVATE + 1) +#define SIOCDELTUNNEL (SIOCDEVPRIVATE + 2) +#define SIOCCHGTUNNEL (SIOCDEVPRIVATE + 3) + +#define GRE_CSUM __constant_htons(0x8000) +#define GRE_ROUTING __constant_htons(0x4000) +#define GRE_KEY __constant_htons(0x2000) +#define GRE_SEQ __constant_htons(0x1000) +#define GRE_STRICT __constant_htons(0x0800) +#define GRE_REC __constant_htons(0x0700) +#define GRE_FLAGS __constant_htons(0x00F8) +#define GRE_VERSION __constant_htons(0x0007) + +struct ip_tunnel_parm +{ + char name[IFNAMSIZ]; + int link; + __u16 i_flags; + __u16 o_flags; + __u32 i_key; + __u32 o_key; + struct iphdr iph; +}; + +#endif /* _IF_TUNNEL_H_ */ diff -u --recursive --new-file v2.1.67/linux/include/linux/igmp.h linux/include/linux/igmp.h --- v2.1.67/linux/include/linux/igmp.h Thu Dec 12 06:54:20 1996 +++ linux/include/linux/igmp.h Sun Nov 30 14:00:38 1997 @@ -38,7 +38,7 @@ #define IGMP_PIM 0x14 /* PIM routing */ #define IGMP_TRACE 0x15 #define IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16 /* New version of 0x11 */ -#define IGMP_HOST_LEAVE_MESSAGE 0x17 /* An extra BSD seems to send */ +#define IGMP_HOST_LEAVE_MESSAGE 0x17 #define IGMP_MTRACE_RESP 0x1e #define IGMP_MTRACE 0x1f @@ -54,9 +54,6 @@ #define IGMP_SLEEPING_MEMBER 0x04 #define IGMP_AWAKENING_MEMBER 0x05 -#define IGMP_OLD_ROUTER 0x00 -#define IGMP_NEW_ROUTER 0x01 - #define IGMP_MINLEN 8 #define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */ @@ -65,7 +62,7 @@ #define IGMP_TIMER_SCALE 10 /* denotes that the igmphdr->timer field */ /* 
specifies time in 10th of seconds */ -#define IGMP_AGE_THRESHOLD 540 /* If this host don't hear any IGMP V1 */ +#define IGMP_AGE_THRESHOLD 400 /* If this host don't hear any IGMP V1 */ /* message in this period of time, */ /* revert to IGMP v2 router. */ @@ -79,40 +76,53 @@ */ #ifdef __KERNEL__ + +/* ip_mc_socklist is real list now. Speed is not argument; + this list never used in fast path code + */ + struct ip_mc_socklist { - unsigned long multiaddr[IP_MAX_MEMBERSHIPS]; /* This is a speed trade off */ - struct device *multidev[IP_MAX_MEMBERSHIPS]; + struct ip_mc_socklist *next; + int count; + struct ip_mreqn multi; }; struct ip_mc_list { - struct device *interface; - unsigned long multiaddr; - struct ip_mc_list *next; - struct timer_list timer; - int users; - char tm_running; - char reporter; + struct in_device *interface; + unsigned long multiaddr; + struct ip_mc_list *next; + struct timer_list timer; + int users; + char tm_running; + char reporter; + char unsolicit_count; }; -struct ip_router_info +extern __inline__ int ip_check_mc(struct device *dev, u32 mc_addr) { - struct device *dev; - int type; /* type of router which is querier on this interface */ - int time; /* # of slow timeouts since last old query */ - struct timer_list timer; - struct ip_router_info *next; -}; - -extern struct ip_mc_list *ip_mc_head; + struct in_device *in_dev = dev->ip_ptr; + struct ip_mc_list *im; + if (in_dev) { + for (im=in_dev->mc_list; im; im=im->next) + if (im->multiaddr == mc_addr) + return 1; + } + return 0; +} extern int igmp_rcv(struct sk_buff *, unsigned short); -extern void ip_mc_drop_device(struct device *dev); -extern int ip_mc_join_group(struct sock *sk, struct device *dev, unsigned long addr); -extern int ip_mc_leave_group(struct sock *sk, struct device *dev,unsigned long addr); +extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); +extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); 
extern void ip_mr_init(void); +extern void ip_mc_init_dev(struct in_device *); +extern void ip_mc_destroy_dev(struct in_device *); +extern void ip_mc_up(struct in_device *); +extern void ip_mc_down(struct in_device *); +extern int ip_mc_dec_group(struct in_device *in_dev, u32 addr); +extern void ip_mc_inc_group(struct in_device *in_dev, u32 addr); #endif #endif diff -u --recursive --new-file v2.1.67/linux/include/linux/in.h linux/include/linux/in.h --- v2.1.67/linux/include/linux/in.h Mon Jul 7 16:02:01 1997 +++ linux/include/linux/in.h Sun Nov 30 14:10:40 1997 @@ -31,9 +31,13 @@ IPPROTO_PUP = 12, /* PUP protocol */ IPPROTO_UDP = 17, /* User Datagram Protocol */ IPPROTO_IDP = 22, /* XNS IDP protocol */ + IPPROTO_RSVP = 46, /* RSVP protocol */ + IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */ + IPPROTO_PIM = 103, /* Protocol Independent Multicast */ + IPPROTO_RAW = 255, /* Raw IP packets */ IPPROTO_MAX }; @@ -48,15 +52,15 @@ #define IP_TTL 2 #define IP_HDRINCL 3 #define IP_OPTIONS 4 -#define IP_LOCALADDR 5 /* Cannot remove; a lot of apps still use it. ANK */ +#define IP_ROUTER_ALERT 5 #define IP_RECVOPTS 6 #define IP_RETOPTS 7 -#define IP_RXINFO 8 -#define IP_TXINFO IP_RXINFO -/* Gated uses it. Remove later or preserve for 4.4BSD compatibility??? 
*/ -#define IP_RECVDSTADDR 9 +#define IP_PKTINFO 8 +#define IP_PKTOPTIONS 9 #define IP_PMTUDISC 10 #define IP_RECVERR 11 +#define IP_RECVTTL 12 +#define IP_RECVTOS 13 /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS @@ -71,9 +75,6 @@ #define IP_MULTICAST_LOOP 34 #define IP_ADD_MEMBERSHIP 35 #define IP_DROP_MEMBERSHIP 36 -#define IP_MULTICAST_IFN 37 -#define IP_ADD_MEMBERSHIPN 38 -#define IP_DROP_MEMBERSHIPN 39 /* These need to appear somewhere around here */ #define IP_DEFAULT_MULTICAST_TTL 1 diff -u --recursive --new-file v2.1.67/linux/include/linux/in_route.h linux/include/linux/in_route.h --- v2.1.67/linux/include/linux/in_route.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/in_route.h Sun Nov 30 14:00:38 1997 @@ -0,0 +1,31 @@ +#ifndef _LINUX_IN_ROUTE_H +#define _LINUX_IN_ROUTE_H + +/* IPv4 routing cache flags */ + +#define RTCF_DEAD RTNH_F_DEAD +#define RTCF_ONLINK RTNH_F_ONLINK + +#define RTCF_NOPMTUDISC RTM_F_NOPMTUDISC + +#define RTCF_NOTIFY 0x00010000 +#define RTCF_DIRECTDST 0x00020000 +#define RTCF_REDIRECTED 0x00040000 + +#define RTCF_VALVE 0x00200000 +#define RTCF_MASQ 0x00400000 +#define RTCF_SNAT 0x00800000 +#define RTCF_DOREDIRECT 0x01000000 +#define RTCF_LOG 0x02000000 +#define RTCF_DIRECTSRC 0x04000000 +#define RTCF_DNAT 0x08000000 +#define RTCF_BROADCAST 0x10000000 +#define RTCF_MULTICAST 0x20000000 +#define RTCF_REJECT 0x40000000 +#define RTCF_LOCAL 0x80000000 + +#define RTCF_NAT (RTCF_DNAT|RTCF_SNAT) + +#define RT_TOS(tos) ((tos)&IPTOS_TOS_MASK) + +#endif /* _LINUX_IN_ROUTE_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/inetdevice.h linux/include/linux/inetdevice.h --- v2.1.67/linux/include/linux/inetdevice.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/inetdevice.h Sun Nov 30 14:00:38 1997 @@ -0,0 +1,118 @@ +#ifndef _LINUX_INETDEVICE_H +#define _LINUX_INETDEVICE_H + +/* IPv4 specific flags. They are initialized from global sysctl variables, + when IPv4 is initialized. 
+ */ + +#define IFF_IP_FORWARD 1 +#define IFF_IP_PROXYARP 2 +#define IFF_IP_RXREDIRECTS 4 +#define IFF_IP_TXREDIRECTS 8 +#define IFF_IP_SHAREDMEDIA 0x10 +#define IFF_IP_MFORWARD 0x20 +#define IFF_IP_RPFILTER 0x40 + +#ifdef __KERNEL__ + +struct in_device +{ + struct device *dev; + struct in_ifaddr *ifa_list; /* IP ifaddr chain */ + struct ip_mc_list *mc_list; /* IP multicast filter chain */ + unsigned long mr_v1_seen; + unsigned flags; +}; + + +#define IN_DEV_RPFILTER(in_dev) (ipv4_config.rfc1812_filter && ((in_dev)->flags&IFF_IP_RPFILTER)) +#define IN_DEV_MFORWARD(in_dev) (ipv4_config.multicast_route && ((in_dev)->flags&IFF_IP_MFORWARD)) +#define IN_DEV_PROXY_ARP(in_dev) ((in_dev)->flags&IFF_IP_PROXYARP) + +#if 1 +#define IN_DEV_FORWARD(in_dev) (IS_ROUTER) +#define IN_DEV_RX_REDIRECTS(in_dev) (ipv4_config.accept_redirects) +#define IN_DEV_TX_REDIRECTS(in_dev) (1) +#define IN_DEV_SHARED_MEDIA(in_dev) (ipv4_config.rfc1620_redirects) +#else +#define IN_DEV_FORWARD(in_dev) (ipv4_config.ip_forwarding==1 && ((in_dev)->flags&IFF_IP_FORWARD)) +#define IN_DEV_RX_REDIRECTS(in_dev) ((in_dev)->flags&IFF_IP_RXREDIRECTS) +#define IN_DEV_TX_REDIRECTS(in_dev) ((in_dev)->flags&IFF_IP_TXREDIRECTS) +#define IN_DEV_SHARED_MEDIA(in_dev) ((in_dev)->flags&IFF_IP_SHAREDMEDIA) +#endif + +struct in_ifaddr +{ + struct in_ifaddr *ifa_next; + struct in_device *ifa_dev; + u32 ifa_local; + u32 ifa_address; + u32 ifa_mask; + u32 ifa_broadcast; + u32 ifa_anycast; + unsigned char ifa_scope; + unsigned char ifa_flags; + unsigned char ifa_prefixlen; + char ifa_label[IFNAMSIZ]; +}; + +extern int register_inetaddr_notifier(struct notifier_block *nb); +extern int unregister_inetaddr_notifier(struct notifier_block *nb); + +extern struct device *ip_dev_find(u32 addr); +extern struct in_ifaddr *inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b); +extern int devinet_ioctl(unsigned int cmd, void *); +extern void devinet_init(void); +extern struct in_device *inetdev_init(struct device *dev); +extern 
struct in_device *inetdev_by_index(int); +extern u32 inet_select_addr(struct device *dev, u32 dst, int scope); +extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask); +extern int inet_add_bootp_addr(struct device *dev); +extern void inet_del_bootp_addr(struct device *dev); + +extern __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa) +{ + return !((addr^ifa->ifa_address)&ifa->ifa_mask); +} + +/* + * Check if a mask is acceptable. + */ + +extern __inline__ int bad_mask(u32 mask, u32 addr) +{ + if (addr & (mask = ~mask)) + return 1; + mask = ntohl(mask); + if (mask & (mask+1)) + return 1; + return 0; +} + +#define for_primary_ifa(in_dev) { struct in_ifaddr *ifa; \ + for (ifa = (in_dev)->ifa_list; ifa && !(ifa->ifa_flags&IFA_F_SECONDARY); ifa = ifa->ifa_next) + +#define for_ifa(in_dev) { struct in_ifaddr *ifa; \ + for (ifa = (in_dev)->ifa_list; ifa; ifa = ifa->ifa_next) + + +#define endfor_ifa(in_dev) } + +#endif /* __KERNEL__ */ + +extern __inline__ __u32 inet_make_mask(int logmask) +{ + if (logmask) + return htonl(~((1<<(32-logmask))-1)); + return 0; +} + +extern __inline__ int inet_mask_len(__u32 mask) +{ + if (!(mask = ntohl(mask))) + return 0; + return 32 - ffz(~mask); +} + + +#endif /* _LINUX_INETDEVICE_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/kernel.h linux/include/linux/kernel.h --- v2.1.67/linux/include/linux/kernel.h Mon Oct 20 10:36:53 1997 +++ linux/include/linux/kernel.h Sun Nov 30 10:59:03 1997 @@ -50,10 +50,6 @@ extern int session_of_pgrp(int pgrp); -extern int kill_proc(int pid, int sig, int priv); -extern int kill_pg(int pgrp, int sig, int priv); -extern int kill_sl(int sess, int sig, int priv); - asmlinkage int printk(const char * fmt, ...) 
__attribute__ ((format (printf, 1, 2))); diff -u --recursive --new-file v2.1.67/linux/include/linux/mroute.h linux/include/linux/mroute.h --- v2.1.67/linux/include/linux/mroute.h Mon Jul 7 16:04:18 1997 +++ linux/include/linux/mroute.h Sun Nov 30 14:12:48 1997 @@ -10,6 +10,9 @@ * * See the mrouted code for the original history. * + * Protocol Independent Multicast (PIM) data structures included + * Carlos Picoto (cap@di.fc.ul.pt) + * */ #define MRT_BASE 200 @@ -57,16 +60,9 @@ struct in_addr vifc_rmt_addr; /* IPIP tunnel addr */ }; -#define VIFF_TUNNEL 0x1 /* IPIP tunnel */ -#define VIFF_SRCRT 0x2 /* NI */ - - -/* PIM Vif Flags */ -#define VIFF_DR 0x0010 /* designated router */ -#define VIFF_NOMRT 0x0020 /* no neighbor on vif */ -#define VIFF_DOWN 0x0040 /* interface is down */ -#define VIFF_DISABLED 0x0080 /* disabled interafce */ -#define VIFF_REGISTER 0x00A0 /* MIssing cap@di.fc.ul.pt */ +#define VIFF_TUNNEL 0x1 /* IPIP tunnel */ +#define VIFF_SRCRT 0x2 /* NI */ +#define VIFF_REGISTER 0x4 /* register vif */ /* * Cache manipulation structures for mrouted and PIMd @@ -111,23 +107,13 @@ }; /* - * To get RPF from unicast routing table (PIM: cap@di.fc.ul.pt) - */ -struct sioc_rpf_req -{ - unsigned long source; /* Source address */ - unsigned long rpfneighbor; /* RPF */ - vifi_t iif; /* Incoming Interface */ -}; - -/* * This is the format the mroute daemon expects to see IGMP control * data. Magically happens to be like an IP packet as per the original */ struct igmpmsg { - unsigned long unused1,unused2; + __u32 unused1,unused2; unsigned char im_msgtype; /* What is this */ unsigned char im_mbz; /* Must be zero */ unsigned char im_vif; /* Interface (this ought to be a vifi_t!) 
*/ @@ -147,22 +133,19 @@ extern void mroute_close(struct sock *sk); extern void ipmr_forward(struct sk_buff *skb, int is_frag); extern int ip_mr_find_tunnel(__u32, __u32); +extern void ip_mr_init(void); struct vif_device { - union - { - struct device *dev; /* Device we are using */ - struct rtable *rt; /* Route for tunnel */ - } u; + struct device *dev; /* Device we are using */ unsigned long bytes_in,bytes_out; unsigned long pkt_in,pkt_out; /* Statistics */ unsigned long rate_limit; /* Traffic shaping (NI) */ unsigned char threshold; /* TTL threshold */ unsigned short flags; /* Control flags */ - unsigned long local,remote; /* Addresses(remote for tunnels)*/ - unsigned long uptime; + __u32 local,remote; /* Addresses(remote for tunnels)*/ + int link; /* Physical interface index */ }; struct mfc_cache @@ -175,11 +158,9 @@ int mfc_flags; /* Flags on line */ struct sk_buff_head mfc_unresolved; /* Unresolved buffers */ int mfc_queuelen; /* Unresolved buffer counter */ - unsigned mfc_last_assert; + unsigned long mfc_last_assert; int mfc_minvif; int mfc_maxvif; - unsigned long uptime; - unsigned long expire; unsigned long mfc_bytes; unsigned long mfc_pkt; unsigned long mfc_wrong_if; @@ -188,6 +169,7 @@ #define MFC_QUEUED 1 #define MFC_RESOLVED 2 +#define MFC_NOTIFY 4 #define MFC_LINES 64 @@ -210,5 +192,32 @@ #define IGMPMSG_NOCACHE 1 /* Kern cache fill request to mrouted */ #define IGMPMSG_WRONGVIF 2 /* For PIM assert processing (unused) */ #define IGMPMSG_WHOLEPKT 3 /* For PIM Register processing */ + +#ifdef __KERNEL__ + +#define PIM_V1_VERSION __constant_htonl(0x10000000) +#define PIM_V1_REGISTER 1 + +#define PIM_VERSION 2 +#define PIM_REGISTER 1 + +#define PIM_NULL_REGISTER __constant_htonl(0x40000000) + +/* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */ + +struct pimreghdr +{ + __u8 type; + __u8 reserved; + __u16 csum; + __u32 flags; +}; + +extern int pim_rcv(struct sk_buff * , unsigned short); +extern int pim_rcv_v1(struct sk_buff * , 
unsigned short len); + +struct rtmsg; +extern int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm); +#endif #endif diff -u --recursive --new-file v2.1.67/linux/include/linux/net.h linux/include/linux/net.h --- v2.1.67/linux/include/linux/net.h Thu Jun 26 12:33:40 1997 +++ linux/include/linux/net.h Sun Nov 30 14:10:40 1997 @@ -18,11 +18,11 @@ #ifndef _LINUX_NET_H #define _LINUX_NET_H - -#include #include -#define NPROTO 16 /* should be enough for now.. */ +struct poll_table_struct; + +#define NPROTO 32 /* should be enough for now.. */ #define SYS_SOCKET 1 /* sys_socket(2) */ @@ -93,7 +93,7 @@ int flags); int (*getname) (struct socket *sock, struct sockaddr *uaddr, int *usockaddr_len, int peer); - unsigned int (*poll) (struct socket *sock, poll_table *wait); + unsigned int (*poll) (struct socket *sock, struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); int (*listen) (struct socket *sock, int len); diff -u --recursive --new-file v2.1.67/linux/include/linux/net_alias.h linux/include/linux/net_alias.h --- v2.1.67/linux/include/linux/net_alias.h Mon Jul 7 16:02:47 1997 +++ linux/include/linux/net_alias.h Wed Dec 31 16:00:00 1969 @@ -1,187 +0,0 @@ -/* - * NET_ALIAS network device aliasing definitions. - * - * - * Version: @(#)net_alias.h 0.43 12/20/95 - * - * Author: Juan Jose Ciarlante, - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#ifndef _NET_ALIAS_H -#define _NET_ALIAS_H - -#include - -#ifdef CONFIG_NET_ALIAS -#include -#include -#include - -/* - * max. alias slot number allowed - */ - -#define NET_ALIAS_MAX_SLOT 256 - -struct net_alias; -struct net_alias_info; -struct net_alias_type; - - -/* - * Main alias structure - * Note that *defines* dev & devname. 
- */ - -struct net_alias -{ - struct device dev; /* alias device defn*/ - char name[IFNAMSIZ]; /* device name defn */ - unsigned hash; /* my hash value: for quick rehash */ - unsigned slot; /* slot number */ - void *data; /* private data */ - struct device *main_dev; /* pointer to main device */ - struct net_alias_type *nat; /* alias type object bound */ - struct net_alias *next; /* next alias (hashed linked list) */ -}; - - -/* - * alias structure pointed by main device - * it holds main device's alias hash table - */ - -struct net_alias_info -{ - int n_aliases; /* num aliases */ - struct device *taildev; /* my last (alias) device */ - struct net_alias *hash_tab[16]; /* hashed alias table */ -}; - -/* - * net_alias_type class - * Declares a generic (AF_ independent) structure that will - * manage generic to family-specific behavior. - */ - -struct net_alias_type -{ - int type; /* aliasing type: address family */ - int n_attach; /* number of aliases attached */ - char name[16]; /* af_name */ - __u32 (*get_addr32) /* get __u32 addr 'representation'*/ - (struct net_alias_type *this, struct sockaddr*); - int (*dev_addr_chk) /* address checking func: */ - (struct net_alias_type *this, struct device *, struct sockaddr *); - struct device * (*dev_select) /* closest alias selector*/ - (struct net_alias_type *this, struct device *, struct sockaddr *sa); - int (*alias_init_1) /* called after alias creation: */ - (struct net_alias_type *this,struct net_alias *alias, struct sockaddr *sa); - int (*alias_done_1) /* called before alias deletion */ - (struct net_alias_type *this, struct net_alias *alias); - int (*alias_print_1) - (struct net_alias_type *this, struct net_alias *alias, char *buf, int len); - struct net_alias_type *next; /* link */ -}; - - -/* - * is dev an alias? - */ - -#ifdef CONFIG_NET_ALIAS - -extern __inline__ int net_alias_is(struct device *dev) -{ - return (dev->my_alias != NULL); -} - -/* - * Does dev have aliases? 
- */ - -extern __inline__ int net_alias_has(struct device *dev) -{ - return (dev->alias_info != NULL); -} - -/* - * Returns MY 'true' main device - * intended for alias devices - */ - -extern __inline__ struct device *net_alias_main_dev(struct device *dev) -{ - return (net_alias_is(dev))? dev->my_alias->main_dev : dev; -} - - -/* - * Returns NEXT 'true' device - * intended for true devices - */ - -extern __inline__ struct device *net_alias_nextdev(struct device *dev) -{ - return (dev->alias_info)? dev->alias_info->taildev->next : dev->next; -} - -/* - * Sets NEXT 'true' device - * Intended for main devices (treat main device as block: dev+aliases). - */ - -extern __inline__ struct device *net_alias_nextdev_set(struct device *dev, struct device *nextdev) -{ - struct device *pdev = dev; - if (net_alias_has(dev)) - { - pdev = dev->alias_info->taildev; /* point to last dev alias */ - } - pdev->next = nextdev; - return nextdev; -} - -#else - -#define net_alias_has(dev) (0) -#define net_alias_is(dev) (0) -#define net_alias_main_dev(dev) (dev) -#endif - - -extern void net_alias_init(void); - -extern struct device * net_alias_dev_get(char *dev_name, int aliasing_ok, int *err, struct sockaddr *sa, void *data); -extern int net_alias_dev_rehash(struct device *dev, struct sockaddr *sa); - -extern int net_alias_getinfo(char *buf, char **, off_t , int , int ); -extern int net_alias_types_getinfo(char *buf, char **, off_t , int , int ); - -extern int register_net_alias_type(struct net_alias_type *nat, int type); -extern int unregister_net_alias_type(struct net_alias_type *nat); - -extern struct device * net_alias_dev_chk(struct device *main_dev, struct sockaddr *sa, int flags_on, int flags_off); -extern struct device * net_alias_dev_chk32(struct device *main_dev, int family, __u32 addr32, int flags_on, int flags_off); - -extern struct device * net_alias_dev_rcv_sel(struct device *main_dev, struct sockaddr *sa_src, struct sockaddr *sa_dst); -extern struct device * 
net_alias_dev_rcv_sel32(struct device *main_dev, int family, __u32 src, __u32 dst); - - - -#else - -#define net_alias_is(a) 0 -#define net_alias_main_dev(dev) (dev) -#define net_alias_has(dev) 0 - -#endif - -#endif /* _NET_ALIAS_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/netdevice.h linux/include/linux/netdevice.h --- v2.1.67/linux/include/linux/netdevice.h Thu Sep 4 17:07:31 1997 +++ linux/include/linux/netdevice.h Sun Nov 30 14:11:18 1997 @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -34,7 +35,6 @@ * For future expansion when we will have different priorities. */ -#define DEV_NUMBUFFS 3 /* Number of queues per device */ #define MAX_ADDR_LEN 7 /* Largest hardware address length */ /* @@ -59,18 +59,6 @@ #define MAX_HEADER (LL_MAX_HEADER + 48) #endif -#define IS_MYADDR 1 /* address is (one of) our own */ -#define IS_LOOPBACK 2 /* address is for LOOPBACK */ -#define IS_BROADCAST 3 /* address is a valid broadcast */ -#define IS_INVBCAST 4 /* Wrong netmask bcast not for us (unused)*/ -#define IS_MULTICAST 5 /* Multicast IP address */ - -/* NOTE: move to ipv4_device.h */ - -#define IFF_IP_ADDR_OK 1 -#define IFF_IP_MASK_OK 2 -#define IFF_IP_BRD_OK 4 - struct neighbour; /* @@ -188,10 +176,11 @@ /* The device initialization function. Called only once. */ int (*init)(struct device *dev); + void (*destructor)(struct device *dev); /* Interface index. 
Unique device identifier */ int ifindex; - struct device *next_up; + int iflink; /* * Some hardware also needs these fields, but they are not @@ -215,7 +204,7 @@ unsigned long last_rx; /* Time of last Rx */ unsigned short flags; /* interface flags (a la BSD) */ - unsigned short family; /* address family ID (AF_INET) */ + unsigned short gflags; unsigned short metric; /* routing metric (not used) */ unsigned short mtu; /* interface MTU value */ unsigned short type; /* interface hardware type */ @@ -227,34 +216,25 @@ unsigned char pad; /* make dev_addr aligned to 8 bytes */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ unsigned char addr_len; /* hardware address length */ - unsigned long pa_addr; /* protocol address */ - - unsigned long pa_brdaddr; /* protocol broadcast addr */ - unsigned long pa_dstaddr; /* protocol P-P other side addr */ - unsigned long pa_mask; /* protocol netmask */ - unsigned short pa_alen; /* protocol address length */ struct dev_mc_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ - - struct ip_mc_list *ip_mc_list; /* IP multicast filter chain */ - unsigned ip_flags; /* IP layer control flags */ - __u32 tx_queue_len; /* Max frames per queue allowed */ + int promiscuity; + int allmulti; /* For load balancing driver pair support */ unsigned long pkt_queue; /* Packets queued */ struct device *slave; /* Slave device */ - struct net_alias_info *alias_info; /* main dev alias info */ - struct net_alias *my_alias; /* alias devs */ /* Protocol specific pointers */ void *atalk_ptr; /* Appletalk link */ - void *ip_ptr; /* Not used yet */ + void *ip_ptr; /* IPv4 specific data */ - /* Pointer to the interface buffers. */ - struct sk_buff_head buffs[DEV_NUMBUFFS]; + struct Qdisc *qdisc; + struct Qdisc *qdisc_sleeping; + unsigned long tx_queue_len; /* Max frames per queue allowed */ /* Pointers to interface service routines. 
*/ int (*open)(struct device *dev); @@ -289,6 +269,8 @@ #define HAVE_CHANGE_MTU int (*change_mtu)(struct device *dev, int new_mtu); + int (*hard_header_parse)(struct sk_buff *skb, + unsigned char *haddr); }; @@ -309,16 +291,8 @@ extern struct device loopback_dev; /* The loopback */ extern struct device *dev_base; /* All devices */ extern struct packet_type *ptype_base[16]; /* Hashed types */ - -/* NOTE: move to INET specific header; - __ip_chk_addr is deprecated, do not use if it's possible. - */ - -extern int __ip_chk_addr(unsigned long addr); -extern struct device *ip_dev_find(unsigned long addr, char *name); -/* This is the wrong place but it'll do for the moment */ -extern void ip_mc_allhost(struct device *dev); -extern int devinet_ioctl(unsigned int cmd, void *); +extern int netdev_dropping; +extern int net_cpu_congestion; extern struct device *dev_getbyhwaddr(unsigned short type, char *hwaddr); extern void dev_add_pack(struct packet_type *pt); @@ -330,16 +304,28 @@ extern int dev_close(struct device *dev); extern int dev_queue_xmit(struct sk_buff *skb); extern void dev_loopback_xmit(struct sk_buff *skb); - +extern int register_netdevice(struct device *dev); +extern int unregister_netdevice(struct device *dev); +extern int register_netdevice_notifier(struct notifier_block *nb); +extern int unregister_netdevice_notifier(struct notifier_block *nb); +extern int dev_new_index(void); +extern struct device *dev_get_by_index(int ifindex); +extern int register_gifconf(int family, int (*func)(struct device *dev, char *bufptr, int len)); +extern int dev_restart(struct device *dev); + #define HAVE_NETIF_RX 1 extern void netif_rx(struct sk_buff *skb); extern void net_bh(void); extern void dev_tint(struct device *dev); extern int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy); extern int dev_ioctl(unsigned int cmd, void *); +extern int dev_change_flags(struct device *, unsigned); +extern void dev_queue_xmit_nit(struct sk_buff *skb, struct 
device *dev); extern void dev_init(void); +extern int netdev_nit; + /* Locking protection for page faults during outputs to devices unloaded during the fault */ extern atomic_t dev_lockct; @@ -365,30 +351,24 @@ * * FIXME: What if this is being run as a real time process ?? * Linus: We need a way to force a yield here ? + * + * FIXME: Though dev_lockct is atomic varible, locking procedure + * is not atomic. */ - + extern __inline__ void dev_lock_wait(void) { - while(atomic_read(&dev_lockct)) + while (atomic_read(&dev_lockct)) { + current->counter = 0; schedule(); + } } -/* - * Buffer initialisation function. This used to appear in all the - * drivers but is now an inline in case we ever want to change the - * schemes used. - */ - extern __inline__ void dev_init_buffers(struct device *dev) { - int i; - for(i=0;ibuffs[i]); - } + /* DO NOTHING */ } - /* These functions live elsewhere (drivers/net/net_init.c, but related) */ extern void ether_setup(struct device *dev); @@ -399,8 +379,6 @@ /* Support for loadable net-drivers */ extern int register_netdev(struct device *dev); extern void unregister_netdev(struct device *dev); -extern int register_netdevice_notifier(struct notifier_block *nb); -extern int unregister_netdevice_notifier(struct notifier_block *nb); extern int register_trdev(struct device *dev); extern void unregister_trdev(struct device *dev); /* Functions used for multicast support */ @@ -408,10 +386,11 @@ extern void dev_mc_delete(struct device *dev, void *addr, int alen, int all); extern void dev_mc_add(struct device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct device *dev); +extern void dev_set_promiscuity(struct device *dev, int inc); +extern void dev_set_allmulti(struct device *dev, int inc); /* Load a device via the kerneld */ extern void dev_load(const char *name); -extern int dev_new_index(void); -extern struct device * dev_get_by_index(int ifindex); + #endif /* __KERNEL__ */ diff -u --recursive --new-file 
v2.1.67/linux/include/linux/netlink.h linux/include/linux/netlink.h --- v2.1.67/linux/include/linux/netlink.h Thu Dec 12 06:54:20 1996 +++ linux/include/linux/netlink.h Sun Nov 30 14:00:38 1997 @@ -1,20 +1,174 @@ #ifndef __LINUX_NETLINK_H #define __LINUX_NETLINK_H +#define NETLINK_ROUTE 0 /* Routing/device hook */ +#define NETLINK_SKIP 1 /* Reserved for ENskip */ +#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ +#define NETLINK_FIREWALL 3 /* Firewalling hook */ +#define NETLINK_ARPD 8 +#define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */ +#define NETLINK_IP6_FW 13 +#define NETLINK_TAPBASE 16 /* 16 to 31 are ethertap */ + +#define MAX_LINKS 32 + +struct sockaddr_nl +{ + sa_family_t nl_family; /* AF_NETLINK */ + unsigned short nl_pad; /* zero */ + __kernel_pid_t nl_pid; /* process pid */ + unsigned nl_groups; /* multicast groups mask */ +}; + struct nlmsghdr { - unsigned long nlmsg_len; /* Length of message including header */ - unsigned long nlmsg_type; /* Message type */ - unsigned long nlmsg_seq; /* Sequence number */ - unsigned long nlmsg_pid; /* Sending process PID */ - unsigned char nlmsg_data[0]; + __u32 nlmsg_len; /* Length of message including header */ + __u16 nlmsg_type; /* Message content */ + __u16 nlmsg_flags; /* Additional flags */ + __u32 nlmsg_seq; /* Sequence number */ + __kernel_pid_t nlmsg_pid; /* Sending process PID */ +}; + +/* Flags values */ + +#define NLM_F_REQUEST 1 /* It is request message. 
*/ +#define NLM_F_MULTI 2 /* Multipart message, terminated by NLMSG_DONE */ +#define NLM_F_ACK 4 /* If succeed, reply with ack */ +#define NLM_F_ECHO 8 /* Echo this request */ + +/* Modifiers to GET request */ +#define NLM_F_ROOT 0x100 /* specify tree root */ +#define NLM_F_MATCH 0x200 /* return all matching */ +#define NLM_F_ATOMIC 0x400 /* atomic GET */ +#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH) + +/* Modifiers to NEW request */ +#define NLM_F_REPLACE 0x100 /* Override existing */ +#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */ +#define NLM_F_CREATE 0x400 /* Create, if it does not exist */ + +/* + 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL + 4.4BSD CHANGE NLM_F_REPLACE + + True CHANGE NLM_F_CREATE|NLM_F_REPLACE + Append NLM_F_CREATE + Check NLM_F_EXCL + */ + +#define NLMSG_ALIGNTO 4 +#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) +#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(sizeof(struct nlmsghdr))) +#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) +#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) +#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ + (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) +#define NLMSG_OK(nlh,len) ((nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \ + (nlh)->nlmsg_len <= (len)) + +#define NLMSG_NOOP 0x1 /* Nothing. 
*/ +#define NLMSG_ERROR 0x2 /* Error */ +#define NLMSG_DONE 0x3 /* End of a dump */ +#define NLMSG_OVERRUN 0x4 /* Data lost */ + +struct nlmsgerr +{ + int error; + struct nlmsghdr msg; +}; + +#define NET_MAJOR 36 /* Major 36 is reserved for networking */ + +#ifdef __KERNEL__ + +struct netlink_skb_parms +{ + struct ucred creds; /* Skb credentials */ + pid_t pid; + unsigned groups; + pid_t dst_pid; + unsigned dst_groups; }; -#define NLMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) ) +#define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) +#define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -#define NLMSG_ACK 0x01 /* int - error code */ -#define NLMSG_OVERRUN 0x02 /* unsigned long[2] - start and end - * of lost message sequence numbers. - */ + +extern int netlink_attach(int unit, int (*function)(int,struct sk_buff *skb)); +extern void netlink_detach(int unit); +extern int netlink_post(int unit, struct sk_buff *skb); +extern int init_netlink(void); +extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)); +extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); +extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, pid_t pid, int nonblock); +extern void netlink_broadcast(struct sock *ssk, struct sk_buff *skb, pid_t pid, + unsigned group, int allocation); +extern void netlink_set_err(struct sock *ssk, pid_t pid, unsigned group, int code); + +/* + * skb should fit one page. This choice is good for headerless malloc. + * + * FIXME: What is the best size for SLAB???? 
--ANK + */ +#define NLMSG_GOODSIZE (PAGE_SIZE - ((sizeof(struct sk_buff)+0xF)&~0xF)) + + +struct netlink_callback +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); + int (*done)(struct netlink_callback *cb); + long args[4]; +}; + +#if 0 + +void* nlmsg_broadcast(struct sock*, unsigned long type, int len, unsigned groups); +struct skb_buff *nlmsg_alloc(unsigned long type, int len, + unsigned long seq, unsigned long pid, int allocation); +void __nlmsg_transmit(struct sock*, int allocation); + +extern __inline__ void nlmsg_release(struct sk_buff *skb) +{ + atomic_dec(skb->users); +} + +extern __inline__ void nlmsg_transmit(struct sk_buff *sk, int allocation) +{ + if (sk->write_queue.qlen) + __nlmsg_transmit(sk, allocation); +} #endif + +extern __inline__ struct nlmsghdr * +__nlmsg_put(struct sk_buff *skb, pid_t pid, u32 seq, int type, int len) +{ + struct nlmsghdr *nlh; + int size = NLMSG_LENGTH(len); + + nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); + nlh->nlmsg_type = type; + nlh->nlmsg_len = size; + nlh->nlmsg_flags = 0; + nlh->nlmsg_pid = pid; + nlh->nlmsg_seq = seq; + return nlh; +} + +#define NLMSG_PUT(skb, pid, seq, type, len) \ +({ if (skb_tailroom(skb) < NLMSG_SPACE(len)) goto nlmsg_failure; \ + __nlmsg_put(skb, pid, seq, type, len); }) + +extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, + struct nlmsghdr *nlh, + int (*dump)(struct sk_buff *skb, struct netlink_callback*), + int (*done)(struct netlink_callback*)); + + +extern void netlink_proto_init(struct net_proto *pro); + +#endif /* __KERNEL__ */ + +#endif /* __LINUX_NETLINK_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/parport.h linux/include/linux/parport.h --- v2.1.67/linux/include/linux/parport.h Wed Sep 3 20:52:44 1997 +++ linux/include/linux/parport.h Sat Nov 29 16:19:47 1997 @@ -1,4 +1,4 @@ -/* $Id: parport.h,v 1.2.6.3.2.2 1997/04/18 15:03:53 phil Exp $ */ +/* $Id: parport.h,v 1.3 
1997/10/19 18:02:00 phil Exp $ */ #ifndef _PARPORT_H_ #define _PARPORT_H_ @@ -160,7 +160,7 @@ /* A parallel port */ struct parport { - unsigned int base; /* base address */ + unsigned long base; /* base address */ unsigned int size; /* IO extent */ char *name; int irq; /* interrupt (or -1 for none) */ @@ -266,11 +266,11 @@ extern int parport_proc_register(struct parport *pp); extern int parport_proc_unregister(struct parport *pp); -/* Prototypes from parport_ksyms.c */ extern void dec_parport_count(void); extern void inc_parport_count(void); extern int parport_probe(struct parport *port, char *buffer, int len); extern void parport_probe_one(struct parport *port); +extern void (*parport_probe_hook)(struct parport *port); #endif /* _PARPORT_H_ */ diff -u --recursive --new-file v2.1.67/linux/include/linux/pci.h linux/include/linux/pci.h --- v2.1.67/linux/include/linux/pci.h Wed Nov 26 16:24:03 1997 +++ linux/include/linux/pci.h Sun Nov 30 13:48:48 1997 @@ -43,8 +43,8 @@ -#ifndef PCI_H -#define PCI_H +#ifndef LINUX_PCI_H +#define LINUX_PCI_H /* * Under PCI, each device has 256 bytes of configuration address space, @@ -404,7 +404,7 @@ #define PCI_DEVICE_ID_MOTOROLA_RAVEN 0x4801 #define PCI_VENDOR_ID_PROMISE 0x105a -#define PCI_DEVICE_ID_PROMISE_IDE_UDMA 0x4d33 +#define PCI_DEVICE_ID_PROMISE_20246 0x4d33 #define PCI_DEVICE_ID_PROMISE_5300 0x5300 #define PCI_VENDOR_ID_N9 0x105d @@ -802,6 +802,7 @@ #define PCI_DEVICE_ID_ARK_STINGARK 0xa099 #define PCI_DEVICE_ID_ARK_2000MT 0xa0a1 +#ifdef __KERNEL__ /* * The PCI interface treats multi-function devices as independent * devices. 
The slot/function address of each device is encoded @@ -892,5 +893,5 @@ extern const char *pci_strdev (unsigned int vendor, unsigned int device); extern int get_pci_list (char *buf); - -#endif /* PCI_H */ +#endif /* __KERNEL__ */ +#endif /* LINUX_PCI_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/pkt_sched.h linux/include/linux/pkt_sched.h --- v2.1.67/linux/include/linux/pkt_sched.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/pkt_sched.h Sun Nov 30 14:00:38 1997 @@ -0,0 +1,93 @@ +#ifndef __LINUX_PKT_SCHED_H +#define __LINUX_PKT_SCHED_H + +#define PSCHED_TC_INIT 1 +#define PSCHED_TC_DESTROY 2 +#define PSCHED_TC_ATTACH 3 +#define PSCHED_TC_DETACH 4 + + +/* "Logical" priority bands, not depending on concrete packet scheduler. Every scheduler will map them to real traffic classes, if it has no more precise mechanism. */ + +#define TC_PRIO_BESTEFFORT 0 +#define TC_PRIO_FILLER 1 +#define TC_PRIO_BULK 2 +#define TC_PRIO_INTERACTIVE_BULK 4 +#define TC_PRIO_INTERACTIVE 6 +#define TC_PRIO_CONTROL 7 + + +struct pschedctl +{ + int command; + int handle; + int child; + int ifindex; + char id[IFNAMSIZ]; + int arglen; + char args[0]; +}; + +/* CBQ section */ + +#define CBQ_MAXPRIO 8 +#define CBQ_MAXLEVEL 8 + +/* CSZ section */ + +struct cszctl +{ + int flow_id; + int handle; + unsigned long rate; + unsigned long max_bytes; + unsigned long depth; + unsigned long L_tab[256]; +}; + +struct cszinitctl +{ + int flows; + unsigned cell_log; +}; + +/* TBF section */ + +struct tbfctl +{ + unsigned cell_log; + unsigned long bytes; + unsigned long depth; + unsigned long L_tab[256]; +}; + +/* SFQ section */ + +struct sfqctl +{ + unsigned quantum; + unsigned depth; + unsigned divisor; + unsigned flows; +}; + +/* RED section */ + +struct redctl +{ + unsigned qmaxbytes; /* HARD maximal queue length */ + unsigned qth_min; /* Min average length threshold: A scaled */ + unsigned qth_max; /* Max average length threshold: A scaled */ + char Alog; /* Point position in
average lengths */ + char Wlog; /* log(W) */ + char Rlog; /* random number bits */ + char C1log; /* log(1/C1) */ + char Slog; + char Stab[256]; +}; + + +#endif diff -u --recursive --new-file v2.1.67/linux/include/linux/proc_fs.h linux/include/linux/proc_fs.h --- v2.1.67/linux/include/linux/proc_fs.h Wed Nov 12 13:34:28 1997 +++ linux/include/linux/proc_fs.h Sun Nov 30 14:10:45 1997 @@ -81,9 +81,6 @@ PROC_NET_UNIX = 128, PROC_NET_ARP, PROC_NET_ROUTE, - PROC_NET_RTCLASSES, - PROC_NET_RTLOCAL, - PROC_NET_RTRULES, PROC_NET_DEV, PROC_NET_RAW, PROC_NET_RAW6, @@ -118,8 +115,6 @@ PROC_NET_SOCKSTAT6, PROC_NET_RTCACHE, PROC_NET_AX25_BPQETHER, - PROC_NET_ALIAS_TYPES, - PROC_NET_ALIASES, PROC_NET_IP_MASQ_APP, PROC_NET_RT6, PROC_NET_RT6_TREE, diff -u --recursive --new-file v2.1.67/linux/include/linux/route.h linux/include/linux/route.h --- v2.1.67/linux/include/linux/route.h Mon Jun 30 15:25:39 1997 +++ linux/include/linux/route.h Sun Nov 30 14:12:37 1997 @@ -33,9 +33,7 @@ unsigned short rt_flags; short rt_pad2; unsigned long rt_pad3; - unsigned char rt_tos; - unsigned char rt_class; - short rt_pad4; + void *rt_pad4; short rt_metric; /* +1 for binary compatibility! */ char *rt_dev; /* forcing the device at add */ unsigned long rt_mtu; /* per route MTU/Window */ @@ -44,13 +42,11 @@ #endif unsigned long rt_window; /* Window clamping */ unsigned short rt_irtt; /* Initial RTT */ - }; #define RTF_UP 0x0001 /* route usable */ #define RTF_GATEWAY 0x0002 /* destination is a gateway */ - #define RTF_HOST 0x0004 /* host entry (net otherwise) */ #define RTF_REINSTATE 0x0008 /* reinstate route after tmout */ #define RTF_DYNAMIC 0x0010 /* created dyn. 
(by redirect) */ @@ -60,138 +56,12 @@ #define RTF_WINDOW 0x0080 /* per route window clamping */ #define RTF_IRTT 0x0100 /* Initial round trip time */ #define RTF_REJECT 0x0200 /* Reject route */ -#define RTF_STATIC 0x0400 /* Manually injected route */ -#define RTF_XRESOLVE 0x0800 /* External resolver */ -#define RTF_NOFORWARD 0x1000 /* Forwarding inhibited */ -#define RTF_THROW 0x2000 /* Go to next class */ -#define RTF_NOPMTUDISC 0x4000 /* Do not send packets with DF */ - -#define RTF_MAGIC 0x8000 /* Route added/deleted authomatically, - * when interface changes its state. - */ /* * uses RTF values >= 64k */ -#define RTCF_VALVE 0x00200000 -#define RTCF_MASQ 0x00400000 -#define RTCF_NAT 0x00800000 -#define RTCF_DOREDIRECT 0x01000000 -#define RTCF_LOG 0x02000000 -#define RTCF_DIRECTSRC 0x04000000 - -#define RTF_LOCAL 0x80000000 -#define RTF_INTERFACE 0x40000000 -#define RTF_MULTICAST 0x20000000 -#define RTF_BROADCAST 0x10000000 -#define RTF_NAT 0x08000000 - -#define RTF_ADDRCLASSMASK 0xF8000000 -#define RT_ADDRCLASS(flags) ((__u32)flags>>23) - -#define RT_TOS(tos) ((tos)&IPTOS_TOS_MASK) - -#define RT_LOCALADDR(flags) ((flags&RTF_ADDRCLASSMASK) == (RTF_LOCAL|RTF_INTERFACE)) - -#define RT_CLASS_UNSPEC 0 -#define RT_CLASS_DEFAULT 253 - -#define RT_CLASS_MAIN 254 -#define RT_CLASS_LOCAL 255 -#define RT_CLASS_MAX 255 - -#ifdef _LINUX_IN_H /* hack to check that in.h included */ -/* - * This structure is passed from the kernel to user space by netlink - * routing/device announcements - */ - -struct in_rtmsg -{ - struct in_addr rtmsg_prefix; - struct in_addr rtmsg_gateway; - unsigned rtmsg_flags; - unsigned long rtmsg_mtu; - unsigned long rtmsg_window; - unsigned short rtmsg_rtt; - short rtmsg_metric; - unsigned char rtmsg_tos; - unsigned char rtmsg_class; - unsigned char rtmsg_prefixlen; - unsigned char rtmsg_reserved; - int rtmsg_ifindex; -}; - - -struct in_ifmsg -{ - struct sockaddr ifmsg_lladdr; - struct in_addr ifmsg_prefix; - struct in_addr ifmsg_brd; - unsigned 
ifmsg_flags; - unsigned long ifmsg_mtu; - short ifmsg_metric; - unsigned char ifmsg_prefixlen; - unsigned char ifmsg_reserved; - int ifmsg_index; - char ifmsg_name[16]; -}; - -enum rtrule_actions -{ - RTP_GO, - RTP_NAT, - RTP_DROP, - RTP_UNREACHABLE, - RTP_PROHIBIT, - RTP_MASQUERADE -}; - -#define RTRF_LOG 1 /* Log route creations */ -#define RTRF_VALVE 2 /* One-way route */ - -struct in_rtrulemsg -{ - struct in_addr rtrmsg_src; - struct in_addr rtrmsg_dst; - struct in_addr rtrmsg_srcmap; - int rtrmsg_ifindex; - unsigned char rtrmsg_srclen; - unsigned char rtrmsg_dstlen; - unsigned char rtrmsg_tos; - unsigned char rtrmsg_class; - unsigned char rtrmsg_flags; - unsigned char rtrmsg_action; - unsigned char rtrmsg_preference; - unsigned char rtrmsg_rtmsgs; - struct in_rtmsg rtrmsg_rtmsg[1]; -}; - -struct in_rtctlmsg -{ - unsigned rtcmsg_flags; - int rtcmsg_delay; -}; - -#define RTCTL_ECHO 1 /* Echo route changes */ -#define RTCTL_FLUSH 2 /* Send flush updates */ -#define RTCTL_ACK 4 /* Send acks */ -#define RTCTL_DELAY 8 /* Set netlink delay */ -#define RTCTL_OWNER 0x10 /* Set netlink reader */ -#endif - -#define RTMSG_ACK NLMSG_ACK -#define RTMSG_OVERRUN NLMSG_OVERRUN -#define RTMSG_NEWDEVICE 0x11 -#define RTMSG_DELDEVICE 0x12 -#define RTMSG_NEWROUTE 0x21 -#define RTMSG_DELROUTE 0x22 -#define RTMSG_NEWRULE 0x31 -#define RTMSG_DELRULE 0x32 -#define RTMSG_CONTROL 0x40 -#define RTMSG_AR_FAILED 0x51 /* Address Resolution failed */ #endif /* _LINUX_ROUTE_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h --- v2.1.67/linux/include/linux/rtnetlink.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/rtnetlink.h Sun Nov 30 14:11:18 1997 @@ -0,0 +1,555 @@ +#ifndef __LINUX_RTNETLINK_H +#define __LINUX_RTNETLINK_H + +#include +#include + +#define RTNL_DEBUG 1 + + +/**** + * Routing/neighbour discovery messages. 
+ ****/ + +/* Types of messages */ + +#define RTM_BASE 0x10 + +#define RTM_NEWLINK (RTM_BASE+0) +#define RTM_DELLINK (RTM_BASE+1) +#define RTM_GETLINK (RTM_BASE+2) + +#define RTM_NEWADDR (RTM_BASE+4) +#define RTM_DELADDR (RTM_BASE+5) +#define RTM_GETADDR (RTM_BASE+6) + +#define RTM_NEWROUTE (RTM_BASE+8) +#define RTM_DELROUTE (RTM_BASE+9) +#define RTM_GETROUTE (RTM_BASE+10) + +#define RTM_NEWNEIGH (RTM_BASE+12) +#define RTM_DELNEIGH (RTM_BASE+13) +#define RTM_GETNEIGH (RTM_BASE+14) + +#define RTM_NEWRULE (RTM_BASE+16) +#define RTM_DELRULE (RTM_BASE+17) +#define RTM_GETRULE (RTM_BASE+18) + +#define RTM_MAX (RTM_BASE+19) + + +/* Generic structure for encapsulation optional route + information. It is reminiscent of sockaddr, but with sa_family + replaced with attribute type. + It would be good, if constructions of sort: + struct something { + struct rtattr rta; + struct a_content a; + } + had correct alignment. It is true for x86, but I have no idea + how to make it on 64bit architectures. Please, teach me. --ANK + */ + +struct rtattr +{ + unsigned short rta_len; + unsigned short rta_type; +/* + unsigned char rta_data[0]; + */ +}; + +enum rtattr_type_t +{ + RTA_UNSPEC, + RTA_DST, + RTA_SRC, + RTA_IIF, + RTA_OIF, + RTA_GATEWAY, + RTA_PRIORITY, + RTA_PREFSRC, + RTA_WINDOW, + RTA_RTT, + RTA_MTU, + RTA_IFNAME +}; + +#define RTA_MAX RTA_IFNAME + +/* Macros to handle rtattributes */ + +#define RTA_ALIGNTO 4 +#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) ) +#define RTA_OK(rta,len) ((rta)->rta_len > sizeof(struct rtattr) && \ + (rta)->rta_len <= (len)) +#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \ + (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len))) +#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) +#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len)) +#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0))) + + +/* + * "struct rtnexthop" describres all necessary nexthop information, + * i.e. 
parameters of path to a destination via this nexthop. + * + * At the moment it is impossible to set different prefsrc, mtu, window + * and rtt for different paths from multipath. + */ + +struct rtnexthop +{ + unsigned short rtnh_len; + unsigned char rtnh_flags; + unsigned char rtnh_hops; + int rtnh_ifindex; +/* + struct rtattr rtnh_data[0]; + */ +}; + +/* rtnh_flags */ + +#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ +#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ +#define RTNH_F_ONLINK 4 /* Gateway is forced on link */ + +/* Macros to handle nexthops */ + +#define RTNH_ALIGNTO 4 +#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) ) +#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \ + (rtnh)->rtnh_len <= (len)) +#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len))) +#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len)) +#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) +#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) + + +struct rtmsg +{ + unsigned char rtm_family; + unsigned char rtm_dst_len; + unsigned char rtm_src_len; + unsigned char rtm_tos; + unsigned char rtm_table; /* Routing table id */ + unsigned char rtm_protocol; /* Routing protocol; see below */ + unsigned char rtm_nhs; /* Number of nexthops */ + unsigned char rtm_type; /* See below */ + unsigned short rtm_optlen; /* Byte length of rtm_opt */ + unsigned char rtm_scope; /* See below */ + unsigned char rtm_whatsit; /* Unused byte */ + unsigned rtm_flags; +/* + struct rtattr rtm_opt[0]; + struct rtnexthop rtm_nh[0]; + */ +}; + +#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg)))) +#define RTM_RTNH(r) ((struct rtnexthop*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg)) \ + + NLMSG_ALIGN((r)->rtm_optlen))) +#define RTM_NHLEN(nlh,r) ((nlh)->nlmsg_len - NLMSG_SPACE(sizeof(struct rtmsg)) - 
NLMSG_ALIGN((r)->rtm_optlen)) + +/* rtm_type */ + +enum +{ + RTN_UNSPEC, + RTN_UNICAST, /* Gateway or direct route */ + RTN_LOCAL, /* Accept locally */ + RTN_BROADCAST, /* Accept locally as broadcast, + send as broadcast */ + RTN_ANYCAST, /* Accept locally as broadcast, + but send as unicast */ + RTN_MULTICAST, /* Multicast route */ + RTN_BLACKHOLE, /* Drop */ + RTN_UNREACHABLE, /* Destination is unreachable */ + RTN_PROHIBIT, /* Administratively prohibited */ + RTN_THROW, /* Not in this table */ + RTN_NAT, /* Translate this address */ + RTN_XRESOLVE, /* Use external resolver */ +}; + +#define RTN_MAX RTN_XRESOLVE + +/* rtm_protocol */ + +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ + +/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; + they just passed from user and back as is. + It will be used by hypothetical multiple routing daemons. + Note that protocol values should be standardized in order to + avoid conflicts. + */ + +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC router advertisment */ + + +/* rtm_scope + + Really it is not scope, but sort of distance to the destination. + NOWHERE are reserved for not existing destinations, HOST is our + local addresses, LINK are destinations, locate on directly attached + link and UNIVERSE is everywhere in the Universe :-) + + Intermediate values are also possible f.e. interior routes + could be assigned a value between UNIVERSE and LINK. +*/ + +enum rt_scope_t +{ + RT_SCOPE_UNIVERSE=0, +/* User defined values f.e. 
"site" */ + RT_SCOPE_LINK=253, + RT_SCOPE_HOST=254, + RT_SCOPE_NOWHERE=255 +}; + +/* rtm_flags */ + +#define RTM_F_NOTIFY 0x100 /* Notify user of route change */ +#define RTM_F_CLONED 0x200 /* This route is cloned */ +#define RTM_F_NOPMTUDISC 0x400 /* Do not make PMTU discovery */ +#define RTM_F_EQUALIZE 0x800 /* Multipath equalizer: NI */ + +/* Reserved table identifiers */ + +enum rt_class_t +{ + RT_TABLE_UNSPEC=0, +/* User defined values */ + RT_TABLE_DEFAULT=253, + RT_TABLE_MAIN=254, + RT_TABLE_LOCAL=255 +}; +#define RT_TABLE_MAX RT_TABLE_LOCAL + + +/********************************************************* + * Interface address. + ****/ + +struct ifaddrmsg +{ + unsigned char ifa_family; + unsigned char ifa_prefixlen; /* The prefix length */ + unsigned char ifa_flags; /* Flags */ + unsigned char ifa_scope; /* See above */ + int ifa_index; /* Link index */ +/* + struct rtattr ifa_data[0]; + */ +}; + +enum +{ + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST +}; + +#define IFA_MAX IFA_ANYCAST + +/* ifa_flags */ + +#define IFA_F_SECONDARY 1 + + +#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) + +/* + Important comment: + IFA_ADDRESS is prefix address, rather than local interface address. + It makes no difference for normally configured broadcast interfaces, + but for point-to-point IFA_ADDRESS is DESTINATION address, + local address is supplied in IFA_LOCAL attribute. + */ + +/************************************************************** + * Neighbour discovery. + ****/ + +struct ndmsg +{ + unsigned char nd_family; + int nd_ifindex; /* Link index */ + unsigned nd_flags; +/* + struct rtattr nd_data[0]; + */ +}; + +enum +{ + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, +}; + +#define NDA_MAX NDA_LLADDR + +#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) + +/**** + * General form of address family dependent message. 
+ ****/ + +struct rtgenmsg +{ + unsigned char rtgen_family; +}; + +/***************************************************************** + * Link layer specific messages. + ****/ + +/* struct ifinfomsg + * passes link level specific information, not dependent + * on network protocol. + */ + +struct ifinfomsg +{ + unsigned char ifi_family; /* Dummy */ + unsigned char ifi_addrlen; /* Length of HW address */ + unsigned short ifi_pad__; + int ifi_index; /* Link index */ + int ifi_link; /* Physical device */ + char ifi_name[IFNAMSIZ]; + struct sockaddr ifi_address; /* HW address */ + struct sockaddr ifi_broadcast; /* HW broadcast */ + unsigned ifi_flags; /* IFF_* flags */ + int ifi_mtu; /* Link mtu */ + char ifi_qdiscname[IFNAMSIZ];/* Id of packet scheduler */ + int ifi_qdisc; /* Packet scheduler handle */ +}; + +/* ifi_flags. + + IFF_* flags. + + The only change is: + IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are + more not changeable by user. They describe link media + characteristics and set by device driver. + + Comments: + - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid + - If neiher of these three flags are set; + the interface is NBMA. + + - IFF_MULTICAST does not mean anything special: + multicasts can be used on all not-NBMA links. + IFF_MULTICAST means that this media uses special encapsulation + for multicast frames. Apparently, all IFF_POINTOPOINT and + IFF_BROADCAST devices are able to use multicasts too. + */ + +/* ifi_link. + For usual devices it is equal ifi_index. + If it is a "virtual interface" (f.e. tunnel), ifi_link + can point to real physical interface (f.e. 
for bandwidth calculations), + or maybe 0, what means, that real media is unknown (usual + for IPIP tunnels, when route to endpoint is allowed to change) + */ + +#define RTMGRP_LINK 1 +#define RTMGRP_NOTIFY 2 + +#define RTMGRP_IPV4_IFADDR 0x10 +#define RTMGRP_IPV4_NDISC 0x20 +#define RTMGRP_IPV4_ROUTE 0x40 +#define RTMGRP_IPV4_MROUTE 0x80 + +#define RTMGRP_IPV6_IFADDR 0x100 +#define RTMGRP_IPV6_NDISC 0x200 +#define RTMGRP_IPV6_ROUTE 0x400 +#define RTMGRP_IPV6_MROUTE 0x800 + + +#ifdef __KERNEL__ + +struct kern_rta +{ + void *rta_dst; + void *rta_src; + int *rta_iif; + int *rta_oif; + void *rta_gw; + u32 *rta_priority; + void *rta_prefsrc; + unsigned *rta_window; + unsigned *rta_rtt; + unsigned *rta_mtu; + unsigned char *rta_ifname; +}; + +struct kern_ifa +{ + void *ifa_address; + void *ifa_local; + unsigned char *ifa_label; + void *ifa_broadcast; + void *ifa_anycast; +}; + + +extern atomic_t rtnl_rlockct; +extern struct wait_queue *rtnl_wait; + +#ifdef CONFIG_RTNETLINK +extern struct sock *rtnl; + +struct rtnetlink_link +{ + int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr); + int (*dumpit)(struct sk_buff *, struct netlink_callback *cb); +}; + +extern struct rtnetlink_link * rtnetlink_links[NPROTO]; +extern int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb); + + +extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data); + +#define RTA_PUT(skb, attrtype, attrlen, data) \ +({ if (skb_tailroom(skb) < RTA_SPACE(attrlen)) goto rtattr_failure; \ + __rta_fill(skb, attrtype, attrlen, data); }) + +extern unsigned long rtnl_wlockct; + +/* NOTE: these locks are not interrupt safe, are not SMP safe, + * they are even not atomic. 8)8)8) ... and it is not a bug. + * Really, if these locks will be programmed correctly, + * all the addressing/routing machine would become SMP safe, + * but is absolutely useless at the moment, because all the kernel + * is not reenterable in any case. 
--ANK + * + * Well, atomic_* and set_bit provide the only thing here: + * gcc is confused not to overoptimize them, that's all. + * I remember as gcc splitted ++ operation, but cannot reproduce + * it with gcc-2.7.*. --ANK + * + * One more note: rwlock facility should be written and put + * to a kernel wide location: f.e. current implementation of semaphores + * (especially, for x86) looks like a wonder. It would be good + * to have something similar for rwlock. Recursive lock could be also + * useful thing. --ANK + */ + +extern __inline__ int rtnl_shlock_nowait(void) +{ + atomic_inc(&rtnl_rlockct); + if (test_bit(0, &rtnl_wlockct)) { + atomic_dec(&rtnl_rlockct); + return -EAGAIN; + } + return 0; +} + +extern __inline__ void rtnl_shlock(void) +{ + while (rtnl_shlock_nowait()) + sleep_on(&rtnl_wait); +} + +/* Check for possibility to PROMOTE shared lock to exclusive. + Shared lock must be already grabbed with rtnl_shlock*(). + */ + +extern __inline__ int rtnl_exlock_nowait(void) +{ + if (atomic_read(&rtnl_rlockct) > 1) + return -EAGAIN; + if (test_and_set_bit(0, &rtnl_wlockct)) + return -EAGAIN; + return 0; +} + +extern __inline__ void rtnl_exlock(void) +{ + while (rtnl_exlock_nowait()) + sleep_on(&rtnl_wait); +} + +#if 0 +extern __inline__ void rtnl_shunlock(void) +{ + atomic_dec(&rtnl_rlockct); + if (atomic_read(&rtnl_rlockct) <= 1) { + wake_up(&rtnl_wait); + if (rtnl->receive_queue.qlen) + rtnl->data_ready(rtnl, 0); + } +} +#else + +/* The problem: inline requires to include and, hence, + almost all of net includes :-( + */ + +#define rtnl_shunlock() ({ \ + atomic_dec(&rtnl_rlockct); \ + if (atomic_read(&rtnl_rlockct) <= 1) { \ + wake_up(&rtnl_wait); \ + if (rtnl->receive_queue.qlen) \ + rtnl->data_ready(rtnl, 0); \ + } \ +}) +#endif + +/* Release exclusive lock. Note, that we do not wake up rtnetlink socket, + * it will be done later after releasing shared lock. 
+ */ + +extern __inline__ void rtnl_exunlock(void) +{ + clear_bit(0, &rtnl_wlockct); + wake_up(&rtnl_wait); +} + +#else + +extern __inline__ void rtnl_shlock(void) +{ + while (atomic_read(&rtnl_rlockct)) + sleep_on(&rtnl_wait); + atomic_inc(&rtnl_rlockct); +} + +extern __inline__ void rtnl_shunlock(void) +{ + if (atomic_dec_and_test(&rtnl_rlockct)) + wake_up(&rtnl_wait); +} + +extern __inline__ void rtnl_exlock(void) +{ +} + +extern __inline__ void rtnl_exunlock(void) +{ +} + +#endif + +extern void rtnl_lock(void); +extern void rtnl_unlock(void); +extern void rtnetlink_init(void); + +#endif /* __KERNEL__ */ + + +#endif /* __LINUX_RTNETLINK_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/scc.h linux/include/linux/scc.h --- v2.1.67/linux/include/linux/scc.h Sun Apr 13 10:18:22 1997 +++ linux/include/linux/scc.h Sat Nov 29 16:29:37 1997 @@ -184,7 +184,7 @@ #define RXINT 0x04 #define SPINT 0x06 -#ifdef SCC_DELAY +#ifdef CONFIG_SCC_DELAY #define Inb(port) inb_p(port) #define Outb(port, val) outb_p(val, port) #else diff -u --recursive --new-file v2.1.67/linux/include/linux/sched.h linux/include/linux/sched.h --- v2.1.67/linux/include/linux/sched.h Mon Oct 20 10:36:53 1997 +++ linux/include/linux/sched.h Sun Nov 30 14:10:40 1997 @@ -19,6 +19,7 @@ #include #include #include +#include /* * cloning flags: @@ -165,14 +166,14 @@ struct signal_struct { atomic_t count; - struct sigaction action[32]; + struct k_sigaction action[_NSIG]; spinlock_t siglock; }; #define INIT_SIGNALS { \ ATOMIC_INIT(1), \ - { {0,}, }, \ + { {{0,}}, }, \ SPIN_LOCK_UNLOCKED } struct task_struct { @@ -180,8 +181,6 @@ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ long counter; long priority; - unsigned long signal; - unsigned long blocked; /* bitmap of masked signals */ unsigned long flags; /* per process flags, defined below */ int errno; long debugreg[8]; /* Hardware debugging registers */ @@ -196,11 +195,10 @@ unsigned long personality; int dumpable:1; int did_exec:1; 
- /* shouldn't this be pid_t? */ - int pid; - int pgrp; - int tty_old_pgrp; - int session; + pid_t pid; + pid_t pgrp; + pid_t tty_old_pgrp; + pid_t session; /* boolean value for session group leader */ int leader; int ngroups; @@ -220,8 +218,8 @@ struct task_struct **tarray_ptr; struct wait_queue *wait_chldexit; /* for wait4() */ - unsigned short uid,euid,suid,fsuid; - unsigned short gid,egid,sgid,fsgid; + uid_t uid,euid,suid,fsuid; + gid_t gid,egid,sgid,fsgid; unsigned long timeout, policy, rt_priority; unsigned long it_real_value, it_prof_value, it_virt_value; unsigned long it_real_incr, it_prof_incr, it_virt_incr; @@ -257,6 +255,8 @@ struct mm_struct *mm; /* signal handlers */ struct signal_struct *sig; + sigset_t signal, blocked; + struct signal_queue *sigqueue, **sigqueue_tail; /* SMP state */ int has_cpu; int processor; @@ -271,17 +271,17 @@ */ #define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ /* Not implemented yet, only for 486*/ -#define PF_PTRACED 0x00000010 /* set if ptrace (0) has been called. 
*/ +#define PF_STARTING 0x00000002 /* being created */ +#define PF_EXITING 0x00000004 /* getting shut down */ +#define PF_SIGPENDING 0x00000008 /* at least one unblocked sig ready */ +#define PF_PTRACED 0x00000010 /* set if ptrace (0) has been called */ #define PF_TRACESYS 0x00000020 /* tracing system calls */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ -#define PF_STARTING 0x00000002 /* being created */ -#define PF_EXITING 0x00000004 /* getting shut down */ - -#define PF_USEDFPU 0x00100000 /* Process used the FPU this quantum (SMP only) */ +#define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */ #define PF_DTRACE 0x00200000 /* delayed trace (used on m68k) */ #define PF_ONSIGSTK 0x00400000 /* works on signal stack (m68k only) */ @@ -308,7 +308,7 @@ * your own risk!. Base=0, limit=0x1fffff (=2MB) */ #define INIT_TASK \ -/* state etc */ { 0,DEF_PRIORITY,DEF_PRIORITY,0,0,0,0, \ +/* state etc */ { 0,DEF_PRIORITY,DEF_PRIORITY,0,0, \ /* debugregs */ { 0, }, \ /* exec domain */&default_exec_domain, \ /* binfmt */ NULL, \ @@ -336,7 +336,7 @@ /* fs */ &init_fs, \ /* files */ &init_files, \ /* mm */ &init_mm, \ -/* signals */ &init_signals, \ +/* signals */ &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, \ /* SMP */ 0,0,0,0, \ /* locks */ INIT_LOCKS \ } @@ -445,15 +445,67 @@ extern void FASTCALL(wake_up_interruptible(struct wait_queue ** p)); extern void FASTCALL(wake_up_process(struct task_struct * tsk)); -extern void notify_parent(struct task_struct * tsk, int signal); -extern void force_sig(unsigned long sig,struct task_struct * p); -extern int send_sig(unsigned long sig,struct task_struct * p,int priv); extern int in_group_p(gid_t grp); +extern void flush_signals(struct task_struct *); +extern void flush_signal_handlers(struct task_struct *); +extern int 
dequeue_signal(sigset_t *block, siginfo_t *); +extern int send_sig_info(int, struct siginfo *info, struct task_struct *); +extern int force_sig_info(int, struct siginfo *info, struct task_struct *); +extern int kill_pg_info(int, struct siginfo *info, pid_t); +extern int kill_sl_info(int, struct siginfo *info, pid_t); +extern int kill_proc_info(int, struct siginfo *info, pid_t); +extern int kill_something_info(int, struct siginfo *info, int); +extern void notify_parent(struct task_struct * tsk, int); +extern void force_sig(int sig, struct task_struct * p); +extern int send_sig(int sig, struct task_struct * p, int priv); +extern int kill_pg(pid_t, int, int); +extern int kill_sl(pid_t, int, int); +extern int kill_proc(pid_t, int, int); +extern int do_sigaction(int sig, const struct k_sigaction *act, + struct k_sigaction *oact); + extern inline int signal_pending(struct task_struct *p) { - return (p->signal &~ p->blocked) != 0; + return (p->flags & PF_SIGPENDING) != 0; } + +/* Reevaluate whether the task has signals pending delivery. + This is required every time the blocked sigset_t changes. + All callers should have t->sigmask_lock. */ + +static inline void recalc_sigpending(struct task_struct *t) +{ + unsigned long ready, nflags; + long i; + + switch (_NSIG_WORDS) { + default: + for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;) + ready |= t->signal.sig[i] &~ t->blocked.sig[i]; + break; + + case 4: ready = t->signal.sig[3] &~ t->blocked.sig[3]; + ready |= t->signal.sig[2] &~ t->blocked.sig[2]; + ready |= t->signal.sig[1] &~ t->blocked.sig[1]; + ready |= t->signal.sig[0] &~ t->blocked.sig[0]; + break; + + case 2: ready = t->signal.sig[1] &~ t->blocked.sig[1]; + ready |= t->signal.sig[0] &~ t->blocked.sig[0]; + break; + + case 1: ready = t->signal.sig[0] &~ t->blocked.sig[0]; + } + + /* Poor gcc has trouble with conditional moves... 
*/ + nflags = t->flags &~ PF_SIGPENDING; + if (ready) + nflags = t->flags | PF_SIGPENDING; + + t->flags = nflags; +} + extern int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), diff -u --recursive --new-file v2.1.67/linux/include/linux/signal.h linux/include/linux/signal.h --- v2.1.67/linux/include/linux/signal.h Mon Sep 30 07:47:39 1996 +++ linux/include/linux/signal.h Sun Nov 30 10:59:03 1997 @@ -2,5 +2,204 @@ #define _LINUX_SIGNAL_H #include +#include -#endif +#ifdef __KERNEL__ +/* + * Real Time signals may be queued. + */ + +struct signal_queue +{ + struct signal_queue *next; + siginfo_t info; +}; + +/* + * Define some primitives to manipulate sigset_t. + */ + +#ifndef __HAVE_ARCH_SIG_BITOPS +#include + +/* We don't use for these because there is no need to + be atomic. */ +extern inline void sigaddset(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + if (_NSIG_WORDS == 1) + set->sig[0] |= 1UL << sig; + else + set->sig[sig / _NSIG_BPW] |= 1UL << (sig % _NSIG_BPW); +} + +extern inline void sigdelset(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + if (_NSIG_WORDS == 1) + set->sig[0] &= ~(1UL << sig); + else + set->sig[sig / _NSIG_BPW] &= ~(1UL << (sig % _NSIG_BPW)); +} + +extern inline int sigismember(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + if (_NSIG_WORDS == 1) + return 1 & (set->sig[0] >> sig); + else + return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW)); +} + +extern inline int sigfindinword(unsigned long word) +{ + return ffz(~word); +} + +#define sigmask(sig) (1UL << ((sig) - 1)) + +#endif /* __HAVE_ARCH_SIG_BITOPS */ + +#ifndef __HAVE_ARCH_SIG_SETOPS +#include + +#define _SIG_SET_BINOP(name, op) \ +extern inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \ +{ \ + unsigned long a0, a1, a2, a3, b0, b1, b2, b3; \ + unsigned long i; \ + \ + for (i = 0; i < _NSIG_WORDS/4; ++i) { \ + a0 = a->sig[4*i+0]; a1 = a->sig[4*i+1]; \ + a2 = a->sig[4*i+2]; a3 = 
a->sig[4*i+3]; \ + b0 = b->sig[4*i+0]; b1 = b->sig[4*i+1]; \ + b2 = b->sig[4*i+2]; b3 = b->sig[4*i+3]; \ + r->sig[4*i+0] = op(a0, b0); \ + r->sig[4*i+1] = op(a1, b1); \ + r->sig[4*i+2] = op(a2, b2); \ + r->sig[4*i+3] = op(a3, b3); \ + } \ + switch (_NSIG_WORDS % 4) { \ + case 3: \ + a0 = a->sig[4*i+0]; a1 = a->sig[4*i+1]; a2 = a->sig[4*i+2]; \ + b0 = b->sig[4*i+0]; b1 = b->sig[4*i+1]; b2 = b->sig[4*i+2]; \ + r->sig[4*i+0] = op(a0, b0); \ + r->sig[4*i+1] = op(a1, b1); \ + r->sig[4*i+2] = op(a2, b2); \ + break; \ + case 2: \ + a0 = a->sig[4*i+0]; a1 = a->sig[4*i+1]; \ + b0 = b->sig[4*i+0]; b1 = b->sig[4*i+1]; \ + r->sig[4*i+0] = op(a0, b0); \ + r->sig[4*i+1] = op(a1, b1); \ + break; \ + case 1: \ + a0 = a->sig[4*i+0]; b0 = b->sig[4*i+0]; \ + r->sig[4*i+0] = op(a0, b0); \ + break; \ + } \ +} + +#define _sig_or(x,y) ((x) | (y)) +_SIG_SET_BINOP(sigorsets, _sig_or) + +#define _sig_and(x,y) ((x) & (y)) +_SIG_SET_BINOP(sigandsets, _sig_and) + +#define _sig_nand(x,y) ((x) & ~(y)) +_SIG_SET_BINOP(signandsets, _sig_nand) + +#undef _SIG_SET_BINOP +#undef _sig_or +#undef _sig_and +#undef _sig_nand + +#define _SIG_SET_OP(name, op) \ +extern inline void name(sigset_t *set) \ +{ \ + unsigned long i; \ + \ + for (i = 0; i < _NSIG_WORDS/4; ++i) { \ + set->sig[4*i+0] = op(set->sig[4*i+0]); \ + set->sig[4*i+1] = op(set->sig[4*i+1]); \ + set->sig[4*i+2] = op(set->sig[4*i+2]); \ + set->sig[4*i+3] = op(set->sig[4*i+3]); \ + } \ + switch (_NSIG_WORDS % 4) { \ + case 3: set->sig[4*i+2] = op(set->sig[4*i+2]); \ + case 2: set->sig[4*i+1] = op(set->sig[4*i+1]); \ + case 1: set->sig[4*i+0] = op(set->sig[4*i+0]); \ + } \ +} + +#define _sig_not(x) (~(x)) +_SIG_SET_OP(signotset, _sig_not) + +#undef _SIG_SET_OP +#undef _sig_not + +extern inline void sigemptyset(sigset_t *set) +{ + switch (_NSIG_WORDS) { + default: + memset(set, 0, sizeof(sigset_t)); + break; + case 2: set->sig[1] = 0; + case 1: set->sig[0] = 0; + break; + } +} + +extern inline void sigfillset(sigset_t *set) +{ + switch 
(_NSIG_WORDS) { + default: + memset(set, -1, sizeof(sigset_t)); + break; + case 2: set->sig[1] = -1; + case 1: set->sig[0] = -1; + break; + } +} + +/* Some extensions for manipulating the low 32 signals in particular. */ + +extern inline void sigaddsetmask(sigset_t *set, unsigned long mask) +{ + set->sig[0] |= mask; +} + +extern inline void sigdelsetmask(sigset_t *set, unsigned long mask) +{ + set->sig[0] &= ~mask; +} + +extern inline void siginitset(sigset_t *set, unsigned long mask) +{ + set->sig[0] = mask; + switch (_NSIG_WORDS) { + default: + memset(&set->sig[1], 0, sizeof(long)*(_NSIG_WORDS-1)); + break; + case 2: set->sig[1] = 0; + case 1: + } +} + +extern inline void siginitsetinv(sigset_t *set, unsigned long mask) +{ + set->sig[0] = ~mask; + switch (_NSIG_WORDS) { + default: + memset(&set->sig[1], -1, sizeof(long)*(_NSIG_WORDS-1)); + break; + case 2: set->sig[1] = -1; + case 1: + } +} + +#endif /* __HAVE_ARCH_SIG_SETOPS */ + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_SIGNAL_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/skbuff.h linux/include/linux/skbuff.h --- v2.1.67/linux/include/linux/skbuff.h Mon Jul 7 16:02:05 1997 +++ linux/include/linux/skbuff.h Sun Nov 30 14:10:46 1997 @@ -90,15 +90,10 @@ arp; /* Has IP/ARP resolution finished */ unsigned char tries, /* Times tried */ inclone, /* Inline clone */ - priority, pkt_type, /* Packet class */ pkt_bridged, /* Tracker for bridging */ ip_summed; /* Driver fed us an IP checksum */ -#define PACKET_HOST 0 /* To us */ -#define PACKET_BROADCAST 1 /* To all */ -#define PACKET_MULTICAST 2 /* To group */ -#define PACKET_OTHERHOST 3 /* To someone else */ -#define PACKET_NDISC 17 /* Outgoing NDISC packet */ + __u32 priority; atomic_t users; /* User count - see datagram.c,tcp.c */ unsigned short protocol; /* Packet protocol from driver. 
*/ unsigned short security; /* Security level of packet */ @@ -447,13 +442,17 @@ return skb->data; } -extern __inline__ unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) +extern __inline__ char *__skb_pull(struct sk_buff *skb, unsigned int len) { + skb->len-=len; + return skb->data+=len; +} + +extern __inline__ unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) +{ if (len > skb->len) return NULL; - skb->data+=len; - skb->len-=len; - return skb->data; + return __skb_pull(skb,len); } extern __inline__ int skb_headroom(struct sk_buff *skb) @@ -472,11 +471,16 @@ skb->tail+=len; } +extern __inline__ void __skb_trim(struct sk_buff *skb, unsigned int len) +{ + skb->len = len; + skb->tail = skb->data+len; +} + extern __inline__ void skb_trim(struct sk_buff *skb, unsigned int len) { if (skb->len > len) { - skb->len = len; - skb->tail = skb->data+len; + __skb_trim(skb, len); } } @@ -515,8 +519,15 @@ skb->sk = NULL; } +extern __inline__ void skb_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + while ((skb=skb_dequeue(list))!=NULL) + kfree_skb(skb,0); +} + extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); -extern unsigned int datagram_poll(struct socket *sock, poll_table *wait); +extern unsigned int datagram_poll(struct socket *sock, struct poll_table_struct *wait); extern int skb_copy_datagram(struct sk_buff *from, int offset, char *to,int size); extern int skb_copy_datagram_iovec(struct sk_buff *from, int offset, struct iovec *to,int size); extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); diff -u --recursive --new-file v2.1.67/linux/include/linux/socket.h linux/include/linux/socket.h --- v2.1.67/linux/include/linux/socket.h Mon Jun 16 16:36:00 1997 +++ linux/include/linux/socket.h Sun Nov 30 14:00:39 1997 @@ -12,8 +12,7 @@ * 1003.1g requires sa_family_t and that sa_data is char. 
*/ -struct sockaddr -{ +struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ char sa_data[14]; /* 14 bytes of protocol address */ }; @@ -29,8 +28,7 @@ * belong in an obscure libc emulation or the bin. */ -struct msghdr -{ +struct msghdr { void * msg_name; /* Socket name */ int msg_namelen; /* Length of name */ struct iovec * msg_iov; /* Data blocks */ @@ -57,7 +55,8 @@ * Table 5-14 of POSIX 1003.1g */ -#define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr(mhdr, cmsg) +#define __CMSG_NXTHDR(ctl, len, cmsg) __cmsg_nxthdr((ctl),(len),(cmsg)) +#define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr((mhdr), (cmsg)) #define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) ) @@ -65,18 +64,19 @@ #define CMSG_SPACE(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + CMSG_ALIGN(len)) #define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + (len)) -#define CMSG_FIRSTHDR(msg) ((msg)->msg_controllen >= sizeof(struct cmsghdr) ? \ - (struct cmsghdr *)(msg)->msg_control : \ - (struct cmsghdr *)NULL) +#define __CMSG_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr) ? 
\ + (struct cmsghdr *)(ctl) : \ + (struct cmsghdr *)NULL) +#define CMSG_FIRSTHDR(msg) __CMSG_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen) /* * This mess will go away with glibc */ #ifdef __KERNEL__ -#define KINLINE extern __inline__ +#define __KINLINE extern __inline__ #else -#define KINLINE static +#define __KINLINE static #endif @@ -84,20 +84,23 @@ * Get the next cmsg header */ -KINLINE struct cmsghdr * cmsg_nxthdr(struct msghdr *mhdr, - struct cmsghdr *cmsg) +__KINLINE struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, + struct cmsghdr *__cmsg) { - unsigned char * ptr; + unsigned char * __ptr; - if (cmsg->cmsg_len < sizeof(struct cmsghdr)) - { + if (__cmsg->cmsg_len < sizeof(struct cmsghdr)) return NULL; - } - ptr = ((unsigned char *) cmsg) + CMSG_ALIGN(cmsg->cmsg_len); - if (ptr >= (unsigned char *) mhdr->msg_control + mhdr->msg_controllen) + __ptr = ((unsigned char *) __cmsg) + CMSG_ALIGN(__cmsg->cmsg_len); + if (__ptr >= (unsigned char *) __ctl + __size) return NULL; - return (struct cmsghdr *) ptr; + return (struct cmsghdr *) __ptr; +} + +__KINLINE struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg) +{ + return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } /* "Socket"-level control message types: */ @@ -106,8 +109,7 @@ #define SCM_CREDENTIALS 0x02 /* rw: struct ucred */ #define SCM_CONNECT 0x03 /* rw: struct scm_connect */ -struct ucred -{ +struct ucred { __kernel_pid_t pid; __kernel_uid_t uid; __kernel_gid_t gid; @@ -144,6 +146,9 @@ #define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/ #define AF_SECURITY 14 /* Security callback pseudo AF */ #define pseudo_AF_KEY 15 /* PF_KEY key management API */ +#define AF_NETLINK 16 +#define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */ +#define AF_PACKET 17 /* Packet family */ #define AF_MAX 32 /* For now.. */ /* Protocol families, same as address families. 
*/ @@ -164,6 +169,9 @@ #define PF_NETBEUI AF_NETBEUI #define PF_SECURITY AF_SECURITY #define PF_KEY pseudo_AF_KEY +#define PF_NETLINK AF_NETLINK +#define PF_ROUTE AF_ROUTE +#define PF_PACKET AF_PACKET #define PF_MAX AF_MAX @@ -196,6 +204,9 @@ /* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */ #define SOL_IP 0 +/* #define SOL_ICMP 1 No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */ +#define SOL_TCP 6 +#define SOL_UDP 17 #define SOL_IPV6 41 #define SOL_ICMPV6 58 #define SOL_RAW 255 @@ -206,8 +217,7 @@ #define SOL_ROSE 260 #define SOL_DECNET 261 #define SOL_X25 262 -#define SOL_TCP 6 -#define SOL_UDP 17 +#define SOL_PACKET 263 /* IPX options */ #define IPX_TYPE 1 @@ -216,24 +226,19 @@ #define TCP_NODELAY 1 #define TCP_MAXSEG 2 -/* The various priorities. */ -#define SOPRI_INTERACTIVE 0 -#define SOPRI_NORMAL 1 -#define SOPRI_BACKGROUND 2 - #ifdef __KERNEL__ extern int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); extern int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, int len); -extern unsigned int csum_partial_copy_fromiovecend(unsigned char *kdata, - struct iovec *iov, - int offset, - int len, int csum); +extern int csum_partial_copy_fromiovecend(unsigned char *kdata, + struct iovec *iov, + int offset, + int len, int *csump); extern int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode); extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); extern int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen); extern int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr); -extern void put_cmsg(struct msghdr*, int level, int type, int len, void *data); +extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); #endif #endif /* _LINUX_SOCKET_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/sockios.h linux/include/linux/sockios.h --- v2.1.67/linux/include/linux/sockios.h Thu Sep 4 17:07:31 1997 +++ 
linux/include/linux/sockios.h Sun Nov 30 14:00:39 1997 @@ -53,18 +53,19 @@ #define SIOCSIFSLAVE 0x8930 #define SIOCADDMULTI 0x8931 /* Multicast address lists */ #define SIOCDELMULTI 0x8932 -#define SIOGIFINDEX 0x8933 /* name -> if_index mapping */ -#define SIOGIFNAME 0x8934 /* if_index -> name mapping */ -#define SIOCGIFCOUNT 0x8935 /* get number of interfaces */ +#define SIOCGIFINDEX 0x8933 /* name -> if_index mapping */ +#define SIOGIFINDEX SIOCGIFINDEX /* misprint compatibility :-) */ +#define SIOCSIFPFLAGS 0x8934 /* set/get extended flags set */ +#define SIOCGIFPFLAGS 0x8935 #define SIOCDIFADDR 0x8936 /* delete PA address */ +#define SIOCSIFHWBROADCAST 0x8937 /* set hardware broadcast addr */ +#define SIOCGIFCOUNT 0x8938 /* get number of devices */ #define SIOCGIFBR 0x8940 /* Bridging support */ #define SIOCSIFBR 0x8941 /* Set bridging options */ /* ARP cache control calls. */ -#define OLD_SIOCDARP 0x8950 /* old delete ARP table entry */ -#define OLD_SIOCGARP 0x8951 /* old get ARP table entry */ -#define OLD_SIOCSARP 0x8952 /* old set ARP table entry */ + /* 0x8950 - 0x8952 * obsolete calls, don't re-use */ #define SIOCDARP 0x8953 /* delete ARP table entry */ #define SIOCGARP 0x8954 /* get ARP table entry */ #define SIOCSARP 0x8955 /* set ARP table entry */ diff -u --recursive --new-file v2.1.67/linux/include/linux/sysctl.h linux/include/linux/sysctl.h --- v2.1.67/linux/include/linux/sysctl.h Sat Oct 25 02:44:18 1997 +++ linux/include/linux/sysctl.h Sun Nov 30 14:00:39 1997 @@ -150,14 +150,15 @@ NET_IPV4_RFC1812_FILTER, NET_IPV4_LOG_MARTIANS, NET_IPV4_SOURCE_ROUTE, - NET_IPV4_ADDRMASK_AGENT, - NET_IPV4_BOOTP_AGENT, + NET_IPV4_SEND_REDIRECTS, + NET_IPV4_AUTOCONFIG, NET_IPV4_BOOTP_RELAY, - NET_IPV4_FIB_MODEL, + NET_IPV4_PROXY_ARP, NET_IPV4_NO_PMTU_DISC, NET_IPV4_ACCEPT_REDIRECTS, NET_IPV4_SECURE_REDIRECTS, NET_IPV4_RFC1620_REDIRECTS, + NET_IPV4_RTCACHE_FLUSH, NET_IPV4_TCP_SYN_RETRIES, NET_IPV4_IPFRAG_HIGH_THRESH, NET_IPV4_IPFRAG_LOW_THRESH, @@ -176,6 +177,14 @@ 
NET_TCP_STDURG, NET_TCP_SYN_TAILDROP, NET_TCP_MAX_SYN_BACKLOG, + NET_IPV4_LOCAL_PORT_RANGE, + NET_IPV4_ICMP_ECHO_IGNORE_ALL, + NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, + NET_IPV4_ICMP_SOURCEQUENCH_RATE, + NET_IPV4_ICMP_DESTUNREACH_RATE, + NET_IPV4_ICMP_TIMEEXCEED_RATE, + NET_IPV4_ICMP_PARAMPROB_RATE, + NET_IPV4_ICMP_ECHOREPLY_RATE }; @@ -198,6 +207,8 @@ NET_IPV6_RTR_SOLICITS, NET_IPV6_RTR_SOLICIT_INTERVAL, NET_IPV6_RTR_SOLICIT_DELAY, + + NET_IPV6_ICMPV6_TIME, }; /* /proc/sys/net/ipx */ diff -u --recursive --new-file v2.1.67/linux/include/linux/tcp.h linux/include/linux/tcp.h --- v2.1.67/linux/include/linux/tcp.h Mon Jul 7 16:02:45 1997 +++ linux/include/linux/tcp.h Sun Nov 30 14:11:18 1997 @@ -68,4 +68,18 @@ TCP_CLOSING /* now a valid state */ }; +enum { + TCPF_ESTABLISHED = (1 << 1), + TCPF_SYN_SENT = (1 << 2), + TCPF_SYN_RECV = (1 << 3), + TCPF_FIN_WAIT1 = (1 << 4), + TCPF_FIN_WAIT2 = (1 << 5), + TCPF_TIME_WAIT = (1 << 6), + TCPF_CLOSE = (1 << 7), + TCPF_CLOSE_WAIT = (1 << 8), + TCPF_LAST_ACK = (1 << 9), + TCPF_LISTEN = (1 << 10), + TCPF_CLOSING = (1 << 11) +}; + #endif /* _LINUX_TCP_H */ diff -u --recursive --new-file v2.1.67/linux/include/linux/time.h linux/include/linux/time.h --- v2.1.67/linux/include/linux/time.h Fri Dec 27 02:04:48 1996 +++ linux/include/linux/time.h Sun Nov 30 10:59:03 1997 @@ -1,6 +1,8 @@ #ifndef _LINUX_TIME_H #define _LINUX_TIME_H +#include + #ifndef _STRUCT_TIMESPEC #define _STRUCT_TIMESPEC struct timespec { @@ -9,6 +11,30 @@ }; #endif /* _STRUCT_TIMESPEC */ +/* + * change timeval to jiffies, trying to avoid the + * most obvious overflows.. 
+ */ +static inline unsigned long +timespec_to_jiffies(struct timespec *value) +{ + unsigned long sec = value->tv_sec; + long nsec = value->tv_nsec; + + if (sec > ((long)(~0UL >> 1) / HZ)) + return ~0UL >> 1; + nsec += 1000000000L / HZ - 1; + nsec /= 1000000000L / HZ; + return HZ * sec + nsec; +} + +static inline void +jiffies_to_timespec(unsigned long jiffies, struct timespec *value) +{ + value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ); + value->tv_sec = jiffies / HZ; +} + struct timeval { int tv_sec; /* seconds */ int tv_usec; /* microseconds */ diff -u --recursive --new-file v2.1.67/linux/include/net/dst.h linux/include/net/dst.h --- v2.1.67/linux/include/net/dst.h Mon Jul 7 16:04:02 1997 +++ linux/include/net/dst.h Sun Nov 30 14:12:37 1997 @@ -40,6 +40,9 @@ unsigned window; unsigned pmtu; unsigned rtt; + unsigned long rate_last; /* rate limiting for ICMP */ + unsigned long rate_tokens; + int error; struct neighbour *neighbour; @@ -49,7 +52,7 @@ int (*output)(struct sk_buff*); struct dst_ops *ops; - + char info[0]; }; @@ -57,12 +60,14 @@ struct dst_ops { unsigned short family; - struct dst_entry * (*check)(struct dst_entry *, u32 cookie); + struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); struct dst_entry * (*reroute)(struct dst_entry *, struct sk_buff *); void (*destroy)(struct dst_entry *); }; +#ifdef __KERNEL__ + extern struct dst_entry * dst_garbage_list; extern atomic_t dst_total; @@ -122,5 +127,6 @@ } __dst_free(dst); } +#endif #endif /* _NET_DST_H */ diff -u --recursive --new-file v2.1.67/linux/include/net/gc.h linux/include/net/gc.h --- v2.1.67/linux/include/net/gc.h Sun Mar 24 22:58:22 1996 +++ linux/include/net/gc.h Wed Dec 31 16:00:00 1969 @@ -1,46 +0,0 @@ -/* - * Interface routines assumed by gc() - * - * Copyright (C) Barak A. Pearlmutter. - * Released under the GPL version 2 or later. 
- * - */ - -typedef struct object *pobj; /* pointer to a guy of the type we gc */ - -/* - * How to mark and unmark objects - */ - -extern void gc_mark(pobj); -extern void gc_unmark(pobj); -extern int gc_marked(pobj); - -/* - * How to count and access an object's children - */ - -extern int n_children(pobj); /* how many children */ -extern pobj child_n(pobj, int); /* child i, numbered 0..n-1 */ - -/* - * How to access the root set - */ - -extern int root_size(void); /* number of things in root set */ -extern pobj root_elt(int); /* element i of root set, numbered 0..n-1 */ - -/* - * How to access the free list - */ - -extern void clear_freelist(void); -extern void add_to_free_list(pobj); - -/* - * How to iterate through all objects in memory - */ - -extern int N_OBJS; -extern pobj obj_number(int); - diff -u --recursive --new-file v2.1.67/linux/include/net/icmp.h linux/include/net/icmp.h --- v2.1.67/linux/include/net/icmp.h Mon Jul 7 16:04:02 1997 +++ linux/include/net/icmp.h Sun Nov 30 14:12:37 1997 @@ -33,6 +33,9 @@ extern int icmp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern void icmp_init(struct net_proto_family *ops); +/* Move into dst.h ? 
*/ +extern int xrlim_allow(struct dst_entry *dst, int timeout); + /* CONFIG_IP_TRANSPARENT_PROXY */ extern int icmp_chkaddr(struct sk_buff *skb); diff -u --recursive --new-file v2.1.67/linux/include/net/inet_common.h linux/include/net/inet_common.h --- v2.1.67/linux/include/net/inet_common.h Sun Sep 7 13:10:43 1997 +++ linux/include/net/inet_common.h Sun Nov 30 14:00:39 1997 @@ -28,7 +28,7 @@ struct msghdr *msg, int size, struct scm_cookie *scm); extern int inet_shutdown(struct socket *sock, int how); -extern unsigned int inet_poll(struct socket *sock, poll_table *wait); +extern unsigned int inet_poll(struct socket *sock, struct poll_table_struct *wait); extern int inet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen); diff -u --recursive --new-file v2.1.67/linux/include/net/ip.h linux/include/net/ip.h --- v2.1.67/linux/include/net/ip.h Mon Jul 7 16:04:03 1997 +++ linux/include/net/ip.h Sun Nov 30 14:12:38 1997 @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #ifndef _SNMP_H @@ -38,21 +40,30 @@ struct ip_options opt; /* Compiled IP options */ u16 redirport; /* Redirect port */ unsigned char flags; - char vif; #define IPSKB_MASQUERADED 1 #define IPSKB_TRANSLATED 2 -#define IPSKB_TUNNELED 4 +#define IPSKB_FORWARDED 4 }; struct ipcm_cookie { u32 addr; + int oif; struct ip_options *opt; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) +struct ip_ra_chain +{ + struct ip_ra_chain *next; + struct sock *sk; + void (*destructor)(struct sock *); +}; + +extern struct ip_ra_chain *ip_ra_chain; + /* IP flags. 
*/ #define IP_CE 0x8000 /* Flag: "Congestion" */ #define IP_DF 0x4000 /* Flag: "Don't Fragment" */ @@ -134,20 +145,23 @@ int secure_redirects; int rfc1620_redirects; int rfc1812_filter; - int addrmask_agent; + int send_redirects; int log_martians; int source_route; int multicast_route; - int bootp_agent; + int proxy_arp; int bootp_relay; - int fib_model; + int autoconfig; int no_pmtu_disc; }; extern struct ipv4_config ipv4_config; +extern int sysctl_local_port_range[2]; #define IS_ROUTER (ip_statistics.IpForwarding == 1) +extern int ip_call_ra_chain(struct sk_buff *skb); + /* * Functions provided by ip_fragment.o */ @@ -165,7 +179,7 @@ * Functions provided by ip_options.c */ -extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 daddr, u32 saddr, int is_frag); +extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 daddr, struct rtable *rt, int is_frag); extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); extern void ip_options_fragment(struct sk_buff *skb); extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); @@ -179,9 +193,12 @@ */ extern void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb); -extern int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc, struct device **devp); +extern int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc); extern int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen); extern int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen); +extern int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); extern int ipv4_backlog_rcv(struct sock *sk, struct sk_buff *skb); + + #endif /* _IP_H */ diff -u --recursive --new-file v2.1.67/linux/include/net/ip_alias.h linux/include/net/ip_alias.h --- v2.1.67/linux/include/net/ip_alias.h Mon Dec 25 20:03:01 1995 +++ linux/include/net/ip_alias.h Wed Dec 31 16:00:00 1969 @@ -1,23 +0,0 @@ -/* - * 
IP_ALIAS (AF_INET) aliasing definitions. - * - * - * Version: @(#)ip_alias.h 0.43 12/20/95 - * - * Author: Juan Jose Ciarlante, - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#ifndef _IP_ALIAS_H -#define _IP_ALIAS_H - -extern int ip_alias_init(void); -extern int ip_alias_done(void); - -#endif /* _IP_ALIAS_H */ diff -u --recursive --new-file v2.1.67/linux/include/net/ip_fib.h linux/include/net/ip_fib.h --- v2.1.67/linux/include/net/ip_fib.h Thu Dec 12 06:54:21 1996 +++ linux/include/net/ip_fib.h Sun Nov 30 14:12:48 1997 @@ -16,118 +16,190 @@ #ifndef _NET_IP_FIB_H #define _NET_IP_FIB_H +#include -struct fib_node +struct fib_nh { - struct fib_node *fib_next; - u32 fib_key; - struct fib_info *fib_info; - short fib_metric; - u8 fib_tos; - u8 fib_flag; + struct device *nh_dev; + unsigned nh_flags; + unsigned char nh_scope; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int nh_weight; + int nh_power; +#endif + int nh_oif; + u32 nh_gw; }; -#define FIBFLG_DOWN 1 /* Ignore this node */ -#define FIBFLG_THROW 2 /* Class lookup failed */ -#define FIBFLG_REJECT 4 /* Route lookup failed */ - -#define MAGIC_METRIC 0x7FFF - /* * This structure contains data shared by many of routes. 
- */ + */ struct fib_info { struct fib_info *fib_next; struct fib_info *fib_prev; - u32 fib_gateway; - struct device *fib_dev; int fib_refcnt; - unsigned long fib_window; unsigned fib_flags; - unsigned short fib_mtu; - unsigned short fib_irtt; + int fib_protocol; + u32 fib_prefsrc; + unsigned fib_mtu; + unsigned fib_rtt; + unsigned fib_window; + int fib_nhs; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int fib_power; +#endif + struct fib_nh fib_nh[0]; +#define fib_dev fib_nh[0].nh_dev }; -struct fib_zone + +#ifdef CONFIG_IP_MULTIPLE_TABLES +struct fib_rule; +#endif + +struct fib_result { - struct fib_zone *fz_next; - struct fib_node **fz_hash; - int fz_nent; - int fz_divisor; - u32 fz_hashmask; - int fz_logmask; - u32 fz_mask; + u32 *prefix; + unsigned char prefixlen; + unsigned char nh_sel; + unsigned char type; + unsigned char scope; + struct fib_info *fi; +#ifdef CONFIG_IP_MULTIPLE_TABLES + struct fib_rule *r; +#endif }; -struct fib_class +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +#define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) +#define FIB_RES_RESET(res) ((res).nh_sel = 0) + +#else /* CONFIG_IP_ROUTE_MULTIPATH */ + +#define FIB_RES_NH(res) ((res).fi->fib_nh[0]) +#define FIB_RES_RESET(res) + +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ + +#define FIB_RES_PREFSRC(res) ((res).fi->fib_prefsrc ? 
: __fib_res_prefsrc(&res)) +#define FIB_RES_GW(res) (FIB_RES_NH(res).nh_gw) +#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) +#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) + +struct fib_table { - unsigned char cl_id; - unsigned char cl_auto; - struct fib_zone *fib_zones[33]; - struct fib_zone *fib_zone_list; - int cl_users; + unsigned char tb_id; + unsigned tb_stamp; + int (*tb_lookup)(struct fib_table *tb, const struct rt_key *key, struct fib_result *res); + int (*tb_insert)(struct fib_table *table, struct rtmsg *r, + struct kern_rta *rta, struct nlmsghdr *n, + struct netlink_skb_parms *req); + int (*tb_delete)(struct fib_table *table, struct rtmsg *r, + struct kern_rta *rta, struct nlmsghdr *n, + struct netlink_skb_parms *req); + int (*tb_dump)(struct fib_table *table, struct sk_buff *skb, + struct netlink_callback *cb); + int (*tb_flush)(struct fib_table *table); + int (*tb_get_info)(struct fib_table *table, char *buf, + int first, int count); + + unsigned char tb_data[0]; }; -struct fib_rule +#ifndef CONFIG_IP_MULTIPLE_TABLES + +extern struct fib_table *local_table; +extern struct fib_table *main_table; + +extern __inline__ struct fib_table *fib_get_table(int id) { - struct fib_rule *cl_next; - struct fib_class *cl_class; - u32 cl_src; - u32 cl_srcmask; - u32 cl_dst; - u32 cl_dstmask; - u32 cl_srcmap; - u8 cl_action; - u8 cl_flags; - u8 cl_tos; - u8 cl_preference; - struct device *cl_dev; -}; + if (id != RT_TABLE_LOCAL) + return main_table; + return local_table; +} -struct fib_result +extern __inline__ struct fib_table *fib_new_table(int id) { - struct fib_node *f; - struct fib_rule *fr; - int fm; -}; + return fib_get_table(id); +} -void ip_fib_init(void); -unsigned ip_fib_chk_addr(u32 addr); -int ip_fib_chk_default_gw(u32 addr, struct device*); - -int fib_lookup(struct fib_result *, u32 daddr, u32 src, u8 tos, struct device *devin, - struct device *devout); - -static __inline__ struct fib_info * -fib_lookup_info(u32 dst, u32 src, u8 tos, struct device 
*devin, - struct device *devout) -{ - struct fib_result res; - if (fib_lookup(&res, dst, src, tos, devin, devout) < 0) - return NULL; - return res.f->fib_info; +extern __inline__ int fib_lookup(const struct rt_key *key, struct fib_result *res) +{ + if (local_table->tb_lookup(local_table, key, res)) + return main_table->tb_lookup(main_table, key, res); + return 0; } -static __inline__ struct device * get_gw_dev(u32 gw, struct device *dev) +#else /* CONFIG_IP_MULTIPLE_TABLES */ +#define local_table (fib_tables[RT_TABLE_LOCAL]) +#define main_table (fib_tables[RT_TABLE_MAIN]) + +extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; +extern int fib_lookup(const struct rt_key *key, struct fib_result *res); +extern struct fib_table *__fib_new_table(int id); + +extern __inline__ struct fib_table *fib_get_table(int id) { - struct fib_info * fi; + if (id == 0) + id = RT_TABLE_MAIN; - fi = fib_lookup_info(gw, 0, 1, &loopback_dev, dev); - if (fi) - return fi->fib_dev; - return NULL; + return fib_tables[id]; } -extern int ip_rt_event(int event, struct device *dev); -extern int ip_rt_ioctl(unsigned int cmd, void *arg); -extern void ip_rt_change_broadcast(struct device *, u32); -extern void ip_rt_change_dstaddr(struct device *, u32); -extern void ip_rt_change_netmask(struct device *, u32); -extern void ip_rt_multicast_event(struct device *dev); +extern __inline__ struct fib_table *fib_new_table(int id) +{ + if (id == 0) + id = RT_TABLE_MAIN; + + return fib_tables[id] ? 
: __fib_new_table(id); +} +#endif /* CONFIG_IP_MULTIPLE_TABLES */ -extern struct device * ip_dev_find_tunnel(u32 daddr, u32 saddr); +/* Exported by fib_frontend.c */ +extern void ip_fib_init(void); +extern void fib_flush(void); +extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); +extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); +extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); +extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb); +extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, + struct device *dev, u32 *spec_dst); +extern void fib_select_multipath(const struct rt_key *key, struct fib_result *res); + +/* Exported by fib_semantics.c */ +extern int ip_fib_check_default(u32 gw, struct device *dev); +extern void fib_release_info(struct fib_info *); +extern int fib_semantic_match(int type, struct fib_info *, + const struct rt_key *, struct fib_result*); +extern struct fib_info *fib_create_info(const struct rtmsg *r, struct kern_rta *rta, + const struct nlmsghdr *, int *err); +extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, struct kern_rta *rta, struct fib_info *fi); +extern int fib_dump_info(struct sk_buff *skb, pid_t pid, u32 seq, int event, + u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, + struct fib_info *fi); +extern int fib_sync_down(u32 local, struct device *dev); +extern int fib_sync_up(struct device *dev); +extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, + struct kern_rta *rta, struct rtentry *r); +extern void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer); +extern u32 __fib_res_prefsrc(struct fib_result *res); + +/* Exported by fib_hash.c */ +extern struct fib_table *fib_hash_init(int id); + +#ifdef CONFIG_IP_MULTIPLE_TABLES +/* Exported by fib_rules.c */ + +extern int inet_rtm_delrule(struct sk_buff *skb, struct 
nlmsghdr* nlh, void *arg); +extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); +extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb); +extern u32 fib_rules_map_destination(u32 daddr, struct fib_result *res); +extern u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags); +extern void fib_rules_init(void); +#endif #endif _NET_FIB_H diff -u --recursive --new-file v2.1.67/linux/include/net/ip_masq.h linux/include/net/ip_masq.h --- v2.1.67/linux/include/net/ip_masq.h Mon Jul 7 16:04:18 1997 +++ linux/include/net/ip_masq.h Sun Nov 30 14:12:51 1997 @@ -88,14 +88,14 @@ /* * functions called from ip layer */ -extern int ip_fw_masquerade(struct sk_buff **, struct device *); -extern int ip_fw_masq_icmp(struct sk_buff **, struct device *); -extern int ip_fw_demasquerade(struct sk_buff **, struct device *); +extern int ip_fw_masquerade(struct sk_buff **, __u32 maddr); +extern int ip_fw_masq_icmp(struct sk_buff **); +extern int ip_fw_demasquerade(struct sk_buff **); /* * ip_masq obj creation/deletion functions. */ -extern struct ip_masq *ip_masq_new(struct device *dev, int proto, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned flags); +extern struct ip_masq *ip_masq_new(__u32 maddr, int proto, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned flags); extern void ip_masq_set_expire(struct ip_masq *ms, unsigned long tout); @@ -116,9 +116,9 @@ int (*masq_done_1) /* ip_masq fin. */ (struct ip_masq_app *, struct ip_masq *); int (*pkt_out) /* output (masquerading) hook */ - (struct ip_masq_app *, struct ip_masq *, struct sk_buff **, struct device *); + (struct ip_masq_app *, struct ip_masq *, struct sk_buff **, __u32); int (*pkt_in) /* input (demasq) hook */ - (struct ip_masq_app *, struct ip_masq *, struct sk_buff **, struct device *); + (struct ip_masq_app *, struct ip_masq *, struct sk_buff **); }; /* @@ -147,8 +147,8 @@ * output and input app. masquerading hooks. 
* */ -extern int ip_masq_app_pkt_out(struct ip_masq *, struct sk_buff **skb_p, struct device *dev); -extern int ip_masq_app_pkt_in(struct ip_masq *, struct sk_buff **skb_p, struct device *dev); +extern int ip_masq_app_pkt_out(struct ip_masq *, struct sk_buff **skb_p, __u32 maddr); +extern int ip_masq_app_pkt_in(struct ip_masq *, struct sk_buff **skb_p); /* * service routine(s). diff -u --recursive --new-file v2.1.67/linux/include/net/ipconfig.h linux/include/net/ipconfig.h --- v2.1.67/linux/include/net/ipconfig.h Wed Dec 31 16:00:00 1969 +++ linux/include/net/ipconfig.h Sun Nov 30 14:00:39 1997 @@ -0,0 +1,19 @@ +/* + * $Id: ipconfig.h,v 1.2 1997/10/17 12:41:16 mj Exp $ + * + * Copyright (C) 1997 Martin Mares + * + * Automatic IP Layer Configuration + */ + +extern __u32 root_server_addr; +extern u8 root_server_path[]; +extern u32 ic_myaddr; +extern u32 ic_servaddr; +extern u32 ic_gateway; +extern u32 ic_netmask; +extern int ic_bootp_flag; +extern int ic_rarp_flag; +extern int ic_enable; +extern int ic_host_name_set; +extern int ic_set_manually; diff -u --recursive --new-file v2.1.67/linux/include/net/ipip.h linux/include/net/ipip.h --- v2.1.67/linux/include/net/ipip.h Thu Dec 12 06:54:21 1996 +++ linux/include/net/ipip.h Sun Nov 30 14:00:39 1997 @@ -1,8 +1,33 @@ #ifndef __NET_IPIP_H #define __NET_IPIP_H 1 -extern void ipip_err(struct sk_buff *skb, unsigned char*); -extern int ipip_rcv(struct sk_buff *skb, unsigned short len); - +#include + +/* Keep error state on tunnel for 30 sec */ +#define IPTUNNEL_ERR_TIMEO (30*HZ) + +struct ip_tunnel +{ + struct ip_tunnel *next; + struct device *dev; + struct net_device_stats stat; + + int recursion; /* Depth of hard_start_xmit recursion */ + int err_count; /* Number of arrived ICMP errors */ + unsigned long err_time; /* Time when the last ICMP error arrived */ + + /* These four fields used only by GRE */ + __u32 i_seqno; /* The last seen seqno */ + __u32 o_seqno; /* The last output seqno */ + int hlen; /* Precalculated GRE 
header length */ + int mlink; + + struct ip_tunnel_parm parms; +}; + +extern int ipip_init(void); +extern int ipgre_init(void); +extern int sit_init(void); +extern void sit_cleanup(void); #endif diff -u --recursive --new-file v2.1.67/linux/include/net/netlink.h linux/include/net/netlink.h --- v2.1.67/linux/include/net/netlink.h Thu Sep 4 17:07:31 1997 +++ linux/include/net/netlink.h Wed Dec 31 16:00:00 1969 @@ -1,65 +0,0 @@ -#ifndef __NET_NETLINK_H -#define __NET_NETLINK_H - -#define NET_MAJOR 36 /* Major 18 is reserved for networking */ -#define MAX_LINKS 32 -#define MAX_QBYTES 32768 /* Maximum bytes in the queue */ - -#include - -extern int netlink_attach(int unit, int (*function)(int,struct sk_buff *skb)); -extern int netlink_donothing(int, struct sk_buff *skb); -extern void netlink_detach(int unit); -extern int netlink_post(int unit, struct sk_buff *skb); -extern int init_netlink(void); - -/* - * skb should fit one page. This choice is good for headerless malloc. - */ -#define NLMSG_GOODSIZE (PAGE_SIZE - ((sizeof(struct sk_buff)+0xF)&~0xF)-32) - -#define NLMSG_RECOVERY_TIMEO (HZ/2) /* If deleivery was failed, - retry after */ - -struct nlmsg_ctl -{ - struct timer_list nlmsg_timer; - struct sk_buff *nlmsg_skb; /* Partially built skb */ - int nlmsg_unit; - int nlmsg_delay; /* Time to delay skb send*/ - int nlmsg_maxsize; /* Maximal message size */ - int nlmsg_force; /* post immediately */ - unsigned long nlmsg_overrun_start; /* seqno starting lossage*/ - unsigned long nlmsg_overrun_end; /* the last lost message */ - char nlmsg_overrun; /* overrun flag */ -}; - -void* nlmsg_send(struct nlmsg_ctl*, unsigned long type, int len, - unsigned long seq, unsigned long pid); -void nlmsg_transmit(struct nlmsg_ctl*); - -extern __inline__ void nlmsg_ack(struct nlmsg_ctl* ctl, unsigned long seq, - unsigned long pid, int err) -{ - int *r; - - start_bh_atomic(); - r = nlmsg_send(ctl, NLMSG_ACK, sizeof(r), seq, pid); - if (r) - *r = err; - end_bh_atomic(); -} - - -#define 
NETLINK_ROUTE 0 /* Routing/device hook */ -#define NETLINK_SKIP 1 /* Reserved for ENskip */ -#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ -#define NETLINK_FIREWALL 3 /* Firewalling hook */ -#define NETLINK_FREE 4 /* PSI devices - 4 to 7 (obsolete) */ -#define NETLINK_ARPD 8 /* ARP daemon for big switched networks */ -#define NETLINK_IPSEC 10 /* IPSEC (JI) */ -#define NETLINK_ROUTE6 11 /* Af_inet6 route communication channel */ -#define NETLINK_IP6_FW 13 /* IPv6 firewall trap outs */ -#define NETLINK_DNRT 14 /* DECnet routing messages */ -#define NETLINK_TAPBASE 16 /* 16->31 are the ethertap devices */ -#endif diff -u --recursive --new-file v2.1.67/linux/include/net/pkt_sched.h linux/include/net/pkt_sched.h --- v2.1.67/linux/include/net/pkt_sched.h Wed Dec 31 16:00:00 1969 +++ linux/include/net/pkt_sched.h Sun Nov 30 14:00:39 1997 @@ -0,0 +1,164 @@ +#ifndef __NET_PKT_SCHED_H +#define __NET_PKT_SCHED_H + +#include + +struct Qdisc_ops +{ + struct Qdisc_ops *next; + char id[IFNAMSIZ]; + int refcnt; + int priv_size; + int (*enqueue)(struct sk_buff *skb, struct Qdisc *); + struct sk_buff * (*dequeue)(struct Qdisc *); + void (*reset)(struct Qdisc *); + void (*destroy)(struct Qdisc *); + int (*init)(struct Qdisc *, void *arg); + int (*control)(struct Qdisc *, void *); +}; + +struct Qdisc_head +{ + struct Qdisc_head *forw; +}; + +extern struct Qdisc_head qdisc_head; + +struct Qdisc +{ + struct Qdisc_head h; + int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); + struct sk_buff * (*dequeue)(struct Qdisc *dev); + struct Qdisc_ops *ops; + int handle; + struct Qdisc *parent; + struct sk_buff_head q; + struct device *dev; + struct sk_buff_head failure_q; + unsigned long dropped; + unsigned long tx_last; + unsigned long tx_timeo; + + char data[0]; +}; + + +/* Yes, it is slow for [34]86, but we have no choice. + 10 msec resolution is appropriate only for bandwidth < 32Kbit/sec. 
+ + RULE: + Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth + + Normal IP packet size ~ 512byte, hence: + + 0.5Kbyte/1Mbyte/sec = 0.5msec, so that we need 50usec timer for + 10Mbit ethernet. + + 10msec resolution -> <50Kbit/sec. + + The result: [34]86 is not good choice for QoS router :-( + */ + + +typedef struct timeval psched_time_t; + +/* On 64bit architecures it would be clever to define: +typedef u64 psched_time_t; + and make all this boring arithmetics directly + */ + +#ifndef SCHEDULE_ONLY_LOW_BANDWIDTH +#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp)) +#else +#define PSCHED_GET_TIME(stamp) ((stamp) = xtime) +#endif + +#define PSCHED_TDIFF(tv1, tv2) \ +({ \ + int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ + int __delta = (tv1).tv_usec - (tv2).tv_usec; \ + if (__delta_sec) { \ + switch (__delta_sec) { \ + default: \ + __delta = 0; \ + case 2: \ + __delta += 1000000; \ + case 1: \ + __delta += 1000000; \ + } \ + } \ + __delta; \ +}) + +#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \ +({ \ + int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ + int __delta = (tv1).tv_usec - (tv2).tv_usec; \ + switch (__delta_sec) { \ + default: \ + __delta = (bound); guard; break; \ + case 2: \ + __delta += 1000000; \ + case 1: \ + __delta += 1000000; \ + case 0: ; \ + } \ + __delta; \ +}) + +#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ)) + +#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \ + (tv1).tv_sec < (tv2).tv_sec) || \ + (tv1).tv_sec < (tv2).tv_sec) + +#define PSCHED_TADD2(tv, delta, tv_res) \ +({ \ + int __delta = (tv).tv_usec + (delta); \ + (tv_res).tv_sec = (tv).tv_sec; \ + if (__delta > 1000000) { (tv_res).tv_sec++; __delta -= 1000000; } \ + (tv_res).tv_sec = __delta; \ +}) + +#define PSCHED_TADD(tv, delta) \ +({ \ + (tv).tv_usec += (delta); \ + if ((tv).tv_usec > 1000000) { (tv).tv_sec++; \ + (tv).tv_usec -= 1000000; } \ +}) + +/* Set/check that undertime is in the "past perfect"; + it 
depends on concrete representation of system time + */ + +#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0) +#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0) + + +extern struct Qdisc noop_qdisc; + +int register_qdisc(struct Qdisc_ops *qops); +int unregister_qdisc(struct Qdisc_ops *qops); +void dev_init_scheduler(struct device *dev); +void dev_shutdown(struct device *dev); +void dev_activate(struct device *dev); +void dev_deactivate(struct device *dev); +void qdisc_reset(struct Qdisc *qdisc); +void qdisc_destroy(struct Qdisc *qdisc); +int pktsched_init(void); + +void qdisc_run_queues(void); +int qdisc_restart(struct device *dev); + +extern __inline__ void qdisc_wakeup(struct device *dev) +{ + if (!dev->tbusy) { + struct Qdisc *q = dev->qdisc; + if (qdisc_restart(dev) && q->h.forw == NULL) { + q->h.forw = qdisc_head.forw; + qdisc_head.forw = &q->h; + } + } +} + +#endif diff -u --recursive --new-file v2.1.67/linux/include/net/protocol.h linux/include/net/protocol.h --- v2.1.67/linux/include/net/protocol.h Mon Jul 7 16:02:45 1997 +++ linux/include/net/protocol.h Sun Nov 30 14:11:18 1997 @@ -37,7 +37,7 @@ struct inet_protocol { int (*handler)(struct sk_buff *skb, unsigned short len); - void (*err_handler)(struct sk_buff *skb, unsigned char *dp); + void (*err_handler)(struct sk_buff *skb, unsigned char *dp, int len); struct inet_protocol *next; unsigned char protocol; unsigned char copy:1; diff -u --recursive --new-file v2.1.67/linux/include/net/raw.h linux/include/net/raw.h --- v2.1.67/linux/include/net/raw.h Tue Mar 4 10:25:26 1997 +++ linux/include/net/raw.h Sun Nov 30 14:00:39 1997 @@ -32,6 +32,7 @@ extern struct sock *raw_v4_lookup(struct sock *sk, unsigned short num, - unsigned long raddr, unsigned long laddr); + unsigned long raddr, unsigned long laddr, + int dif); #endif /* _RAW_H */ diff -u --recursive --new-file v2.1.67/linux/include/net/rose.h linux/include/net/rose.h --- v2.1.67/linux/include/net/rose.h Mon Jul 7 08:19:59 1997 +++ linux/include/net/rose.h 
Sun Nov 30 14:00:39 1997 @@ -129,10 +129,9 @@ unsigned char cause, diagnostic; unsigned short vs, vr, va, vl; unsigned long t1, t2, t3, hb, idle; - unsigned short fraglen; + struct sk_buff_head ack_queue; struct timer_list timer; struct timer_list idletimer; - struct sk_buff_head frag_queue; struct sock *sk; /* Backlink to socket */ } rose_cb; @@ -181,10 +180,8 @@ extern void rose_transmit_link(struct sk_buff *, struct rose_neigh *); /* rose_out.c */ -extern void rose_output(struct sock *, struct sk_buff *); extern void rose_kick(struct sock *); extern void rose_enquiry_response(struct sock *); -extern void rose_check_iframes_acked(struct sock *, unsigned short); /* rose_route.c */ extern void rose_rt_device_down(struct device *); @@ -204,6 +201,8 @@ /* rose_subr.c */ extern void rose_clear_queues(struct sock *); +extern void rose_frames_acked(struct sock *, unsigned short); +extern void rose_requeue_frames(struct sock *); extern int rose_validate_nr(struct sock *, unsigned short); extern void rose_write_internal(struct sock *, int); extern int rose_decode(struct sk_buff *, int *, int *, int *, int *, int *); diff -u --recursive --new-file v2.1.67/linux/include/net/route.h linux/include/net/route.h --- v2.1.67/linux/include/net/route.h Mon Jul 7 16:04:03 1997 +++ linux/include/net/route.h Sun Nov 30 14:12:38 1997 @@ -22,9 +22,10 @@ #ifndef _ROUTE_H #define _ROUTE_H -#include +#include #include - +#include +#include #define RT_HASH_DIVISOR 256 #define RT_CACHE_MAX_SIZE 256 @@ -37,12 +38,12 @@ /* * Cache invalidations can be delayed by: */ -#define RT_FLUSH_DELAY (2*HZ) +#define RT_FLUSH_DELAY (5*HZ) #define RT_REDIRECT_NUMBER 9 #define RT_REDIRECT_LOAD (HZ/50) /* 20 msec */ #define RT_REDIRECT_SILENCE (RT_REDIRECT_LOAD<<(RT_REDIRECT_NUMBER+1)) - /* 20sec */ +/* 20sec */ #define RT_ERROR_LOAD (1*HZ) @@ -55,7 +56,17 @@ #include -struct rtable +struct rt_key +{ + __u32 dst; + __u32 src; + int iif; + int oif; + __u8 tos; + __u8 scope; +}; + +struct rtable { union { @@ 
-64,92 +75,76 @@ } u; unsigned rt_flags; + unsigned rt_type; - u32 rt_dst; /* Path destination */ - u32 rt_src; /* Path source */ - struct device *rt_src_dev; /* Path source device */ + __u32 rt_dst; /* Path destination */ + __u32 rt_src; /* Path source */ + int rt_iif; /* Info on neighbour */ - u32 rt_gateway; + __u32 rt_gateway; /* Cache lookup keys */ - struct - { - u32 dst; - u32 src; - struct device *src_dev; - struct device *dst_dev; - u8 tos; - } key; + struct rt_key key; /* Miscellaneous cached information */ - u32 rt_spec_dst; /* RFC1122 specific destination */ - u32 rt_src_map; - u32 rt_dst_map; + __u32 rt_spec_dst; /* RFC1122 specific destination */ + +#ifdef CONFIG_IP_ROUTE_NAT + __u32 rt_src_map; + __u32 rt_dst_map; +#endif /* ICMP statistics */ unsigned long last_error; unsigned long errors; }; - -#define RTF_IFBRD (RTF_UP|RTF_MAGIC|RTF_LOCAL|RTF_BROADCAST) -#define RTF_IFLOCAL (RTF_UP|RTF_MAGIC|RTF_LOCAL|RTF_INTERFACE) -#define RTF_IFPREFIX (RTF_UP|RTF_MAGIC|RTF_INTERFACE) - -/* - * Flags not visible at user level. - */ -#define RTF_INTERNAL 0xFFFF8000 /* to get RTF_MAGIC as well... */ - -/* - * Flags saved in FIB. 
- */ -#define RTF_FIB (RTF_UP|RTF_GATEWAY|RTF_REJECT|RTF_THROW|RTF_STATIC|\ - RTF_XRESOLVE|RTF_NOPMTUDISC|RTF_NOFORWARD|RTF_INTERNAL) - +#ifdef __KERNEL__ extern void ip_rt_init(void); extern void ip_rt_redirect(u32 old_gw, u32 dst, u32 new_gw, u32 src, u8 tos, struct device *dev); extern void ip_rt_check_expire(void); extern void ip_rt_advice(struct rtable **rp, int advice); extern void rt_cache_flush(int how); -extern int ip_route_output(struct rtable **, u32 dst, u32 src, u8 tos, struct device *devout); -extern int ip_route_output_dev(struct rtable **, u32 dst, u32 src, u8 tos, int); +extern int ip_route_output(struct rtable **, u32 dst, u32 src, u8 tos, int oif); extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct device *devin); extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); extern void ip_rt_send_redirect(struct sk_buff *skb); -static __inline__ void ip_rt_put(struct rtable * rt) +extern unsigned inet_addr_type(u32 addr); +extern void ip_rt_multicast_event(struct in_device *); +extern int ip_rt_ioctl(unsigned int cmd, void *arg); +extern void ip_rt_get_source(u8 *src, struct rtable *rt); + + +extern __inline__ void ip_rt_put(struct rtable * rt) { if (rt) dst_release(&rt->u.dst); } -static __inline__ char rt_tos2priority(u8 tos) +extern __u8 ip_tos2prio[16]; + +extern __inline__ char rt_tos2priority(u8 tos) { - if (tos & IPTOS_LOWDELAY) - return SOPRI_INTERACTIVE; - if (tos & (IPTOS_THROUGHPUT|IPTOS_MINCOST)) - return SOPRI_BACKGROUND; - return SOPRI_NORMAL; + return ip_tos2prio[IPTOS_TOS(tos)>>1]; } - -static __inline__ int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos) +extern __inline__ int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif) { int err; - err = ip_route_output(rp, dst, src, tos, NULL); + err = ip_route_output(rp, dst, src, tos, oif); if (err || (dst && src)) return err; dst = (*rp)->rt_dst; src = (*rp)->rt_src; ip_rt_put(*rp); *rp = NULL; - 
return ip_route_output(rp, dst, src, tos, NULL); + return ip_route_output(rp, dst, src, tos, oif); } -static __inline__ void ip_ll_header(struct sk_buff *skb) +extern __inline__ void ip_ll_header(struct sk_buff *skb) { struct rtable *rt = (struct rtable*)skb->dst; struct device *dev = rt->u.dst.dev; @@ -169,6 +164,7 @@ skb->mac.raw = skb->data; } +#endif #endif /* _ROUTE_H */ diff -u --recursive --new-file v2.1.67/linux/include/net/sit.h linux/include/net/sit.h --- v2.1.67/linux/include/net/sit.h Sun Nov 3 01:04:42 1996 +++ linux/include/net/sit.h Wed Dec 31 16:00:00 1969 @@ -1,39 +0,0 @@ -/* - * SIT tunneling device - definitions - * Linux INET6 implementation - * - * Authors: - * Pedro Roque - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _NET_SIT_H -#define _NET_SIT_H - -struct sit_mtu_info { - __u32 addr; /* IPv4 destination */ - unsigned long tstamp; /* last use tstamp */ - __u32 mtu; /* Path MTU */ - struct sit_mtu_info *next; -}; - -struct sit_vif { - char name[8]; - struct device *dev; - struct sit_vif *next; -}; - -extern int sit_init(void); -extern void sit_cleanup(void); - -extern struct device * sit_add_tunnel(__u32 dstaddr); - -#define SIT_GC_TIMEOUT (3*60*HZ) -#define SIT_GC_FREQUENCY (2*60*HZ) - -#endif diff -u --recursive --new-file v2.1.67/linux/include/net/slhc_vj.h linux/include/net/slhc_vj.h --- v2.1.67/linux/include/net/slhc_vj.h Sun May 5 21:28:54 1996 +++ linux/include/net/slhc_vj.h Sun Nov 30 14:00:39 1997 @@ -112,8 +112,8 @@ * int int32 long 32 bits */ -typedef unsigned char byte_t; -typedef unsigned long int32; +typedef __u8 byte_t; +typedef __u32 int32; /* * "state" data for each active tcp conversation on the wire. 
This is diff -u --recursive --new-file v2.1.67/linux/include/net/sock.h linux/include/net/sock.h --- v2.1.67/linux/include/net/sock.h Thu Sep 4 17:07:31 1997 +++ linux/include/net/sock.h Sun Nov 30 14:11:19 1997 @@ -44,8 +44,10 @@ #include /* struct ipv6_mc_socklist */ #endif +#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE) +#include +#endif #include /* struct tcphdr */ -#include #include #include /* struct sk_buff */ @@ -75,8 +77,6 @@ #include #endif -#include - #include /* @@ -98,18 +98,20 @@ int inflight; }; -/* - * IP packet socket options - */ +#ifdef CONFIG_NETLINK +struct netlink_callback; -struct inet_packet_opt +struct netlink_opt { - struct notifier_block notifier; /* Used when bound */ - struct device *bound_dev; - unsigned long dev_stamp; - struct packet_type *prot_hook; - char device_name[15]; + pid_t pid; + unsigned groups; + pid_t dst_pid; + unsigned dst_groups; + int (*handler)(int unit, struct sk_buff *skb); + atomic_t locks; + struct netlink_callback *cb; }; +#endif /* * Once the IPX ncpd patches are in these are going into protinfo @@ -184,6 +186,12 @@ #endif /* IPV6 */ +#if defined(CONFIG_INET) || defined(CONFIG_INET_MODULE) +struct raw_opt { + struct icmp_filter filter; +}; +#endif + struct tcp_opt { @@ -374,6 +382,7 @@ broadcast, nonagle, bsdism; + int bound_dev_if; unsigned long lingertime; int proc; @@ -401,9 +410,6 @@ __u32 rcv_saddr; /* Bound address */ struct dst_entry *dst_cache; - - unsigned short max_unacked; - /* * mss is min(mtu, max_window) */ @@ -422,6 +428,9 @@ union { struct tcp_opt af_tcp; +#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE) + struct raw_opt tp_raw4; +#endif #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) struct raw6_opt tp_raw; #endif @@ -435,8 +444,8 @@ volatile unsigned char state; unsigned short ack_backlog; unsigned short max_ack_backlog; - unsigned char priority; unsigned char debug; + __u32 priority; int rcvbuf; int sndbuf; unsigned short type; @@ -462,8 +471,10 @@ #if 
defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE) struct ipx_opt af_ipx; #endif +#if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE) + struct packet_opt *af_packet; +#endif #ifdef CONFIG_INET - struct inet_packet_opt af_packet; #ifdef CONFIG_NUTCP struct tcp_opt af_tcp; #endif @@ -483,6 +494,9 @@ #if defined(CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE) dn_cb *dn; #endif +#ifdef CONFIG_NETLINK + struct netlink_opt af_netlink; +#endif } protinfo; /* @@ -560,7 +574,8 @@ void (*write_wakeup)(struct sock *sk); void (*read_wakeup)(struct sock *sk); - unsigned int (*poll)(struct socket *sock, poll_table *wait); + unsigned int (*poll)(struct socket *sock, + struct poll_table_struct *wait); int (*ioctl)(struct sock *sk, int cmd, unsigned long arg); @@ -783,7 +798,7 @@ extern int sock_no_getname(struct socket *, struct sockaddr *, int *, int); extern unsigned int sock_no_poll(struct socket *, - poll_table *); + struct poll_table_struct *); extern int sock_no_ioctl(struct socket *, unsigned int, unsigned long); extern int sock_no_listen(struct socket *, int); diff -u --recursive --new-file v2.1.67/linux/include/net/tcp.h linux/include/net/tcp.h --- v2.1.67/linux/include/net/tcp.h Thu Sep 4 17:07:31 1997 +++ linux/include/net/tcp.h Sun Nov 30 14:12:39 1997 @@ -226,6 +226,7 @@ struct or_calltable { void (*rtx_syn_ack) (struct sock *sk, struct open_request *req); void (*destructor) (struct open_request *req); + void (*send_reset) (struct sk_buff *skb); }; struct tcp_v4_open_req { @@ -306,11 +307,6 @@ struct open_request *req, struct dst_entry *dst); -#if 0 - __u32 (*init_sequence) (struct sock *sk, - struct sk_buff *skb); -#endif - struct sock * (*get_sock) (struct sk_buff *skb, struct tcphdr *th); @@ -330,15 +326,6 @@ void (*addr2sockaddr) (struct sock *sk, struct sockaddr *); - void (*send_reset) (struct sk_buff *skb); - - struct open_request * (*search_open_req) (struct tcp_opt *, void *, - struct tcphdr *, - struct open_request **); - - struct sock * 
(*cookie_check) (struct sock *, struct sk_buff *, - void *); - int sockaddr_len; }; @@ -371,7 +358,7 @@ extern unsigned short tcp_good_socknum(void); extern void tcp_v4_err(struct sk_buff *skb, - unsigned char *); + unsigned char *, int); extern void tcp_shutdown (struct sock *sk, int how); @@ -399,7 +386,7 @@ extern void tcp_close(struct sock *sk, unsigned long timeout); extern struct sock * tcp_accept(struct sock *sk, int flags); -extern unsigned int tcp_poll(struct socket *sock, poll_table *wait); +extern unsigned int tcp_poll(struct socket *sock, struct poll_table_struct *wait); extern int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen); @@ -485,8 +472,7 @@ extern void tcp_probe_timer(unsigned long); extern struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, - void *); - + struct open_request *req); /* * TCP slow timer @@ -546,9 +532,9 @@ extern __inline const int tcp_connected(const int state) { - return(state == TCP_ESTABLISHED || state == TCP_CLOSE_WAIT || - state == TCP_FIN_WAIT1 || state == TCP_FIN_WAIT2 || - state == TCP_SYN_RECV); + return ((1 << state) & + (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT1| + TCPF_FIN_WAIT2|TCPF_SYN_RECV)); } /* diff -u --recursive --new-file v2.1.67/linux/include/net/udp.h linux/include/net/udp.h --- v2.1.67/linux/include/net/udp.h Tue Mar 4 10:25:26 1997 +++ linux/include/net/udp.h Sun Nov 30 14:00:39 1997 @@ -40,7 +40,7 @@ extern struct proto udp_prot; -extern void udp_err(struct sk_buff *, unsigned char *); +extern void udp_err(struct sk_buff *, unsigned char *, int); extern int udp_connect(struct sock *sk, struct sockaddr *usin, int addr_len); diff -u --recursive --new-file v2.1.67/linux/include/net/x25.h linux/include/net/x25.h --- v2.1.67/linux/include/net/x25.h Mon Jul 7 08:19:59 1997 +++ linux/include/net/x25.h Sun Nov 30 14:00:39 1997 @@ -123,6 +123,7 @@ unsigned short vs, vr, va, vl; unsigned long t2, t21, t22, t23; unsigned short fraglen; + struct sk_buff_head 
ack_queue; struct sk_buff_head fragment_queue; struct sk_buff_head interrupt_in_queue; struct sk_buff_head interrupt_out_queue; @@ -183,7 +184,6 @@ extern void x25_output(struct sock *, struct sk_buff *); extern void x25_kick(struct sock *); extern void x25_enquiry_response(struct sock *); -extern void x25_check_iframes_acked(struct sock *, unsigned short); /* x25_route.c */ extern struct device *x25_get_route(x25_address *); @@ -195,6 +195,8 @@ /* x25_subr.c */ extern void x25_clear_queues(struct sock *); +extern void x25_frames_acked(struct sock *, unsigned short); +extern void x25_requeue_frames(struct sock *); extern int x25_validate_nr(struct sock *, unsigned short); extern void x25_write_internal(struct sock *, int); extern int x25_decode(struct sock *, struct sk_buff *, int *, int *, int *, int *, int *); diff -u --recursive --new-file v2.1.67/linux/init/main.c linux/init/main.c --- v2.1.67/linux/init/main.c Wed Nov 26 16:24:03 1997 +++ linux/init/main.c Sun Nov 30 13:48:48 1997 @@ -75,6 +75,7 @@ extern long powermac_init(unsigned long, unsigned long); extern void sysctl_init(void); extern void filescache_init(void); +extern void signals_init(void); extern void smp_setup(char *str, int *ints); extern void no_scroll(char *str, int *ints); @@ -220,8 +221,14 @@ #ifdef CONFIG_RISCOM8 extern void riscom8_setup(char *str, int *ints); #endif -#ifdef CONFIG_BAYCOM -extern void baycom_setup(char *str, int *ints); +#ifdef CONFIG_BAYCOM_PAR +extern void baycom_par_setup(char *str, int *ints); +#endif +#ifdef CONFIG_BAYCOM_SER_FDX +extern void baycom_ser_fdx_setup(char *str, int *ints); +#endif +#ifdef CONFIG_BAYCOM_SER_HDX +extern void baycom_ser_hdx_setup(char *str, int *ints); #endif #ifdef CONFIG_SOUNDMODEM extern void sm_setup(char *str, int *ints); @@ -541,8 +548,14 @@ #ifdef CONFIG_RISCOM8 { "riscom8=", riscom8_setup }, #endif -#ifdef CONFIG_BAYCOM - { "baycom=", baycom_setup }, +#ifdef CONFIG_BAYCOM_PAR + { "baycom_par=", baycom_par_setup }, +#endif +#ifdef 
CONFIG_BAYCOM_SER_FDX + { "baycom_ser_fdx=", baycom_ser_fdx_setup }, +#endif +#ifdef CONFIG_BAYCOM_SER_HDX + { "baycom_ser_hdx=", baycom_ser_hdx_setup }, #endif #ifdef CONFIG_SOUNDMODEM { "soundmodem=", sm_setup }, @@ -683,6 +696,10 @@ { "hdb", 0x0340 }, { "hdc", 0x1600 }, { "hdd", 0x1640 }, + { "hde", 0x2100 }, + { "hdf", 0x2140 }, + { "hdg", 0x2200 }, + { "hdh", 0x2240 }, { "sda", 0x0800 }, { "sdb", 0x0810 }, { "sdc", 0x0820 }, @@ -955,6 +972,7 @@ dcache_init(); vma_init(); buffer_init(); + signals_init(); inode_init(); file_table_init(); sock_init(); diff -u --recursive --new-file v2.1.67/linux/kernel/Makefile linux/kernel/Makefile --- v2.1.67/linux/kernel/Makefile Tue Jan 9 23:27:39 1996 +++ linux/kernel/Makefile Sun Nov 30 12:37:29 1997 @@ -12,11 +12,13 @@ O_TARGET := kernel.o O_OBJS = sched.o dma.o fork.o exec_domain.o panic.o printk.o sys.o \ - module.o exit.o signal.o itimer.o info.o time.o softirq.o \ - resource.o sysctl.o + module.o exit.o itimer.o info.o time.o softirq.o resource.o \ + sysctl.o acct.o + +OX_OBJS += signal.o ifeq ($(CONFIG_MODULES),y) -OX_OBJS = ksyms.o +OX_OBJS += ksyms.o endif include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.1.67/linux/kernel/acct.c linux/kernel/acct.c --- v2.1.67/linux/kernel/acct.c Wed Dec 31 16:00:00 1969 +++ linux/kernel/acct.c Sun Nov 30 12:34:44 1997 @@ -0,0 +1,335 @@ +/* + * linux/kernel/acct.c + * + * BSD Process Accounting for Linux + * + * Author: Marco van Wieringen + * + * Some code based on ideas and code from: + * Thomas K. Dyas + * + * This file implements BSD-style process accounting. Whenever any + * process exits, an accounting record of type "struct acct" is + * written to the file specified with the acct() system call. It is + * up to user-level programs to do useful things with the accounting + * log. The kernel just provides the raw accounting information. + * + * (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V. 
+ * + */ + +#include +#include +#include + +#ifdef CONFIG_BSD_PROCESS_ACCT +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * These constants control the amount of freespace that suspend and + * resume the process accounting system, and the time delay between + * each check. + */ + +#define RESUME (4) /* More than 4% free space will resume */ +#define SUSPEND (2) /* Less than 2% free space will suspend */ +#define ACCT_TIMEOUT (30 * HZ) /* 30 second timeout between checks */ + +/* + * External references and all of the globals. + */ +extern int close_fp(struct file *); + +void acct_timeout(unsigned long); + +static volatile int acct_active = 0; +static volatile int acct_needcheck = 0; +static struct file *acct_file = NULL; +static struct timer_list acct_timer = { NULL, NULL, 0, 0, acct_timeout }; + +/* + * Called whenever the timer says to check the free space. + */ +void acct_timeout(unsigned long unused) +{ + acct_needcheck = 1; +} + +/* + * Check the amount of free space and suspend/resume accordingly. 
+ */ +static void check_free_space(void) +{ + unsigned long fs; + struct statfs sbuf; + + if (!acct_file || !acct_needcheck) + return; + + if (!acct_file->f_dentry->d_inode->i_sb->s_op || + !acct_file->f_dentry->d_inode->i_sb->s_op->statfs) + return; + + fs = get_fs(); + set_fs(KERNEL_DS); + acct_file->f_dentry->d_inode->i_sb->s_op->statfs(acct_file->f_dentry->d_inode->i_sb, &sbuf, sizeof(struct statfs)); + set_fs(fs); + + if (acct_active) { + if (sbuf.f_bavail <= SUSPEND * sbuf.f_blocks / 100) { + acct_active = 0; + printk(KERN_INFO "Process accounting paused\r\n"); + } + } else { + if (sbuf.f_bavail >= RESUME * sbuf.f_blocks / 100) { + acct_active = 1; + printk(KERN_INFO "Process accounting resumed\r\n"); + } + } + del_timer(&acct_timer); + acct_needcheck = 0; + acct_timer.expires = jiffies + ACCT_TIMEOUT; + add_timer(&acct_timer); +} + +/* + * sys_acct() is the only system call needed to implement process + * accounting. It takes the name of the file where accounting records + * should be written. If the filename is NULL, accounting will be + * shutdown. 
+ */ +asmlinkage int sys_acct(const char *name) +{ + struct dentry *dentry; + struct inode *inode; + char *tmp; + int error = -EPERM; + + lock_kernel(); + if (!suser()) + goto out; + + if (name == (char *)NULL) { + if (acct_active) { + acct_process(0); + del_timer(&acct_timer); + acct_active = 0; + acct_needcheck = 0; + close_fp(acct_file); + } + error = 0; + goto out; + } else { + if (!acct_active) { + tmp = getname(name); + error = PTR_ERR(tmp); + if (IS_ERR(tmp)) + goto out; + + dentry = open_namei(tmp, O_RDWR, 0600); + putname(tmp); + + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out; + + inode = dentry->d_inode; + + if (!S_ISREG(inode->i_mode)) { + dput(dentry); + error = -EACCES; + goto out; + } + + if (!inode->i_op || !inode->i_op->default_file_ops || + !inode->i_op->default_file_ops->write) { + dput(dentry); + error = -EIO; + goto out; + } + + if ((acct_file = get_empty_filp()) != (struct file *)NULL) { + acct_file->f_mode = (O_WRONLY + 1) & O_ACCMODE; + acct_file->f_flags = O_WRONLY; + acct_file->f_dentry = dentry; + acct_file->f_pos = inode->i_size; + acct_file->f_reada = 0; + acct_file->f_op = inode->i_op->default_file_ops; + if ((error = get_write_access(acct_file->f_dentry->d_inode)) == 0) { + if (acct_file->f_op && acct_file->f_op->open) + error = acct_file->f_op->open(inode, acct_file); + if (error == 0) { + acct_needcheck = 0; + acct_active = 1; + acct_timer.expires = jiffies + ACCT_TIMEOUT; + add_timer(&acct_timer); + error = 0; + goto out; + } + put_write_access(acct_file->f_dentry->d_inode); + } + acct_file->f_count--; + } else + error = -EUSERS; + dput(dentry); + } else + error = -EBUSY; + } +out: + unlock_kernel(); + return error; +} + +void acct_auto_close(kdev_t dev) +{ + if (acct_active && acct_file && acct_file->f_dentry->d_inode->i_dev == dev) + sys_acct((char *)NULL); +} + +/* + * encode an unsigned long into a comp_t + * + * This routine has been adopted from the encode_comp_t() function in + * the kern_acct.c file of the 
FreeBSD operating system. The encoding + * is a 13-bit fraction with a 3-bit (base 8) exponent. + */ + +#define MANTSIZE 13 /* 13 bit mantissa. */ +#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */ +#define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */ + +static comp_t encode_comp_t(unsigned long value) +{ + int exp, rnd; + + exp = rnd = 0; + while (value > MAXFRACT) { + rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */ + value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */ + exp++; + } + + /* + * If we need to round up, do it (and handle overflow correctly). + */ + if (rnd && (++value > MAXFRACT)) { + value >>= EXPSIZE; + exp++; + } + + /* + * Clean it up and polish it off. + */ + exp <<= MANTSIZE; /* Shift the exponent into place */ + exp += value; /* and add on the mantissa. */ + return exp; +} + +/* + * Write an accounting entry for an exiting process + * + * The acct_process() call is the workhorse of the process + * accounting system. The struct acct is built here and then written + * into the accounting file. This function should only be called from + * do_exit(). + */ +#define KSTK_EIP(stack) (((unsigned long *)(stack))[1019]) +#define KSTK_ESP(stack) (((unsigned long *)(stack))[1022]) + +int acct_process(long exitcode) +{ + struct acct ac; + unsigned long fs; + unsigned long vsize; + + /* + * First check to see if there is enough free_space to continue the process + * accounting system. Check_free_space toggle's the acct_active flag so we + * need to check that after check_free_space. + */ + check_free_space(); + + if (!acct_active) + return 0; + + + /* + * Fill the accounting struct with the needed info as recorded by the different + * kernel functions. 
+ */ + memset((caddr_t)&ac, 0, sizeof(struct acct)); + + strncpy(ac.ac_comm, current->comm, ACCT_COMM); + ac.ac_comm[ACCT_COMM - 1] = '\0'; + + ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ)); + ac.ac_etime = encode_comp_t(jiffies - current->start_time); + ac.ac_utime = encode_comp_t(current->times.tms_utime); + ac.ac_stime = encode_comp_t(current->times.tms_stime); + ac.ac_uid = current->uid; + ac.ac_gid = current->gid; + ac.ac_tty = (current->tty) ? kdev_t_to_nr(current->tty->device) : 0; + + ac.ac_flag = 0; + if (current->flags & PF_FORKNOEXEC) + ac.ac_flag |= AFORK; + if (current->flags & PF_SUPERPRIV) + ac.ac_flag |= ASU; + if (current->flags & PF_DUMPCORE) + ac.ac_flag |= ACORE; + if (current->flags & PF_SIGNALED) + ac.ac_flag |= AXSIG; + + vsize = 0; + if (current->mm) { + struct vm_area_struct *vma = current->mm->mmap; + while (vma) { + vsize += vma->vm_end - vma->vm_start; + vma = vma->vm_next; + } + } + vsize = vsize / 1024; + ac.ac_mem = encode_comp_t(vsize); + ac.ac_io = encode_comp_t(current->io_usage); + ac.ac_rw = encode_comp_t(ac.ac_io / 1024); + ac.ac_minflt = encode_comp_t(current->min_flt); + ac.ac_majflt = encode_comp_t(current->maj_flt); + ac.ac_swaps = encode_comp_t(current->nswap); + ac.ac_exitcode = exitcode; + + /* + * Kernel segment override to datasegment and write it to the accounting file. + */ + fs = get_fs(); + set_fs(KERNEL_DS); + acct_file->f_op->write(acct_file, (char *)&ac, + sizeof(struct acct), &acct_file->f_pos); + set_fs(fs); + return 0; +} + +#else +/* + * Dummy system call when BSD process accounting is not configured + * into the kernel. 
+ */ + +asmlinkage int sys_acct(const char * filename) +{ + return -ENOSYS; +} +#endif diff -u --recursive --new-file v2.1.67/linux/kernel/exit.c linux/kernel/exit.c --- v2.1.67/linux/kernel/exit.c Wed Sep 24 20:05:48 1997 +++ linux/kernel/exit.c Sun Nov 30 12:41:39 1997 @@ -20,113 +20,17 @@ #include #include #include +#include #include #include #include extern void sem_exit (void); -extern void acct_process (long exitcode); extern void kerneld_exit(void); int getrusage(struct task_struct *, int, struct rusage *); -static inline void generate(unsigned long sig, struct task_struct * p) -{ - unsigned flags; - unsigned long mask = 1 << (sig-1); - struct sigaction * sa = sig + p->sig->action - 1; - - /* - * Optimize away the signal, if it's a signal that can - * be handled immediately (ie non-blocked and untraced) - * and that is ignored (either explicitly or by default) - */ - spin_lock_irqsave(&p->sig->siglock, flags); - if (!(mask & p->blocked) && !(p->flags & PF_PTRACED)) { - /* don't bother with ignored signals (but SIGCHLD is special) */ - if (sa->sa_handler == SIG_IGN && sig != SIGCHLD) - goto out; - /* some signals are ignored by default.. (but SIGCONT already did its deed) */ - if ((sa->sa_handler == SIG_DFL) && - (sig == SIGCONT || sig == SIGCHLD || sig == SIGWINCH || sig == SIGURG)) - goto out; - } - spin_lock(&p->sigmask_lock); - p->signal |= mask; - spin_unlock(&p->sigmask_lock); - if (p->state == TASK_INTERRUPTIBLE && signal_pending(p)) - wake_up_process(p); -out: - spin_unlock_irqrestore(&p->sig->siglock, flags); -} - -/* - * Force a signal that the process can't ignore: if necessary - * we unblock the signal and change any SIG_IGN to SIG_DFL. 
- */ -void force_sig(unsigned long sig, struct task_struct * p) -{ - sig--; - if (p->sig) { - unsigned flags; - unsigned long mask = 1UL << sig; - struct sigaction *sa = p->sig->action + sig; - - spin_lock_irqsave(&p->sig->siglock, flags); - - spin_lock(&p->sigmask_lock); - p->signal |= mask; - p->blocked &= ~mask; - spin_unlock(&p->sigmask_lock); - - if (sa->sa_handler == SIG_IGN) - sa->sa_handler = SIG_DFL; - if (p->state == TASK_INTERRUPTIBLE) - wake_up_process(p); - - spin_unlock_irqrestore(&p->sig->siglock, flags); - } -} - -int send_sig(unsigned long sig,struct task_struct * p,int priv) -{ - if (!p || sig > 32) - return -EINVAL; - if (!priv && ((sig != SIGCONT) || (current->session != p->session)) && - (current->euid ^ p->suid) && (current->euid ^ p->uid) && - (current->uid ^ p->suid) && (current->uid ^ p->uid) && - !suser()) - return -EPERM; - - if (sig && p->sig) { - unsigned flags; - spin_lock_irqsave(&p->sigmask_lock, flags); - if ((sig == SIGKILL) || (sig == SIGCONT)) { - if (p->state == TASK_STOPPED) - wake_up_process(p); - p->exit_code = 0; - p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) | - (1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) ); - } - if (sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU) - p->signal &= ~(1<<(SIGCONT-1)); - spin_unlock_irqrestore(&p->sigmask_lock, flags); - - /* Actually generate the signal */ - generate(sig,p); - } - return 0; -} - -void notify_parent(struct task_struct * tsk, int signal) -{ - struct task_struct * parent = tsk->p_pptr; - - send_sig(signal, parent, 1); - wake_up_interruptible(&parent->wait_chldexit); -} - static void release(struct task_struct * p) { if (p != current) { @@ -179,118 +83,6 @@ } /* - * kill_pg() sends a signal to a process group: this is what the tty - * control characters do (^C, ^Z etc) - */ -int kill_pg(int pgrp, int sig, int priv) -{ - int retval; - - retval = -EINVAL; - if (sig >= 0 && sig <= 32 && pgrp > 0) { - struct task_struct *p; - int found = 0; - - retval = -ESRCH; - 
read_lock(&tasklist_lock); - for_each_task(p) { - if (p->pgrp == pgrp) { - int err = send_sig(sig,p,priv); - if (err != 0) - retval = err; - else - found++; - } - } - read_unlock(&tasklist_lock); - if (found) - retval = 0; - } - return retval; -} - -/* - * kill_sl() sends a signal to the session leader: this is used - * to send SIGHUP to the controlling process of a terminal when - * the connection is lost. - */ -int kill_sl(int sess, int sig, int priv) -{ - int retval; - - retval = -EINVAL; - if (sig >= 0 && sig <= 32 && sess > 0) { - struct task_struct *p; - int found = 0; - - retval = -ESRCH; - read_lock(&tasklist_lock); - for_each_task(p) { - if (p->leader && p->session == sess) { - int err = send_sig(sig,p,priv); - - if (err) - retval = err; - else - found++; - } - } - read_unlock(&tasklist_lock); - if (found) - retval = 0; - } - return retval; -} - -int kill_proc(int pid, int sig, int priv) -{ - int retval; - - retval = -EINVAL; - if (sig >= 0 && sig <= 32) { - struct task_struct *p = find_task_by_pid(pid); - - if(p) - retval = send_sig(sig, p, priv); - else - retval = -ESRCH; - } - return retval; -} - -/* - * POSIX specifies that kill(-1,sig) is unspecified, but what we have - * is probably wrong. Should make it like BSD or SYSV. - */ -asmlinkage int sys_kill(int pid,int sig) -{ - if (!pid) - return kill_pg(current->pgrp,sig,0); - - if (pid == -1) { - int retval = 0, count = 0; - struct task_struct * p; - - read_lock(&tasklist_lock); - for_each_task(p) { - if (p->pid > 1 && p != current) { - int err; - ++count; - if ((err = send_sig(sig,p,0)) != -EPERM) - retval = err; - } - } - read_unlock(&tasklist_lock); - return count ? retval : -ESRCH; - } - if (pid < 0) - return kill_pg(-pid,sig,0); - - /* Normal kill */ - return kill_proc(pid,sig,0); -} - -/* * Determine if a process group is "orphaned", according to the POSIX * definition in 2.2.2.52. Orphaned process groups are not to be affected * by terminal-generated stop signals. 
Newly orphaned process groups are @@ -398,7 +190,7 @@ void exit_files(struct task_struct *tsk) { - __exit_files(tsk); + __exit_files(tsk); } static inline void __exit_fs(struct task_struct *tsk) @@ -417,7 +209,7 @@ void exit_fs(struct task_struct *tsk) { - __exit_fs(tsk); + __exit_fs(tsk); } static inline void __exit_sighand(struct task_struct *tsk) @@ -429,6 +221,8 @@ if (atomic_dec_and_test(&sig->count)) kfree(sig); } + + flush_signals(tsk); } void exit_sighand(struct task_struct *tsk) @@ -527,9 +321,11 @@ { if (in_interrupt()) printk("Aiee, killing interrupt handler\n"); + if (current == task[0]) + panic("Attempted to kill the idle task!"); fake_volatile: - acct_process(code); current->flags |= PF_EXITING; + acct_process(code); del_timer(¤t->real_timer); sem_exit(); kerneld_exit(); diff -u --recursive --new-file v2.1.67/linux/kernel/fork.c linux/kernel/fork.c --- v2.1.67/linux/kernel/fork.c Wed Oct 15 16:04:24 1997 +++ linux/kernel/fork.c Sun Nov 30 10:59:03 1997 @@ -459,7 +459,7 @@ p->did_exec = 0; p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; - p->flags &= ~(PF_PTRACED|PF_TRACESYS|PF_SUPERPRIV); + p->flags &= ~(PF_PTRACED|PF_TRACESYS|PF_SUPERPRIV|PF_SIGPENDING); p->flags |= PF_FORKNOEXEC; p->pid = get_pid(clone_flags); p->next_run = NULL; @@ -467,7 +467,9 @@ p->p_pptr = p->p_opptr = current; p->p_cptr = NULL; init_waitqueue(&p->wait_chldexit); - p->signal = 0; + sigemptyset(¤t->signal); + p->sigqueue = NULL; + p->sigqueue_tail = &p->sigqueue; p->it_real_value = p->it_virt_value = p->it_prof_value = 0; p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; init_timer(&p->real_timer); diff -u --recursive --new-file v2.1.67/linux/kernel/ksyms.c linux/kernel/ksyms.c --- v2.1.67/linux/kernel/ksyms.c Sat Nov 29 11:25:12 1997 +++ linux/kernel/ksyms.c Sun Nov 30 10:59:03 1997 @@ -331,9 +331,6 @@ EXPORT_SYMBOL(loops_per_sec); EXPORT_SYMBOL(need_resched); EXPORT_SYMBOL(kstat); -EXPORT_SYMBOL(kill_proc); -EXPORT_SYMBOL(kill_pg); -EXPORT_SYMBOL(kill_sl); /* misc */ 
EXPORT_SYMBOL(panic); @@ -352,9 +349,6 @@ EXPORT_SYMBOL(_ctype); EXPORT_SYMBOL(secure_tcp_sequence_number); EXPORT_SYMBOL(get_random_bytes); - -/* Signal interfaces */ -EXPORT_SYMBOL(send_sig); /* Program loader interfaces */ EXPORT_SYMBOL(setup_arg_pages); diff -u --recursive --new-file v2.1.67/linux/kernel/sched.c linux/kernel/sched.c --- v2.1.67/linux/kernel/sched.c Mon Oct 20 10:36:53 1997 +++ linux/kernel/sched.c Sun Nov 30 10:59:03 1997 @@ -85,8 +85,6 @@ unsigned long prof_len = 0; unsigned long prof_shift = 0; -#define _S(nr) (1<<((nr)-1)) - extern void mem_use(void); unsigned long volatile jiffies=0; @@ -1419,28 +1417,6 @@ return 0; } -/* - * change timeval to jiffies, trying to avoid the - * most obvious overflows.. - */ -static unsigned long timespectojiffies(struct timespec *value) -{ - unsigned long sec = (unsigned) value->tv_sec; - long nsec = value->tv_nsec; - - if (sec > (LONG_MAX / HZ)) - return LONG_MAX; - nsec += 1000000000L / HZ - 1; - nsec /= 1000000000L / HZ; - return HZ * sec + nsec; -} - -static void jiffiestotimespec(unsigned long jiffies, struct timespec *value) -{ - value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ); - value->tv_sec = jiffies / HZ; -} - asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) { struct timespec t; @@ -1466,7 +1442,7 @@ return 0; } - expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies; + expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies; current->timeout = expire; current->state = TASK_INTERRUPTIBLE; @@ -1474,8 +1450,8 @@ if (expire > jiffies) { if (rmtp) { - jiffiestotimespec(expire - jiffies - - (expire > jiffies + 1), &t); + jiffies_to_timespec(expire - jiffies - + (expire > jiffies + 1), &t); if (copy_to_user(rmtp, &t, sizeof(struct timespec))) return -EFAULT; } @@ -1524,6 +1500,19 @@ printk(" %5d\n", p->p_osptr->pid); else printk("\n"); + + { + extern char * render_sigset_t(sigset_t *set, char *buffer); + struct signal_queue *q; + char 
s[sizeof(sigset_t)*2+1], b[sizeof(sigset_t)*2+1]; + + render_sigset_t(&p->signal, s); + render_sigset_t(&p->blocked, b); + printk("\tsig: %d %s %s :", signal_pending(p), s, b); + for (q = p->sigqueue; q ; q = q->next) + printk(" %d", q->info.si_signo); + printk(" X\n"); + } } void show_state(void) diff -u --recursive --new-file v2.1.67/linux/kernel/signal.c linux/kernel/signal.c --- v2.1.67/linux/kernel/signal.c Wed Apr 23 19:01:29 1997 +++ linux/kernel/signal.c Sun Nov 30 10:59:03 1997 @@ -2,8 +2,11 @@ * linux/kernel/signal.c * * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson */ +#include #include #include #include @@ -14,192 +17,921 @@ #include #include #include +#include #include -#define _S(nr) (1<<((nr)-1)) +/* + * SLAB caches for signal bits. + */ -#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) +#define DEBUG_SIG 0 + +#if DEBUG_SIG +#define SIG_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */) +#else +#define SIG_SLAB_DEBUG 0 +#endif + +static kmem_cache_t *signal_queue_cachep; + +void +signals_init(void) +{ + signal_queue_cachep = + kmem_cache_create("signal_queue", + sizeof(struct signal_queue), + __alignof__(struct signal_queue), + SIG_SLAB_DEBUG, NULL, NULL); +} -#ifndef __alpha__ /* - * This call isn't used by all ports, in particular, the Alpha - * uses osf_sigprocmask instead. Maybe it should be moved into - * arch-dependent dir? + * Flush all pending signals for a task. + */ + +void +flush_signals(struct task_struct *t) +{ + struct signal_queue *q, *n; + + t->flags &= ~PF_SIGPENDING; + sigemptyset(&t->signal); + q = t->sigqueue; + t->sigqueue = NULL; + t->sigqueue_tail = &t->sigqueue; + + while (q) { + n = q->next; + kmem_cache_free(signal_queue_cachep, q); + q = n; + } +} + +/* + * Flush all handlers for a task. 
+ */ + +void +flush_signal_handlers(struct task_struct *t) +{ + memset(t->sig->action, 0, sizeof(t->sig->action)); +} + +/* + * Dequeue a signal and return the element to the caller, which is + * expected to free it. * + * All callers of must be holding current->sigmask_lock. + */ + +int +dequeue_signal(sigset_t *mask, siginfo_t *info) +{ + unsigned long i, *s, *m, x; + int sig = 0; + +#if DEBUG_SIG +printk("SIG dequeue (%s:%d): %d ", current->comm, current->pid, + (current->flags & PF_SIGPENDING) != 0); +#endif + + /* Find the first desired signal that is pending. */ + s = current->signal.sig; + m = mask->sig; + switch (_NSIG_WORDS) { + default: + for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m) + if ((x = *s &~ *m) != 0) { + sig = ffz(~x) + i*_NSIG_BPW + 1; + break; + } + break; + + case 2: if ((x = s[0] &~ m[0]) != 0) + sig = 1; + else if ((x = s[1] &~ m[0]) != 0) + sig = _NSIG_BPW + 1; + else + break; + sig += ffz(~x); + break; + + case 1: if ((x = *s &~ *m) != 0) sig = ffz(~x) + 1; + break; + } + + if (sig) { + int reset = 1; + + /* Collect the siginfo appropriate to this signal. */ + if (sig < SIGRTMIN) { + /* XXX: As an extension, support queueing exactly + one non-rt signal if SA_SIGINFO is set, so that + we can get more detailed information about the + cause of the signal. */ + /* Deciding not to init these couple of fields is + more expensive that just initializing them. */ + info->si_signo = sig; + info->si_errno = 0; + info->si_code = 0; + info->si_pid = 0; + info->si_uid = 0; + } else { + struct signal_queue *q, **pp; + pp = ¤t->sigqueue; + q = current->sigqueue; + + /* Find the one we're interested in ... */ + for ( ; q ; pp = &q->next, q = q->next) + if (q->info.si_signo == sig) + break; + if (q) { + if ((*pp = q->next) == NULL) + current->sigqueue_tail = pp; + *info = q->info; + kmem_cache_free(signal_queue_cachep,q); + + /* then see if this signal is still pending. 
*/ + q = *pp; + while (q) { + if (q->info.si_signo == sig) { + reset = 0; + break; + } + q = q->next; + } + } else { + /* Ok, it wasn't in the queue. It must have + been sent either by a non-rt mechanism and + we ran out of queue space. So zero out the + info. */ + info->si_signo = sig; + info->si_errno = 0; + info->si_code = 0; + info->si_pid = 0; + info->si_uid = 0; + } + } + + if (reset) + sigdelset(¤t->signal, sig); + recalc_sigpending(current); + + /* XXX: Once POSIX.1b timers are in, if si_code == SI_TIMER, + we need to xchg out the timer overrun values. */ + } else { + /* XXX: Once CLONE_PID is in to join those "threads" that are + part of the same "process", look for signals sent to the + "process" as well. */ + + /* Sanity check... */ + if (mask == ¤t->blocked && + (current->flags & PF_SIGPENDING) != 0) { + printk(KERN_CRIT "SIG: sigpending lied\n"); + current->flags &= ~PF_SIGPENDING; + } + } + +#if DEBUG_SIG +printk(" %d -> %d\n", (current->flags & PF_SIGPENDING) != 0, sig); +#endif + + return sig; +} + +int +send_sig_info(int sig, struct siginfo *info, struct task_struct *t) +{ + struct k_sigaction *ka; + unsigned long flags; + int ret; + +#if DEBUG_SIG +printk("SIG queue (%s:%d): %d ", t->comm, t->pid, sig); +#endif + + ret = -EINVAL; + if (sig < 0 || sig > _NSIG) + goto out_nolock; + + /* If t->sig is gone, we must be trying to kill the task. So + pretend that it doesn't exist anymore. */ + ret = -ESRCH; + if (t->sig == NULL) + goto out_nolock; + + /* The somewhat baroque permissions check... */ + ret = -EPERM; + if ((!info || ((unsigned long)info != 1 && SI_FROMUSER(info))) + && ((sig != SIGCONT) || (current->session != t->session)) + && (current->euid ^ t->suid) && (current->euid ^ t->uid) + && (current->uid ^ t->suid) && (current->uid ^ t->uid) + && !suser()) + goto out_nolock; + + /* The null signal is a permissions and process existance probe. + No signal is actually delivered. 
*/ + ret = 0; + if (!sig) + goto out_nolock; + + ka = &t->sig->action[sig-1]; + spin_lock_irqsave(&t->sigmask_lock, flags); + + switch (sig) { + case SIGKILL: case SIGCONT: + /* Wake up the process if stopped. */ + if (t->state == TASK_STOPPED) + wake_up_process(t); + t->exit_code = 0; + sigdelsetmask(&t->signal, (sigmask(SIGSTOP)|sigmask(SIGTSTP)| + sigmask(SIGTTOU)|sigmask(SIGTTIN))); + /* Inflict this corner case with recalculaions, not mainline */ + recalc_sigpending(t); + break; + + case SIGSTOP: case SIGTSTP: + case SIGTTIN: case SIGTTOU: + /* If we're stopping again, cancel SIGCONT */ + sigdelset(&t->signal, SIGCONT); + /* Inflict this corner case with recalculaions, not mainline */ + recalc_sigpending(t); + break; + } + + /* Optimize away the signal, if it's a signal that can be + handled immediately (ie non-blocked and untraced) and + that is ignored (either explicitly or by default). */ + + if (!(t->flags & PF_PTRACED) && !sigismember(&t->blocked, sig) + /* Don't bother with ignored sigs (SIGCHLD is special) */ + && ((ka->sa.sa_handler == SIG_IGN && sig != SIGCHLD) + /* Some signals are ignored by default.. (but SIGCONT + already did its deed) */ + || (ka->sa.sa_handler == SIG_DFL + && (sig == SIGCONT || sig == SIGCHLD + || sig == SIGWINCH || sig == SIGURG)))) { + goto out; + } + + if (sig < SIGRTMIN) { + /* Non-real-time signals are not queued. */ + /* XXX: As an extension, support queueing exactly one + non-rt signal if SA_SIGINFO is set, so that we can + get more detailed information about the cause of + the signal. */ + if (sigismember(&t->signal, sig)) + goto out; + } else { + /* Real-time signals must be queued if sent by sigqueue, or + some other real-time mechanism. It is implementation + defined whether kill() does so. We attempt to do so, on + the principle of least surprise, but since kill is not + allowed to fail with EAGAIN when low on memory we just + make sure at least one signal gets delivered and don't + pass on the info struct. 
*/ + + struct signal_queue *q = (struct signal_queue *) + kmem_cache_alloc(signal_queue_cachep, GFP_KERNEL); + + if (q) { + q->next = NULL; + *t->sigqueue_tail = q; + t->sigqueue_tail = &q->next; + switch ((unsigned long) info) { + case 0: + q->info.si_signo = sig; + q->info.si_errno = 0; + q->info.si_code = SI_USER; + q->info.si_pid = current->pid; + q->info.si_uid = current->uid; + break; + case 1: + q->info.si_signo = sig; + q->info.si_errno = 0; + q->info.si_code = SI_KERNEL; + q->info.si_pid = 0; + q->info.si_uid = 0; + break; + default: + q->info = *info; + break; + } + } else { + /* If this was sent by a rt mechanism, try again. */ + if (info->si_code < 0) { + ret = -EAGAIN; + goto out; + } + /* Otherwise, mention that the signal is pending, + but don't queue the info. */ + } + } + + sigaddset(&t->signal, sig); + if (!sigismember(&t->blocked, sig)) + t->flags |= PF_SIGPENDING; + +out: + spin_unlock_irqrestore(&t->sigmask_lock, flags); + if (t->state == TASK_INTERRUPTIBLE && signal_pending(t)) + wake_up_process(t); + +out_nolock: +#if DEBUG_SIG +printk(" %d -> %d\n", (t->flags & PF_SIGPENDING) != 0, ret); +#endif + + return ret; +} + +/* + * Force a signal that the process can't ignore: if necessary + * we unblock the signal and change any SIG_IGN to SIG_DFL. 
+ */ + +int +force_sig_info(int sig, struct siginfo *info, struct task_struct *t) +{ + if (t->sig == NULL) + return -ESRCH; + + if (t->sig->action[sig-1].sa.sa_handler == SIG_IGN) + t->sig->action[sig-1].sa.sa_handler = SIG_DFL; + sigdelset(&t->blocked, sig); + + return send_sig_info(sig, info, t); +} + +/* + * kill_pg() sends a signal to a process group: this is what the tty + * control characters do (^C, ^Z etc) + */ + +int +kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) +{ + int retval = -EINVAL; + if (pgrp > 0) { + struct task_struct *p; + int found = 0; + + retval = -ESRCH; + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->pgrp == pgrp) { + int err = send_sig_info(sig, info, p); + if (err != 0) + retval = err; + else + found++; + } + } + read_unlock(&tasklist_lock); + if (found) + retval = 0; + } + return retval; +} + +/* + * kill_sl() sends a signal to the session leader: this is used + * to send SIGHUP to the controlling process of a terminal when + * the connection is lost. + */ + +int +kill_sl_info(int sig, struct siginfo *info, pid_t sess) +{ + int retval = -EINVAL; + if (sess > 0) { + struct task_struct *p; + int found = 0; + + retval = -ESRCH; + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->leader && p->session == sess) { + int err = send_sig_info(sig, info, p); + if (err) + retval = err; + else + found++; + } + } + read_unlock(&tasklist_lock); + if (found) + retval = 0; + } + return retval; +} + +inline int +kill_proc_info(int sig, struct siginfo *info, pid_t pid) +{ + struct task_struct *p = find_task_by_pid(pid); + return p ? send_sig_info(sig, info, p) : -ESRCH; +} + +/* + * kill_something() interprets pid in interesting ways just like kill(2). + * + * POSIX specifies that kill(-1,sig) is unspecified, but what we have + * is probably wrong. Should make it like BSD or SYSV. 
+ */ + +int +kill_something_info(int sig, struct siginfo *info, int pid) +{ + if (!pid) { + return kill_pg_info(sig, info, current->pgrp); + } else if (pid == -1) { + int retval = 0, count = 0; + struct task_struct * p; + + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->pid > 1 && p != current) { + int err = send_sig_info(sig, info, p); + ++count; + if (err != -EPERM) + retval = err; + } + } + read_unlock(&tasklist_lock); + return count ? retval : -ESRCH; + } else if (pid < 0) { + return kill_pg_info(sig, info, -pid); + } else { + return kill_proc_info(sig, info, pid); + } +} + +/* + * These are for backward compatibility with the rest of the kernel source. + */ + +int +send_sig(int sig, struct task_struct *p, int priv) +{ + return send_sig_info(sig, (void*)(long)(priv != 0), p); +} + +void +force_sig(int sig, struct task_struct *p) +{ + force_sig_info(sig, (void*)1L, p); +} + +int +kill_pg(pid_t pgrp, int sig, int priv) +{ + return kill_pg_info(sig, (void *)(long)(priv != 0), pgrp); +} + +int +kill_sl(pid_t sess, int sig, int priv) +{ + return kill_sl_info(sig, (void *)(long)(priv != 0), sess); +} + +int +kill_proc(pid_t pid, int sig, int priv) +{ + return kill_proc_info(sig, (void *)(long)(priv != 0), pid); +} + +/* + * Let a parent know about a status change of a child. + */ + +void +notify_parent(struct task_struct *tsk, int sig) +{ + struct siginfo info; + int why; + + info.si_signo = sig; + info.si_errno = 0; + info.si_pid = tsk->pid; + + /* FIXME: find out whether or not this is supposed to be c*time. */ + info.si_utime = tsk->times.tms_utime; + info.si_stime = tsk->times.tms_stime; + + why = SI_KERNEL; /* shouldn't happen */ + switch (tsk->state) { + case TASK_ZOMBIE: + if (tsk->exit_code & 0x80) + why = CLD_DUMPED; + else if (tsk->exit_code & 0x7f) + why = CLD_KILLED; + else + why = CLD_EXITED; + break; + case TASK_STOPPED: + /* FIXME -- can we deduce CLD_TRAPPED or CLD_CONTINUED? 
*/ + why = CLD_STOPPED; + break; + + default: + printk(KERN_DEBUG "eh? notify_parent with state %ld?\n", + tsk->state); + break; + } + info.si_code = why; + + send_sig_info(sig, &info, tsk->p_pptr); + wake_up_interruptible(&tsk->p_pptr->wait_chldexit); +} + +EXPORT_SYMBOL(dequeue_signal); +EXPORT_SYMBOL(flush_signals); +EXPORT_SYMBOL(force_sig); +EXPORT_SYMBOL(force_sig_info); +EXPORT_SYMBOL(kill_pg); +EXPORT_SYMBOL(kill_pg_info); +EXPORT_SYMBOL(kill_proc); +EXPORT_SYMBOL(kill_proc_info); +EXPORT_SYMBOL(kill_sl); +EXPORT_SYMBOL(kill_sl_info); +EXPORT_SYMBOL(notify_parent); +EXPORT_SYMBOL(recalc_sigpending); +EXPORT_SYMBOL(send_sig); +EXPORT_SYMBOL(send_sig_info); + + +/* + * System call entry points. + */ + +/* * We don't need to get the kernel lock - this is all local to this * particular thread.. (and that's good, because this is _heavily_ * used by various programs) - * - * No SMP locking would prevent the inherent races present in this - * routine, thus we do not perform any locking at all. */ -asmlinkage int sys_sigprocmask(int how, sigset_t *set, sigset_t *oset) + +asmlinkage int +sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize) { - sigset_t old_set = current->blocked; + sigset_t old_set, new_set; - if (set) { - sigset_t new_set; + /* XXX: Don't preclude handling different sized sigset_t's. 
*/ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; - if(get_user(new_set, set)) + if (set) { + if (copy_from_user(&new_set, set, sizeof(*set))) return -EFAULT; + sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); + + spin_lock_irq(¤t->sigmask_lock); + old_set = current->blocked; - new_set &= _BLOCKABLE; switch (how) { default: return -EINVAL; case SIG_BLOCK: - new_set |= old_set; + sigorsets(&new_set, &old_set, &new_set); break; case SIG_UNBLOCK: - new_set = old_set & ~new_set; + signandsets(&new_set, &old_set, &new_set); break; case SIG_SETMASK: break; } + current->blocked = new_set; - } - if (oset) { - if(put_user(old_set, oset)) + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (oset) { + if (copy_to_user(oset, &old_set, sizeof(*oset))) + return -EFAULT; + } + } else if (oset) { + spin_lock_irq(¤t->sigmask_lock); + old_set = current->blocked; + spin_unlock_irq(¤t->sigmask_lock); + + if (copy_to_user(oset, &old_set, sizeof(*oset))) return -EFAULT; } + return 0; } -/* - * For backwards compatibility? Functionality superseded by sigprocmask. - */ -asmlinkage int sys_sgetmask(void) +asmlinkage int +sys_rt_sigpending(sigset_t *set, size_t sigsetsize) { - /* SMP safe */ - return current->blocked; -} + sigset_t pending; -asmlinkage int sys_ssetmask(int newmask) -{ - int old; + /* XXX: Don't preclude handling different sized sigset_t's. 
*/ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; spin_lock_irq(¤t->sigmask_lock); - old = current->blocked; - current->blocked = newmask & _BLOCKABLE; + sigandsets(&pending, ¤t->blocked, ¤t->signal); spin_unlock_irq(¤t->sigmask_lock); - return old; + return copy_to_user(set, &pending, sizeof(*set)); } -#endif - -asmlinkage int sys_sigpending(sigset_t *set) +asmlinkage int +sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, + const struct timespec *uts, size_t sigsetsize) { - int ret; + sigset_t these; + struct timespec ts; + unsigned long expire; + siginfo_t info; + int ret, sig; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&these, uthese, sizeof(these))) + return -EFAULT; + else { + /* Invert the set of allowed signals to get those we + want to block. */ + signotset(&these); + } + + if (uts) { + if (copy_from_user(&ts, uts, sizeof(ts))) + return -EFAULT; + if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0 + || ts.tv_sec < 0) + return -EINVAL; + } - /* fill in "set" with signals pending but blocked. */ spin_lock_irq(¤t->sigmask_lock); - ret = put_user(current->blocked & current->signal, set); + sig = dequeue_signal(&these, &info); + if (!sig) { + /* None ready -- temporarily unblock those we're interested + in so that we'll be awakened when they arrive. 
*/ + sigset_t oldblocked = current->blocked; + sigandsets(¤t->blocked, ¤t->blocked, &these); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (uts) { + expire = (timespec_to_jiffies(&ts) + + (ts.tv_sec || ts.tv_nsec)); + expire += jiffies; + current->timeout = expire; + } + + current->state = TASK_INTERRUPTIBLE; + schedule(); + + spin_lock_irq(¤t->sigmask_lock); + sig = dequeue_signal(&these, &info); + current->blocked = oldblocked; + recalc_sigpending(current); + } spin_unlock_irq(¤t->sigmask_lock); + + if (sig) { + ret = sig; + if (uinfo) { + if (copy_to_user(uinfo, &info, sizeof(siginfo_t))) + ret = -EFAULT; + } + } else { + ret = !uts || expire > jiffies ? -EINTR : -EAGAIN; + } + return ret; } -/* - * POSIX 3.3.1.3: - * "Setting a signal action to SIG_IGN for a signal that is pending - * shall cause the pending signal to be discarded, whether or not - * it is blocked." - * - * "Setting a signal action to SIG_DFL for a signal that is pending - * and whose default action is to ignore the signal (for example, - * SIGCHLD), shall cause the pending signal to be discarded, whether - * or not it is blocked" - * - * Note the silly behaviour of SIGCHLD: SIG_IGN means that the signal - * isn't actually ignored, but does automatic child reaping, while - * SIG_DFL is explicitly said by POSIX to force the signal to be ignored.. - * - * All callers of check_pending must be holding current->sig->siglock. 
- */ -inline void check_pending(int signum) +asmlinkage int +sys_kill(int pid, int sig) { - struct sigaction *p; + struct siginfo info; + + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->pid; + info.si_uid = current->uid; - p = signum - 1 + current->sig->action; - spin_lock(¤t->sigmask_lock); - if (p->sa_handler == SIG_IGN) { - current->signal &= ~_S(signum); - } else if (p->sa_handler == SIG_DFL) { - if (signum == SIGCONT || - signum == SIGCHLD || - signum != SIGWINCH) - current->signal &= ~_S(signum); - } - spin_unlock(¤t->sigmask_lock); + return kill_something_info(sig, &info, pid); } -#ifndef __alpha__ -/* - * For backwards compatibility? Functionality superseded by sigaction. - */ -asmlinkage unsigned long sys_signal(int signum, __sighandler_t handler) +asmlinkage int +sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo) { - struct sigaction tmp; + siginfo_t info; - if (signum<1 || signum>32) - return -EINVAL; - if (signum==SIGKILL || signum==SIGSTOP) + if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) + return -EFAULT; + + /* Not even root can pretend to send signals from the kernel. + Nor can they impersonate a kill(), which adds source info. */ + if (info.si_code >= 0) + return -EPERM; + + /* POSIX.1b doesn't mention process groups. */ + return kill_proc_info(sig, &info, pid); +} + +int +do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) +{ + struct k_sigaction *k; + + if (sig < 1 || sig > _NSIG || + (act && (sig == SIGKILL || sig == SIGSTOP))) return -EINVAL; - if (handler != SIG_DFL && handler != SIG_IGN) { - if(verify_area(VERIFY_READ, handler, 1)) + + spin_lock_irq(¤t->sigmask_lock); + k = ¤t->sig->action[sig-1]; + + if (oact) *oact = *k; + + if (act) { + *k = *act; + + /* + * POSIX 3.3.1.3: + * "Setting a signal action to SIG_IGN for a signal that is + * pending shall cause the pending signal to be discarded, + * whether or not it is blocked." 
+ * + * "Setting a signal action to SIG_DFL for a signal that is + * pending and whose default action is to ignore the signal + * (for example, SIGCHLD), shall cause the pending signal to + * be discarded, whether or not it is blocked" + * + * Note the silly behaviour of SIGCHLD: SIG_IGN means that the + * signal isn't actually ignored, but does automatic child + * reaping, while SIG_DFL is explicitly said by POSIX to force + * the signal to be ignored. + */ + + if (k->sa.sa_handler == SIG_IGN + || (k->sa.sa_handler == SIG_DFL + && (sig == SIGCONT || + sig == SIGCHLD || + sig != SIGWINCH))) { + /* So dequeue any that might be pending. + XXX: process-wide signals? */ + if (sig >= SIGRTMIN && + sigismember(¤t->signal, sig)) { + struct signal_queue *q, **pp; + pp = ¤t->sigqueue; + q = current->sigqueue; + while (q) { + if (q->info.si_signo != sig) + pp = &q->next; + else { + *pp = q->next; + kmem_cache_free(signal_queue_cachep, q); + } + q = *pp; + } + + } + sigdelset(¤t->signal, sig); + recalc_sigpending(current); + } + } + + spin_unlock_irq(¤t->sigmask_lock); + + return 0; +} + +#if !defined(__alpha__) +/* Alpha has its own versions with special arguments. 
*/ + +asmlinkage int +sys_sigprocmask(int how, old_sigset_t *set, old_sigset_t *oset) +{ + old_sigset_t old_set, new_set; + + if (set) { + if (copy_from_user(&new_set, set, sizeof(*set))) + return -EFAULT; + new_set &= ~(sigmask(SIGKILL)|sigmask(SIGSTOP)); + + spin_lock_irq(¤t->sigmask_lock); + old_set = current->blocked.sig[0]; + + switch (how) { + default: + return -EINVAL; + case SIG_BLOCK: + sigaddsetmask(¤t->blocked, new_set); + break; + case SIG_UNBLOCK: + sigdelsetmask(¤t->blocked, new_set); + break; + case SIG_SETMASK: + siginitset(¤t->blocked, new_set); + break; + } + + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (oset) { + if (copy_to_user(oset, &old_set, sizeof(*oset))) + return -EFAULT; + } + } else if (oset) { + old_set = current->blocked.sig[0]; + if (copy_to_user(oset, &old_set, sizeof(*oset))) return -EFAULT; } - memset(&tmp, 0, sizeof(tmp)); - tmp.sa_handler = handler; - tmp.sa_flags = SA_ONESHOT | SA_NOMASK; + return 0; +} + +asmlinkage int +sys_sigpending(old_sigset_t *set) +{ + old_sigset_t pending; - spin_lock_irq(¤t->sig->siglock); - handler = current->sig->action[signum-1].sa_handler; - current->sig->action[signum-1] = tmp; - check_pending(signum); - spin_unlock_irq(¤t->sig->siglock); + spin_lock_irq(¤t->sigmask_lock); + pending = current->blocked.sig[0] & current->signal.sig[0]; + spin_unlock_irq(¤t->sigmask_lock); - return (unsigned long) handler; + return copy_to_user(set, &pending, sizeof(*set)); } -#endif -#ifndef __sparc__ -asmlinkage int sys_sigaction(int signum, const struct sigaction * action, - struct sigaction * oldaction) +asmlinkage int +sys_rt_sigaction(int sig, const struct sigaction *act, struct sigaction *oact, + size_t sigsetsize) { - struct sigaction new_sa, *p; + struct k_sigaction new_sa, old_sa; + int ret; - if (signum < 1 || signum > 32) + /* XXX: Don't preclude handling different sized sigset_t's. 
*/ + if (sigsetsize != sizeof(sigset_t)) return -EINVAL; - p = signum - 1 + current->sig->action; - - if (action) { - if (copy_from_user(&new_sa, action, sizeof(struct sigaction))) + if (act) { + if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) return -EFAULT; - if (signum==SIGKILL || signum==SIGSTOP) - return -EINVAL; } - if (oldaction) { - /* In the clone() case we could copy half consistant - * state to the user, however this could sleep and - * deadlock us if we held the signal lock on SMP. So for - * now I take the easy way out and do no locking. - */ - if (copy_to_user(oldaction, p, sizeof(struct sigaction))) + ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL); + + if (!ret && oact) { + if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) return -EFAULT; } - if (action) { - spin_lock_irq(¤t->sig->siglock); - *p = new_sa; - check_pending(signum); - spin_unlock_irq(¤t->sig->siglock); - } - return 0; + return ret; } #endif + +#if !defined(__alpha__) +/* + * For backwards compatibility. Functionality superseded by sigprocmask. + */ +asmlinkage int +sys_sgetmask(void) +{ + /* SMP safe */ + return current->blocked.sig[0]; +} + +asmlinkage int +sys_ssetmask(int newmask) +{ + int old; + + spin_lock_irq(¤t->sigmask_lock); + old = current->blocked.sig[0]; + + siginitset(¤t->blocked, newmask & ~(sigmask(SIGKILL)| + sigmask(SIGSTOP))); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + return old; +} + +/* + * For backwards compatibility. Functionality superseded by sigaction. + */ +asmlinkage unsigned long +sys_signal(int sig, __sighandler_t handler) +{ + struct k_sigaction new_sa, old_sa; + int ret; + + new_sa.sa.sa_handler = handler; + new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; + + ret = do_sigaction(sig, &new_sa, &old_sa); + + return ret ? 
ret : (unsigned long)old_sa.sa.sa_handler; +} +#endif /* !alpha */ diff -u --recursive --new-file v2.1.67/linux/kernel/sys.c linux/kernel/sys.c --- v2.1.67/linux/kernel/sys.c Sat Oct 25 02:44:18 1997 +++ linux/kernel/sys.c Sun Nov 30 12:34:44 1997 @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -160,8 +159,6 @@ } -extern asmlinkage int sys_kill(int, int); - /* * Reboot system call: for obvious reasons only root may call it, * and even root needs to set up some magic numbers in the registers @@ -169,7 +166,6 @@ * You can also set the meaning of the ctrl-alt-del-key here. * * reboot doesn't sync: do that yourself before calling this. - * */ asmlinkage int sys_reboot(int magic1, int magic2, int cmd, void * arg) { @@ -322,127 +318,6 @@ return 0; } -static char acct_active = 0; -static struct file acct_file; - -int acct_process(long exitcode) -{ - struct acct ac; - unsigned long fs; - - if (acct_active) { - strncpy(ac.ac_comm, current->comm, ACCT_COMM); - ac.ac_comm[ACCT_COMM-1] = '\0'; - ac.ac_utime = current->times.tms_utime; - ac.ac_stime = current->times.tms_stime; - ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ)); - ac.ac_etime = CURRENT_TIME - ac.ac_btime; - ac.ac_uid = current->uid; - ac.ac_gid = current->gid; - ac.ac_tty = (current)->tty == NULL ? 
-1 : - kdev_t_to_nr(current->tty->device); - ac.ac_flag = 0; - if (current->flags & PF_FORKNOEXEC) - ac.ac_flag |= AFORK; - if (current->flags & PF_SUPERPRIV) - ac.ac_flag |= ASU; - if (current->flags & PF_DUMPCORE) - ac.ac_flag |= ACORE; - if (current->flags & PF_SIGNALED) - ac.ac_flag |= AXSIG; - ac.ac_minflt = current->min_flt; - ac.ac_majflt = current->maj_flt; - ac.ac_exitcode = exitcode; - - /* Kernel segment override */ - fs = get_fs(); - set_fs(KERNEL_DS); - - acct_file.f_op->write(&acct_file, (char *)&ac, sizeof(struct acct), - &acct_file.f_pos); - set_fs(fs); - } - return 0; -} - -asmlinkage int sys_acct(const char *name) -{ - int error = -EPERM; - - lock_kernel(); - if (!suser()) - goto out; - - if (name == (char *)0) { - if (acct_active) { - if (acct_file.f_op->release) - acct_file.f_op->release(acct_file.f_dentry->d_inode, &acct_file); - - if (acct_file.f_dentry != NULL) - dput(acct_file.f_dentry); - - acct_active = 0; - } - error = 0; - } else { - error = -EBUSY; - if (!acct_active) { - struct dentry *dentry; - struct inode *inode; - char *tmp; - - tmp = getname(name); - error = PTR_ERR(tmp); - if (IS_ERR(tmp)) - goto out; - - dentry = open_namei(tmp, O_RDWR, 0600); - putname(tmp); - - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out; - inode = dentry->d_inode; - - error = -EACCES; - if (!S_ISREG(inode->i_mode)) { - dput(dentry); - goto out; - } - - error = -EIO; - if (!inode->i_op || !inode->i_op->default_file_ops || - !inode->i_op->default_file_ops->write) { - dput(dentry); - goto out; - } - - acct_file.f_mode = 3; - acct_file.f_flags = 0; - acct_file.f_count = 1; - acct_file.f_dentry = dentry; - acct_file.f_pos = inode->i_size; - acct_file.f_reada = 0; - acct_file.f_op = inode->i_op->default_file_ops; - - if(acct_file.f_op->open) { - error = acct_file.f_op->open(inode, &acct_file); - if (error) { - dput(dentry); - goto out; - } - } - - acct_active = 1; - error = 0; - } - } -out: - unlock_kernel(); - return error; -} - - /* * Unprivileged 
users may change the real uid to the effective uid * or vice versa. (BSD-style) diff -u --recursive --new-file v2.1.67/linux/mm/memory.c linux/mm/memory.c --- v2.1.67/linux/mm/memory.c Wed Sep 3 20:52:44 1997 +++ linux/mm/memory.c Sun Nov 30 10:59:03 1997 @@ -79,9 +79,7 @@ void oom(struct task_struct * task) { printk("\nOut of memory for %s.\n", task->comm); - task->sig->action[SIGKILL-1].sa_handler = NULL; - task->blocked &= ~(1<<(SIGKILL-1)); - send_sig(SIGKILL,task,1); + force_sig(SIGKILL, task); } /* diff -u --recursive --new-file v2.1.67/linux/mm/vmscan.c linux/mm/vmscan.c --- v2.1.67/linux/mm/vmscan.c Sat Oct 25 02:44:18 1997 +++ linux/mm/vmscan.c Sun Nov 30 10:59:03 1997 @@ -426,7 +426,7 @@ current->session = 1; current->pgrp = 1; sprintf(current->comm, "kswapd"); - current->blocked = ~0UL; + sigfillset(¤t->blocked); /* * As a kernel thread we want to tamper with system buffers @@ -447,7 +447,7 @@ int fail; kswapd_awake = 0; - current->signal = 0; + flush_signals(current); run_task_queue(&tq_disk); interruptible_sleep_on(&kswapd_wait); kswapd_awake = 1; diff -u --recursive --new-file v2.1.67/linux/net/Config.in linux/net/Config.in --- v2.1.67/linux/net/Config.in Mon Nov 17 18:47:22 1997 +++ linux/net/Config.in Sun Nov 30 14:00:39 1997 @@ -3,9 +3,11 @@ # mainmenu_option next_comment comment 'Networking options' -bool 'Kernel/User network link driver' CONFIG_NETLINK +tristate 'Packet socket' CONFIG_PACKET +bool 'Kernel/User netlink socket' CONFIG_NETLINK if [ "$CONFIG_NETLINK" = "y" ]; then bool 'Routing messages' CONFIG_RTNETLINK + tristate 'Netlink device emulation' CONFIG_NETLINK_DEV fi bool 'Network firewalls' CONFIG_FIREWALL if [ "$CONFIG_FIREWALL" = "y" ]; then @@ -14,11 +16,15 @@ fi fi bool 'Network aliasing' CONFIG_NET_ALIAS +tristate 'BSD Unix domain sockets' CONFIG_UNIX bool 'TCP/IP networking' CONFIG_INET if [ "$CONFIG_INET" = "y" ]; then source net/ipv4/Config.in if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then tristate 'The IPv6 protocol (EXPERIMENTAL)' 
CONFIG_IPV6 + if [ "$CONFIG_IPV6" != "n" ]; then + source net/ipv6/Config.in + fi fi fi @@ -48,5 +54,17 @@ # bool 'Netbeui (EXPERIMENTAL)' CONFIG_NETBEUI # fi tristate 'WAN router' CONFIG_WAN_ROUTER + bool 'CPU is too slow to handle full bandwidth' CONFIG_CPU_IS_SLOW + bool 'QoS and/or fair queueing' CONFIG_NET_SCHED + if [ "$CONFIG_NET_SCHED" = "y" ]; then + tristate 'CBQ packet scheduler' CONFIG_NET_SCH_CBQ + tristate 'CSZ packet scheduler' CONFIG_NET_SCH_CSZ + tristate 'HFQ packet scheduler' CONFIG_NET_SCH_HFQ + tristate 'RED queueing discipline' CONFIG_NET_SCH_RED + tristate 'SFQ queueing discipline' CONFIG_NET_SCH_SFQ + tristate 'auxiliary TBF queue' CONFIG_NET_SCH_TBF + tristate 'auxiliary FIFO queue' CONFIG_NET_SCH_PFIFO + tristate 'auxiliary PRIO queue' CONFIG_NET_SCH_PRIO + fi fi endmenu diff -u --recursive --new-file v2.1.67/linux/net/Makefile linux/net/Makefile --- v2.1.67/linux/net/Makefile Thu Jul 17 10:06:09 1997 +++ linux/net/Makefile Sun Nov 30 14:00:39 1997 @@ -9,8 +9,8 @@ MOD_SUB_DIRS := ipv4 ALL_SUB_DIRS := 802 ax25 bridge core ethernet ipv4 ipv6 ipx unix appletalk \ - netrom rose lapb x25 wanrouter sunrpc #decnet -SUB_DIRS := core ethernet unix + netrom rose lapb x25 wanrouter netlink sched packet sunrpc #decnet +SUB_DIRS := core ethernet sched MOD_LIST_NAME := NET_MISC_MODULES ifeq ($(CONFIG_NET),y) @@ -21,6 +21,14 @@ SUB_DIRS += ipv4 endif +ifeq ($(CONFIG_UNIX),y) +SUB_DIRS += unix +else + ifeq ($(CONFIG_UNIX),m) + MOD_SUB_DIRS += unix + endif +endif + ifeq ($(CONFIG_IPV6),y) SUB_DIRS += ipv6 else @@ -29,6 +37,25 @@ endif endif +ifeq ($(CONFIG_NETLINK),y) +SUB_DIRS += netlink + ifeq ($(CONFIG_NETLINK_DEV),m) + MOD_SUB_DIRS += netlink + endif +endif + +ifeq ($(CONFIG_PACKET),y) +SUB_DIRS += packet +else + ifeq ($(CONFIG_PACKET),m) + MOD_SUB_DIRS += packet + endif +endif + +ifeq ($(CONFIG_NET_SCHED),y) + MOD_SUB_DIRS += sched +endif + ifeq ($(CONFIG_BRIDGE),y) SUB_DIRS += bridge endif @@ -133,33 +160,6 @@ ifeq ($(CONFIG_SYSCTL),y) L_OBJS += 
sysctl_net.o -endif - -CONFIG_NETLINK_BUILTIN := -CONFIG_NETLINK_MODULE := - -ifeq ($(CONFIG_NETLINK), y) - CONFIG_NETLINK_BUILTIN = y -endif - -ifeq ($(CONFIG_IPV6), y) - CONFIG_NETLINK_BUILTIN = y -endif - -ifeq ($(CONFIG_NETLINK), m) - CONFIG_NETLINK_MODULE = y -endif - -ifeq ($(CONFIG_IPV6), m) - CONFIG_NETLINK_MODULE = y -endif - -ifdef CONFIG_NETLINK_BUILTIN -L_OBJS += netlink.o -else - ifdef CONFIG_NETLINK_MODULE - M_OBJS += netlink.o - endif endif include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.1.67/linux/net/ax25/af_ax25.c linux/net/ax25/af_ax25.c --- v2.1.67/linux/net/ax25/af_ax25.c Wed Sep 24 20:05:48 1997 +++ linux/net/ax25/af_ax25.c Sun Nov 30 14:00:39 1997 @@ -1412,7 +1412,6 @@ /* Datagram frames go straight out of the door as UI */ skb->dev = sk->protinfo.ax25->ax25_dev->dev; - skb->priority = SOPRI_NORMAL; ax25_queue_xmit(skb); diff -u --recursive --new-file v2.1.67/linux/net/ax25/ax25_ds_subr.c linux/net/ax25/ax25_ds_subr.c --- v2.1.67/linux/net/ax25/ax25_ds_subr.c Mon Jul 7 08:19:59 1997 +++ linux/net/ax25/ax25_ds_subr.c Sun Nov 30 14:00:39 1997 @@ -154,7 +154,6 @@ skb->arp = 1; skb->dev = ax25_dev->dev; - skb->priority = SOPRI_NORMAL; skb->protocol = htons(ETH_P_AX25); dev_queue_xmit(skb); diff -u --recursive --new-file v2.1.67/linux/net/ax25/ax25_ip.c linux/net/ax25/ax25_ip.c --- v2.1.67/linux/net/ax25/ax25_ip.c Mon Aug 4 16:25:40 1997 +++ linux/net/ax25/ax25_ip.c Sun Nov 30 14:00:39 1997 @@ -177,7 +177,6 @@ } skb->dev = dev; - skb->priority = SOPRI_NORMAL; ax25_queue_xmit(skb); diff -u --recursive --new-file v2.1.67/linux/net/ax25/ax25_out.c linux/net/ax25/ax25_out.c --- v2.1.67/linux/net/ax25/ax25_out.c Mon Jul 7 08:19:59 1997 +++ linux/net/ax25/ax25_out.c Sun Nov 30 14:00:39 1997 @@ -58,8 +58,16 @@ ax25_dev *ax25_dev; ax25_cb *ax25; - if (skb == NULL) - return 0; + /* + * Take the default packet length for the device if zero is + * specified. 
+ */ + if (paclen == 0) { + if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) + return NULL; + + paclen = ax25_dev->values[AX25_VALUES_PACLEN]; + } /* * Look for an existing connection. @@ -339,7 +347,6 @@ ax25_addr_build(ptr, &ax25->source_addr, &ax25->dest_addr, ax25->digipeat, type, ax25->modulus); skb->dev = ax25->ax25_dev->dev; - skb->priority = SOPRI_NORMAL; ax25_queue_xmit(skb); } diff -u --recursive --new-file v2.1.67/linux/net/ax25/ax25_subr.c linux/net/ax25/ax25_subr.c --- v2.1.67/linux/net/ax25/ax25_subr.c Mon Jul 7 08:19:59 1997 +++ linux/net/ax25/ax25_subr.c Sun Nov 30 14:00:39 1997 @@ -252,7 +252,6 @@ dptr += ax25_addr_build(dptr, dest, src, &retdigi, AX25_RESPONSE, AX25_MODULUS); skb->dev = dev; - skb->priority = SOPRI_NORMAL; ax25_queue_xmit(skb); } diff -u --recursive --new-file v2.1.67/linux/net/core/Makefile linux/net/core/Makefile --- v2.1.67/linux/net/core/Makefile Mon Apr 7 11:35:32 1997 +++ linux/net/core/Makefile Sun Nov 30 14:00:39 1997 @@ -1,5 +1,5 @@ # -# Makefile for the Linux TCP/IP (INET) layer. +# Makefile for the Linux networking core. # # Note! Dependencies are done automagically by 'make dep', which also # removes any old dependencies. DON'T put your own dependencies here @@ -10,7 +10,7 @@ O_TARGET := core.o O_OBJS := sock.o skbuff.o iovec.o datagram.o dst.o scm.o \ - neighbour.o + neighbour.o rtnetlink.o ifeq ($(CONFIG_SYSCTL),y) O_OBJS += sysctl_net_core.o @@ -22,10 +22,6 @@ ifdef CONFIG_FIREWALL OX_OBJS += firewall.o -endif - -ifdef CONFIG_NET_ALIAS -O_OBJS += net_alias.o endif endif diff -u --recursive --new-file v2.1.67/linux/net/core/dev.c linux/net/core/dev.c --- v2.1.67/linux/net/core/dev.c Tue Sep 23 16:48:50 1997 +++ linux/net/core/dev.c Sun Nov 30 14:00:39 1997 @@ -15,6 +15,7 @@ * Florian la Roche * Alan Cox * David Hinds + * Alexey Kuznetsov * * Changes: * Alan Cox : device private ioctl copies fields back. 
@@ -61,24 +62,20 @@ #include #include #include -#include #include #include #include -#include #include #include #include -#include -#include #include #include -#include +#include #include #include #include #include -#include +#include #include #ifdef CONFIG_KERNELD #include @@ -90,6 +87,7 @@ extern int plip_init(void); #endif + const char *if_port_text[] = { "unknown", "BNC", @@ -101,12 +99,6 @@ }; /* - * The list of devices, that are able to output. - */ - -static struct device *dev_up_base; - -/* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. * @@ -130,16 +122,17 @@ struct packet_type *ptype_all = NULL; /* Taps */ /* - * Device list lock + * Device list lock. Setting it provides that interface + * will not disappear unexpectedly while kernel sleeps. */ atomic_t dev_lockct = ATOMIC_INIT(0); - + /* * Our notifier list */ -struct notifier_block *netdev_chain=NULL; +static struct notifier_block *netdev_chain=NULL; /* * Device drivers call our routines to queue packets here. We empty the @@ -148,14 +141,6 @@ static struct sk_buff_head backlog; -/* - * We don't overdo the queue or we will thrash memory badly. - */ - -static int backlog_size = 0; - - - /****************************************************************************************** Protocol management and registration routines @@ -166,7 +151,7 @@ * For efficiency */ -static int dev_nit=0; +int netdev_nit=0; /* * Add a protocol ID to the list. 
Now that the input handler is @@ -179,7 +164,7 @@ int hash; if(pt->type==htons(ETH_P_ALL)) { - dev_nit++; + netdev_nit++; pt->next=ptype_all; ptype_all=pt; } @@ -201,7 +186,7 @@ struct packet_type **pt1; if(pt->type==htons(ETH_P_ALL)) { - dev_nit--; + netdev_nit--; pt1=&ptype_all; } else @@ -258,7 +243,6 @@ for (dev = dev_base; dev != NULL; dev = dev->next) { if (dev->type == type && - !(dev->flags&(IFF_LOOPBACK|IFF_NOARP)) && memcmp(dev->dev_addr, ha, dev->addr_len) == 0) return(dev); } @@ -312,19 +296,20 @@ void dev_load(const char *name) { - if(!dev_get(name)) { -#ifdef CONFIG_NET_ALIAS - const char *sptr; - - for (sptr=name ; *sptr ; sptr++) if(*sptr==':') break; - if (!(*sptr && *(sptr+1))) -#endif + if(!dev_get(name)) request_module(name); - } } #endif - + +static int +default_rebuild_header(struct sk_buff *skb) +{ + printk(KERN_DEBUG "%s: !skb->arp & !rebuild_header -- BUG!\n", skb->dev->name); + kfree_skb(skb, FREE_WRITE); + return 1; +} + /* * Prepare an interface for use. */ @@ -334,6 +319,13 @@ int ret = 0; /* + * Is it already up? + */ + + if (dev->flags&IFF_UP) + return 0; + + /* * Call device private open method */ @@ -341,29 +333,39 @@ ret = dev->open(dev); /* - * If it went open OK then set the flags + * If it went open OK then: */ if (ret == 0) { + /* + * nil rebuild_header routine, + * that should be never called and used as just bug trap. + */ + + if (dev->rebuild_header == NULL) + dev->rebuild_header = default_rebuild_header; + + /* + * Set the flags. + */ dev->flags |= (IFF_UP | IFF_RUNNING); + /* - * Initialise multicasting status + * Initialize multicasting status */ dev_mc_upload(dev); - notifier_call_chain(&netdev_chain, NETDEV_UP, dev); - + /* - * Passive non transmitting devices (including - * aliases) need not be on this chain. + * Wakeup transmit queue engine */ - if (!net_alias_is(dev) && dev->tx_queue_len) - { - cli(); - dev->next_up = dev_up_base; - dev_up_base = dev; - sti(); - } + dev_activate(dev); + + /* + * ... 
and announce new interface. + */ + notifier_call_chain(&netdev_chain, NETDEV_UP, dev); + } return(ret); } @@ -375,17 +377,24 @@ int dev_close(struct device *dev) { - int ct=0; - struct device **devp; + if (!(dev->flags&IFF_UP)) + return 0; + + dev_deactivate(dev); + + dev_lock_wait(); /* * Call the device specific close. This cannot fail. * Only if device is UP */ - if ((dev->flags & IFF_UP) && dev->stop) + if (dev->stop) dev->stop(dev); + if (dev->start) + printk("dev_close: bug %s still running\n", dev->name); + /* * Device is now down. */ @@ -397,36 +406,7 @@ */ notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); - /* - * Flush the multicast chain - */ - dev_mc_discard(dev); - - /* - * Purge any queued packets when we down the link - */ - while(ctbuffs[ct]))!=NULL) - kfree_skb(skb,FREE_WRITE); - ct++; - } - /* - * The device is no longer up. Drop it from the list. - */ - - devp = &dev_up_base; - while (*devp) - { - if (*devp == dev) - { - *devp = dev->next_up; - break; - } - devp = &(*devp)->next_up; - } return(0); } @@ -451,7 +431,7 @@ * taps currently in use. */ -static void queue_xmit_nit(struct sk_buff *skb, struct device *dev) +void dev_queue_xmit_nit(struct sk_buff *skb, struct device *dev) { struct packet_type *ptype; get_fast_time(&skb->stamp); @@ -467,180 +447,111 @@ struct sk_buff *skb2; if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) break; - skb2->mac.raw = skb2->data; - skb2->nh.raw = skb2->h.raw = skb2->data + dev->hard_header_len; - ptype->func(skb2, skb->dev, ptype); - } - } -} - -/* - * Send (or queue for sending) a packet. - * - * IMPORTANT: When this is called to resend frames. The caller MUST - * already have locked the sk_buff. Apart from that we do the - * rest of the magic. 
- */ - -static void do_dev_queue_xmit(struct sk_buff *skb, struct device *dev, int pri) -{ - unsigned long flags; - struct sk_buff_head *list; - int retransmission = 0; /* used to say if the packet should go */ - /* at the front or the back of the */ - /* queue - front is a retransmit try */ - /* - * Negative priority is used to flag a frame that is being pulled from the - * queue front as a retransmit attempt. It therefore goes back on the queue - * start on a failure. - */ - - if (pri < 0) - { - pri = -pri-1; - retransmission = 1; - } + /* Code, following below is wrong. -#ifdef CONFIG_NET_DEBUG - if (pri >= DEV_NUMBUFFS) - { - printk(KERN_WARNING "bad priority in do_dev_queue_xmit.\n"); - pri = 1; - } -#endif - - /* - * If we are bridging and this is directly generated output - * pass the frame via the bridge. - */ - -#ifdef CONFIG_BRIDGE - if(skb->pkt_bridged!=IS_BRIDGED && br_stats.flags & BR_UP) - { - if(br_tx_frame(skb)) - return; - } -#endif - - list = dev->buffs + pri; - - save_flags(flags); + The only reason, why it does work is that + ONLY packet sockets receive outgoing + packets. If such a packet will be (occasionally) + received by normal packet handler, which expects + that mac header is pulled... + */ - /* - * If this isn't a retransmission, use the first packet instead. - * Note: We don't do strict priority ordering here. We will in - * fact kick the queue that is our priority. The dev_tint reload - * does strict priority queueing. In effect what we are doing here - * is to add some random jitter to the queues and to do so by - * saving clocks. Doing a perfect priority queue isn't a good idea - * as you get some fascinating timing interactions. - */ + /* More sensible variant. skb->nh should be correctly + set by sender, so that the second statement is + just protection against buggy protocols. + */ + skb2->mac.raw = skb2->data; - if (!retransmission) - { - /* avoid overrunning the device queue.. 
*/ - if (skb_queue_len(list) > dev->tx_queue_len) - { - dev_kfree_skb(skb, FREE_WRITE); - return; - } + if (skb2->nh.raw < skb2->data || skb2->nh.raw >= skb2->tail) { + if (net_ratelimit()) + printk(KERN_DEBUG "protocol %04x is buggy, dev %s\n", skb2->protocol, dev->name); + skb2->nh.raw = skb2->data; + if (dev->hard_header) + skb2->nh.raw += dev->hard_header_len; + } - /* copy outgoing packets to any sniffer packet handlers */ - if (dev_nit) - queue_xmit_nit(skb,dev); - - if (skb_queue_len(list)) { - cli(); - __skb_queue_tail(list, skb); - skb = __skb_dequeue(list); - restore_flags(flags); + skb2->h.raw = skb2->nh.raw; + skb2->pkt_type = PACKET_OUTGOING; + ptype->func(skb2, skb->dev, ptype); } } - if (dev->hard_start_xmit(skb, dev) == 0) { - /* - * Packet is now solely the responsibility of the driver - */ - return; - } - - /* - * Transmission failed, put skb back into a list. Once on the list it's safe and - * no longer device locked (it can be freed safely from the device queue) - */ - cli(); - __skb_queue_head(list,skb); - restore_flags(flags); } /* - * Entry point for transmitting frames. + * Fast path for loopback frames. */ +void dev_loopback_xmit(struct sk_buff *skb) +{ + struct sk_buff *newskb=skb_clone(skb, GFP_ATOMIC); + if (newskb==NULL) + return; + + skb_pull(newskb, newskb->nh.raw - newskb->data); + newskb->ip_summed = CHECKSUM_UNNECESSARY; + if (newskb->dst==NULL) + printk(KERN_DEBUG "BUG: packet without dst looped back 1\n"); + netif_rx(newskb); +} + int dev_queue_xmit(struct sk_buff *skb) { struct device *dev = skb->dev; - - start_bh_atomic(); + struct Qdisc *q; /* * If the address has not been resolved. Call the device header rebuilder. * This can cover all protocols and technically not just ARP either. - */ - - if (!skb->arp) - { - /* - * FIXME: we should make the printk for no rebuild - * header a default rebuild_header routine and drop - * this call. Similarly we should make hard_header - * have a default NULL operation not check conditions. 
- */ - if (dev->rebuild_header) - { - if (dev->rebuild_header(skb)) - { - end_bh_atomic(); - return 0; - } - } - else - printk(KERN_DEBUG "%s: !skb->arp & !rebuild_header!\n", dev->name); - } - - /* - * - * If dev is an alias, switch to its main device. - * "arp" resolution has been made with alias device, so - * arp entries refer to alias, not main. * + * This call must be moved to protocol layer. + * Now it works only for IPv6 and for IPv4 in + * some unusual curcumstances (eql device). --ANK */ + + if (!skb->arp && dev->rebuild_header(skb)) + return 0; - if (net_alias_is(dev)) - skb->dev = dev = net_alias_main_dev(dev); - - do_dev_queue_xmit(skb, dev, skb->priority); - end_bh_atomic(); + q = dev->qdisc; + if (q->enqueue) { + start_bh_atomic(); + q->enqueue(skb, q); + qdisc_wakeup(dev); + end_bh_atomic(); + return 0; + } + + /* The device has no queue. Common case for software devices: + loopback, all the sorts of tunnels... + + Really, it is unlikely that bh protection is necessary here: + virtual devices do not generate EOI events. + However, it is possible, that they rely on bh protection + made by us here. + */ + if (dev->flags&IFF_UP) { + start_bh_atomic(); + if (netdev_nit) + dev_queue_xmit_nit(skb,dev); + if (dev->hard_start_xmit(skb, dev) == 0) { + end_bh_atomic(); + return 0; + } + if (net_ratelimit()) + printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name); + end_bh_atomic(); + } + kfree_skb(skb, FREE_WRITE); return 0; } -/* - * Fast path for loopback frames. 
- */ - -void dev_loopback_xmit(struct sk_buff *skb) -{ - struct sk_buff *newskb=skb_clone(skb, GFP_ATOMIC); - if (newskb==NULL) - return; - skb_pull(newskb, newskb->nh.raw - newskb->data); - newskb->ip_summed = CHECKSUM_UNNECESSARY; - if (newskb->dst==NULL) - printk(KERN_DEBUG "BUG: packet without dst looped back 1\n"); - netif_rx(newskb); -} +/*======================================================================= + Receiver rotutines + =======================================================================*/ +int netdev_dropping = 0; +atomic_t netdev_rx_dropped; /* * Receive a packet from a device driver and queue it for the upper @@ -649,15 +560,6 @@ void netif_rx(struct sk_buff *skb) { - static int dropping = 0; - - /* - * Any received buffers are un-owned and should be discarded - * when freed. These will be updated later as the frames get - * owners. - */ - - skb->sk = NULL; if(skb->stamp.tv_sec==0) get_fast_time(&skb->stamp); @@ -665,13 +567,14 @@ * Check that we aren't overdoing things. */ - if (!backlog_size) - dropping = 0; - else if (backlog_size > 300) - dropping = 1; + if (!backlog.qlen) + netdev_dropping = 0; + else if (backlog.qlen > 300) + netdev_dropping = 1; - if (dropping) + if (netdev_dropping) { + atomic_inc(&netdev_rx_dropped); kfree_skb(skb, FREE_READ); return; } @@ -681,7 +584,6 @@ */ skb_queue_tail(&backlog,skb); - backlog_size++; /* * If any packet arrived, mark it for processing after the @@ -692,32 +594,37 @@ return; } -/* - * This routine causes all interfaces to try to send some data. - */ - -static void dev_transmit(void) +#ifdef CONFIG_BRIDGE +static inline void handle_bridge(struct skbuff *skb, unsigned short type) { - struct device *dev; - - for (dev = dev_up_base; dev != NULL; dev = dev->next_up) + if (br_stats.flags & BR_UP && br_protocol_ok(ntohs(type))) { - if (dev->flags != 0 && !dev->tbusy) + /* + * We pass the bridge a complete frame. This means + * recovering the MAC header first. 
+ */ + + int offset=skb->data-skb->mac.raw; + cli(); + skb_push(skb,offset); /* Put header back on for bridge */ + if(br_receive_frame(skb)) { - /* - * Kick the device - */ - dev_tint(dev); + sti(); + continue; } + /* + * Pull the MAC header off for the copy going to + * the upper layers. + */ + skb_pull(skb,offset); + sti(); } } +#endif - -/********************************************************************************** - - Receive Queue Processor - -***********************************************************************************/ +#ifdef CONFIG_CPU_IS_SLOW +int net_cpu_congestion; +#endif /* * When we are called the queue is ready to grab, the interrupts are @@ -732,7 +639,15 @@ struct packet_type *ptype; struct packet_type *pt_prev; unsigned short type; - int nit = 301; + unsigned long start_time = jiffies; +#ifdef CONFIG_CPU_IS_SLOW + static unsigned long start_busy = 0; + static unsigned long ave_busy = 0; + + if (start_busy == 0) + start_busy = start_time; + net_cpu_congestion = ave_busy>>8; +#endif /* * Can we send anything now? We want to clear the @@ -741,7 +656,8 @@ * latency on a transmit interrupt bh. */ - dev_transmit(); + if (qdisc_head.forw != &qdisc_head) + qdisc_run_queues(); /* * Any data left to process. This may occur because a @@ -761,55 +677,43 @@ { struct sk_buff * skb = backlog.next; + if (jiffies - start_time > 1) { + /* Give chance to other bottom halves to run */ + mark_bh(NET_BH); + return; + } + /* * We have a packet. Therefore the queue has shrunk */ cli(); __skb_unlink(skb, &backlog); - backlog_size--; sti(); - /* - * We do not want to spin in net_bh infinitely. --ANK - */ - if (--nit <= 0) - { - if (nit == 0) - printk(KERN_WARNING "net_bh: too many loops, dropping...\n"); +#ifdef CONFIG_CPU_IS_SLOW + if (ave_busy > 128*16) { kfree_skb(skb, FREE_WRITE); - continue; + while ((skb = skb_dequeue(&backlog)) != NULL) + kfree_skb(skb, FREE_WRITE); + break; } +#endif + -#ifdef CONFIG_BRIDGE + /* + * Fetch the packet protocol ID. 
+ */ + + type = skb->protocol; + +#ifdef CONFIG_BRIDGE /* * If we are bridging then pass the frame up to the * bridging code (if this protocol is to be bridged). * If it is bridged then move on */ - - if (br_stats.flags & BR_UP && br_protocol_ok(ntohs(skb->protocol))) - { - /* - * We pass the bridge a complete frame. This means - * recovering the MAC header first. - */ - - int offset=skb->data-skb->mac.raw; - cli(); - skb_push(skb,offset); /* Put header back on for bridge */ - if(br_receive_frame(skb)) - { - sti(); - continue; - } - /* - * Pull the MAC header off for the copy going to - * the upper layers. - */ - skb_pull(skb,offset); - sti(); - } + handle_bridge(skb, type); #endif /* @@ -823,12 +727,6 @@ skb->h.raw = skb->nh.raw = skb->data; /* - * Fetch the packet protocol ID. - */ - - type = skb->protocol; - - /* * We got a packet ID. Now loop over the "known protocols" * list. There are two lists. The ptype_all list of taps (normally empty) * and the main protocol list which is hashed perfectly for normal protocols. @@ -837,15 +735,17 @@ pt_prev = NULL; for (ptype = ptype_all; ptype!=NULL; ptype=ptype->next) { - if(pt_prev) - { - struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC); - if(skb2) - pt_prev->func(skb2,skb->dev, pt_prev); + if (!ptype->dev || ptype->dev == skb->dev) { + if(pt_prev) + { + struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC); + if(skb2) + pt_prev->func(skb2,skb->dev, pt_prev); + } + pt_prev=ptype; } - pt_prev=ptype; } - + for (ptype = ptype_base[ntohs(type)&15]; ptype != NULL; ptype = ptype->next) { if (ptype->type == type && (!ptype->dev || ptype->dev==skb->dev)) @@ -872,7 +772,7 @@ pt_prev=ptype; } } /* End of protocol list loop */ - + /* * Is there a last item to send to ? */ @@ -883,16 +783,9 @@ * Has an unknown packet has been received ? */ - else + else { kfree_skb(skb, FREE_WRITE); - /* - * Again, see if we can transmit anything now. 
- * [Ought to take this out judging by tests it slows - * us down not speeds us up] - */ -#ifdef XMIT_EVERY - dev_transmit(); -#endif + } } /* End of queue loop */ /* @@ -903,64 +796,47 @@ * One last output flush. */ - dev_transmit(); + if (qdisc_head.forw != &qdisc_head) + qdisc_run_queues(); + +#ifdef CONFIG_CPU_IS_SLOW +{ + unsigned long start_idle = jiffies; + ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4); + start_busy = 0; +} +#endif } +/* Protocol dependent address dumping routines */ -/* - * This routine is called when an device driver (i.e. an - * interface) is ready to transmit a packet. - */ - -void dev_tint(struct device *dev) -{ - int i; - unsigned long flags; - struct sk_buff_head * head; - - /* - * aliases do not transmit (for now :) ) - */ +static int (*gifconf[NPROTO])(struct device *dev, char *bufptr, int len); - if (net_alias_is(dev)) { - printk(KERN_DEBUG "net alias %s transmits\n", dev->name); - return; - } +int register_gifconf(int family, int (*func)(struct device *dev, char *bufptr, int len)) +{ + if (family<0 || family>=NPROTO) + return -EINVAL; + gifconf[family] = func; + return 0; +} - head = dev->buffs; - save_flags(flags); - cli(); - /* - * Work the queues in priority order - */ - for(i = 0;i < DEV_NUMBUFFS; i++,head++) - { +/* + This ioctl is wrong by design. It really existed in some + old SYSV systems, only was named SIOCGIFNUM. + In multiprotocol environment it is just useless. + Well, SIOCGIFCONF is wrong too, but we have to preserve + it by compatibility reasons. + + If someone wants to achieve the same effect, please, use undocumented + feature of SIOCGIFCONF: it returns buffer length, if buffer + is not supplied. - while (!skb_queue_empty(head)) { - struct sk_buff *skb; + Let's remove it, until someone started to use it. 
--ANK - skb = head->next; - __skb_unlink(skb, head); - /* - * Stop anyone freeing the buffer while we retransmit it - */ - restore_flags(flags); - /* - * Feed them to the output stage and if it fails - * indicate they re-queue at the front. - */ - do_dev_queue_xmit(skb,dev,-i - 1); - /* - * If we can take no more then stop here. - */ - if (dev->tbusy) - return; - cli(); - } - } - restore_flags(flags); -} + In any case, if someone cannot live without it, it should + be renamed to SIOCGIFNUM. + */ /* @@ -970,20 +846,26 @@ static int dev_ifcount(unsigned int *arg) { struct device *dev; - int err; unsigned int count = 0; for (dev = dev_base; dev != NULL; dev = dev->next) count++; - err = copy_to_user(arg, &count, sizeof(unsigned int)); - if (err) - return -EFAULT; - return 0; + return put_user(count, arg); } /* - * Map an interface index to its name (SIOGIFNAME) + * Map an interface index to its name (SIOCGIFNAME) + */ + +/* + * This call is useful, but I'd remove it too. + * + * The reason is purely aestetical, it is the only call + * from SIOC* family using struct ifreq in reversed manner. + * Besides that, it is pretty silly to put "drawing" facility + * to kernel, it is useful only to print ifindices + * in readable form, is not it? --ANK */ static int dev_ifname(struct ifreq *arg) @@ -1019,7 +901,6 @@ static int dev_ifconf(char *arg) { struct ifconf ifc; - struct ifreq ifr; struct device *dev; char *pos; unsigned int len; @@ -1031,68 +912,51 @@ err = copy_from_user(&ifc, arg, sizeof(struct ifconf)); if (err) - return -EFAULT; - len = ifc.ifc_len; + return -EFAULT; + pos = ifc.ifc_buf; + if (pos==NULL) + ifc.ifc_len=0; + len = ifc.ifc_len; /* - * We now walk the device list filling each active device - * into the array. - */ - - /* * Loop over the interfaces, and write an info block for each. */ - - dev_lock_wait(); - dev_lock_list(); - for (dev = dev_base; dev != NULL; dev = dev->next) - { - /* - * Have we run out of space here ? 
- */ - - if (len < sizeof(struct ifreq)) - break; + for (dev = dev_base; dev != NULL; dev = dev->next) { + int i; + for (i=0; iname); - (*(struct sockaddr_in *) &ifr.ifr_addr).sin_family = dev->family; - (*(struct sockaddr_in *) &ifr.ifr_addr).sin_addr.s_addr = dev->pa_addr; + if (gifconf[i] == NULL) + continue; + done = gifconf[i](dev, pos, len); - /* - * Write this block to the caller's space. - */ - - err = copy_to_user(pos, &ifr, sizeof(struct ifreq)); - if (err) - return -EFAULT; - pos += sizeof(struct ifreq); - len -= sizeof(struct ifreq); + if (done<0) + return -EFAULT; + + len -= done; + if (pos) + pos += done; + } } - dev_unlock_list(); - /* * All done. Write the updated control block back to the caller. */ - - ifc.ifc_len = (pos - ifc.ifc_buf); - ifc.ifc_req = (struct ifreq *) ifc.ifc_buf; - err = copy_to_user(arg, &ifc, sizeof(struct ifconf)); - if (err) + ifc.ifc_len -= len; + + if (copy_to_user(arg, &ifc, sizeof(struct ifconf))) return -EFAULT; /* * Report how much was filled in */ - return(pos - arg); + return ifc.ifc_len; } - /* * This is invoked by the /proc filesystem handler to display a device * in detail. 
@@ -1105,7 +969,7 @@ int size; if (stats) - size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %4lu %8lu %8lu %4lu %4lu %4lu %5lu %4lu\n", + size = sprintf(buffer, "%6s:%8lu %7lu %4lu %4lu %4lu %4lu %8lu %8lu %4lu %4lu %4lu %5lu %4lu %4lu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, @@ -1117,7 +981,8 @@ stats->tx_packets, stats->tx_errors, stats->tx_dropped, stats->tx_fifo_errors, stats->collisions, stats->tx_carrier_errors + stats->tx_aborted_errors - + stats->tx_window_errors + stats->tx_heartbeat_errors); + + stats->tx_window_errors + stats->tx_heartbeat_errors, + stats->multicast); else size = sprintf(buffer, "%6s: No statistics available.\n", dev->name); @@ -1252,272 +1117,218 @@ #endif /* CONFIG_PROC_FS */ #endif /* CONFIG_NET_RADIO */ +void dev_set_promiscuity(struct device *dev, int inc) +{ + unsigned short old_flags = dev->flags; -/* - * Perform the SIOCxIFxxx calls. - * - * The socket layer has seen an ioctl the address family thinks is - * for the device. At this point we get invoked to make a decision - */ - -static int dev_ifsioc(void *arg, unsigned int getset) + dev->flags |= IFF_PROMISC; + if ((dev->promiscuity += inc) == 0) + dev->flags &= ~IFF_PROMISC; + if (dev->flags^old_flags) { + dev_mc_upload(dev); + printk(KERN_INFO "device %s %s promiscuous mode\n", + dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "leaved"); + } +} + +void dev_set_allmulti(struct device *dev, int inc) { - struct ifreq ifr; - struct device *dev; - int ret, err; + unsigned short old_flags = dev->flags; + + dev->flags |= IFF_ALLMULTI; + if ((dev->allmulti += inc) == 0) + dev->flags &= ~IFF_ALLMULTI; + if (dev->flags^old_flags) + dev_mc_upload(dev); +} + +int dev_change_flags(struct device *dev, unsigned flags) +{ + int ret; + int old_flags = dev->flags; /* - * Fetch the caller's info block into kernel space + * Set the flags on our device. 
*/ - - err = copy_from_user(&ifr, arg, sizeof(struct ifreq)); - if (err) - return -EFAULT; + + dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_RUNNING|IFF_NOARP| + IFF_SLAVE|IFF_MASTER| + IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) | + (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC)); /* - * See which interface the caller is talking about. - */ - + * Load in the correct multicast list now the flags have changed. + */ + + dev_mc_upload(dev); + /* - * - * net_alias_dev_get(): dev_get() with added alias naming magic. - * only allow alias creation/deletion if (getset==SIOCSIFADDR) - * + * Have we downed the interface. We handle IFF_UP ourselves + * according to user attempts to set it, rather than blindly + * setting it. */ - -#ifdef CONFIG_KERNELD - dev_load(ifr.ifr_name); -#endif -#ifdef CONFIG_NET_ALIAS - if ((dev = net_alias_dev_get(ifr.ifr_name, getset == SIOCSIFADDR, &err, NULL, NULL)) == NULL) - return(err); -#else - if ((dev = dev_get(ifr.ifr_name)) == NULL) - return(-ENODEV); -#endif - switch(getset) + ret = 0; + if ((old_flags^flags)&IFF_UP) /* Bit is different ? */ { - case SIOCGIFFLAGS: /* Get interface flags */ - ifr.ifr_flags = dev->flags; - goto rarok; + if(old_flags&IFF_UP) /* Gone down */ + ret=dev_close(dev); + else /* Come up */ + ret=dev_open(dev); - case SIOCSIFFLAGS: /* Set interface flags */ - { - int old_flags = dev->flags; - - /* - * We are not allowed to potentially close/unload - * a device until we get this lock. - */ - - dev_lock_wait(); - dev_lock_list(); - - /* - * Set the flags on our device. - */ - - dev->flags = (ifr.ifr_flags & ( - IFF_BROADCAST | IFF_DEBUG | IFF_LOOPBACK | IFF_PORTSEL | - IFF_POINTOPOINT | IFF_NOTRAILERS | IFF_RUNNING | IFF_AUTOMEDIA | - IFF_NOARP | IFF_PROMISC | IFF_ALLMULTI | IFF_SLAVE | IFF_MASTER - | IFF_MULTICAST)) | (dev->flags & IFF_UP); - /* - * Load in the correct multicast list now the flags have changed. 
- */ + if (ret == 0) + dev_mc_upload(dev); + } - dev_mc_upload(dev); + if (dev->flags&IFF_UP && + ((old_flags^dev->flags)&~(IFF_UP|IFF_RUNNING|IFF_PROMISC|IFF_VOLATILE))) { + printk(KERN_DEBUG "SIFFL %s(%s)\n", dev->name, current->comm); + notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + } - /* - * Have we downed the interface. We handle IFF_UP ourselves - * according to user attempts to set it, rather than blindly - * setting it. - */ - - if ((old_flags^ifr.ifr_flags)&IFF_UP) /* Bit is different ? */ - { - if(old_flags&IFF_UP) /* Gone down */ - ret=dev_close(dev); - else /* Come up */ - { - ret=dev_open(dev); - if(ret<0) - dev->flags&=~IFF_UP; /* Open failed */ - } - } - else - ret=0; - /* - * Load in the correct multicast list now the flags have changed. - */ + if ((flags^dev->gflags)&IFF_PROMISC) { + int inc = (flags&IFF_PROMISC) ? +1 : -1; + dev->gflags ^= IFF_PROMISC; + dev_set_promiscuity(dev, inc); + } - dev_mc_upload(dev); - if ((dev->flags&IFF_UP) && ((old_flags^dev->flags)&~(IFF_UP|IFF_RUNNING|IFF_PROMISC))) - { - printk(KERN_DEBUG "SIFFL %s(%s)\n", dev->name, current->comm); - notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); - } - if ((dev->flags^old_flags)&IFF_PROMISC) { - if (dev->flags&IFF_PROMISC) - printk(KERN_INFO "%s enters promiscuous mode.\n", dev->name); - else - printk(KERN_INFO "%s leave promiscuous mode.\n", dev->name); - } - dev_unlock_list(); - } - break; + return ret; +} + +/* + * Perform the SIOCxIFxxx calls. 
+ */ + +static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) +{ + struct device *dev; + int err; + + if ((dev = dev_get(ifr->ifr_name)) == NULL) + return -ENODEV; + + switch(cmd) + { + case SIOCGIFFLAGS: /* Get interface flags */ + ifr->ifr_flags = (dev->flags&~IFF_PROMISC)|(dev->gflags&IFF_PROMISC); + return 0; + + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, ifr->ifr_flags); case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ - - ifr.ifr_metric = dev->metric; - goto rarok; + ifr->ifr_metric = dev->metric; + return 0; case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */ - dev->metric = ifr.ifr_metric; - ret=0; - break; + dev->metric = ifr->ifr_metric; + return 0; case SIOCGIFMTU: /* Get the MTU of a device */ - ifr.ifr_mtu = dev->mtu; - goto rarok; + ifr->ifr_mtu = dev->mtu; + return 0; case SIOCSIFMTU: /* Set the MTU of a device */ - - if (ifr.ifr_mtu == dev->mtu) { - ret = 0; - break; - } + if (ifr->ifr_mtu == dev->mtu) + return 0; /* * MTU must be positive. */ - if(ifr.ifr_mtu<68) + if (ifr->ifr_mtu<0) return -EINVAL; if (dev->change_mtu) - ret = dev->change_mtu(dev, ifr.ifr_mtu); - else - { - dev->mtu = ifr.ifr_mtu; - ret = 0; + err = dev->change_mtu(dev, ifr->ifr_mtu); + else { + dev->mtu = ifr->ifr_mtu; + err = 0; } - if (!ret && dev->flags&IFF_UP) { + if (!err && dev->flags&IFF_UP) { printk(KERN_DEBUG "SIFMTU %s(%s)\n", dev->name, current->comm); notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev); } - break; - - case SIOCGIFMEM: /* Get the per device memory space. We can add this but currently - do not support it */ - ret = -EINVAL; - break; - - case SIOCSIFMEM: /* Set the per device memory buffer space. 
Not applicable in our case */ - ret = -EINVAL; - break; + return err; case SIOCGIFHWADDR: - memcpy(ifr.ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN); - ifr.ifr_hwaddr.sa_family=dev->type; - goto rarok; + memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN); + ifr->ifr_hwaddr.sa_family=dev->type; + return 0; case SIOCSIFHWADDR: if(dev->set_mac_address==NULL) return -EOPNOTSUPP; - if(ifr.ifr_hwaddr.sa_family!=dev->type) + if(ifr->ifr_hwaddr.sa_family!=dev->type) return -EINVAL; - ret=dev->set_mac_address(dev,&ifr.ifr_hwaddr); - if (!ret) + err=dev->set_mac_address(dev,&ifr->ifr_hwaddr); + if (!err) notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); - break; + return err; + case SIOCSIFHWBROADCAST: + if(ifr->ifr_hwaddr.sa_family!=dev->type) + return -EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN); + notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); + return 0; + case SIOCGIFMAP: - ifr.ifr_map.mem_start=dev->mem_start; - ifr.ifr_map.mem_end=dev->mem_end; - ifr.ifr_map.base_addr=dev->base_addr; - ifr.ifr_map.irq=dev->irq; - ifr.ifr_map.dma=dev->dma; - ifr.ifr_map.port=dev->if_port; - goto rarok; + ifr->ifr_map.mem_start=dev->mem_start; + ifr->ifr_map.mem_end=dev->mem_end; + ifr->ifr_map.base_addr=dev->base_addr; + ifr->ifr_map.irq=dev->irq; + ifr->ifr_map.dma=dev->dma; + ifr->ifr_map.port=dev->if_port; + return 0; case SIOCSIFMAP: - if(dev->set_config==NULL) - return -EOPNOTSUPP; - return dev->set_config(dev,&ifr.ifr_map); + if (dev->set_config) + return dev->set_config(dev,&ifr->ifr_map); + return -EOPNOTSUPP; case SIOCADDMULTI: - if(dev->set_multicast_list==NULL) + if(dev->set_multicast_list==NULL || + ifr->ifr_hwaddr.sa_family!=AF_UNSPEC) return -EINVAL; - if(ifr.ifr_hwaddr.sa_family!=AF_UNSPEC) - return -EINVAL; - dev_mc_add(dev,ifr.ifr_hwaddr.sa_data, dev->addr_len, 1); + printk(KERN_DEBUG "SIOCADDMULTI ioctl is deprecated\n"); + dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1); return 0; case 
SIOCDELMULTI: - if(dev->set_multicast_list==NULL) - return -EINVAL; - if(ifr.ifr_hwaddr.sa_family!=AF_UNSPEC) + if(dev->set_multicast_list==NULL || + ifr->ifr_hwaddr.sa_family!=AF_UNSPEC) return -EINVAL; - dev_mc_delete(dev,ifr.ifr_hwaddr.sa_data,dev->addr_len, 1); + printk(KERN_DEBUG "SIOCDELMULTI ioctl is deprecated\n"); + dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1); return 0; - case SIOGIFINDEX: - ifr.ifr_ifindex = dev->ifindex; - goto rarok; - + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; /* * Unknown or private ioctl */ default: - if((getset >= SIOCDEVPRIVATE) && - (getset <= (SIOCDEVPRIVATE + 15))) { - if(dev->do_ioctl==NULL) - return -EOPNOTSUPP; - ret = dev->do_ioctl(dev, &ifr, getset); - if (!ret) - { - err = copy_to_user(arg,&ifr,sizeof(struct ifreq)); - if (err) - ret = -EFAULT; - } - break; + if(cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) { + if (dev->do_ioctl) + return dev->do_ioctl(dev, ifr, cmd); + return -EOPNOTSUPP; } #ifdef CONFIG_NET_RADIO - if((getset >= SIOCIWFIRST) && (getset <= SIOCIWLAST)) - { - if(dev->do_ioctl==NULL) - return -EOPNOTSUPP; - /* Perform the ioctl */ - ret=dev->do_ioctl(dev, &ifr, getset); - /* If return args... */ - if(IW_IS_GET(getset)) - { - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - { - ret = -EFAULT; - } - } - break; + if(cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + if (dev->do_ioctl) + return dev->do_ioctl(dev, ifr, cmd); + return -EOPNOTSUPP; } #endif /* CONFIG_NET_RADIO */ - ret = -EINVAL; } - return(ret); -/* - * The load of calls that return an ifreq and ok (saves memory). 
- */ -rarok: - err = copy_to_user(arg, &ifr, sizeof(struct ifreq)); - if (err) - err = -EFAULT; - return err; + return -EINVAL; } @@ -1528,47 +1339,98 @@ int dev_ioctl(unsigned int cmd, void *arg) { + struct ifreq ifr; + int ret; +#ifdef CONFIG_NET_ALIAS + char *colon; +#endif + + /* One special case: SIOCGIFCONF takes ifconf argument + and requires shared lock, because it sleeps writing + to user space. + */ + + if (cmd == SIOCGIFCONF) { + rtnl_shlock(); + dev_ifconf((char *) arg); + rtnl_shunlock(); + return 0; + } + if (cmd == SIOCGIFCOUNT) { + return dev_ifcount((unsigned int*)arg); + } + if (cmd == SIOCGIFNAME) { + return dev_ifname((struct ifreq *)arg); + } + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + +#ifdef CONFIG_NET_ALIAS + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; +#endif + + /* + * See which interface the caller is talking about. + */ + +#ifdef CONFIG_KERNELD + dev_load(ifr.ifr_name); +#endif + switch(cmd) { - case SIOCGIFCONF: - (void) dev_ifconf((char *) arg); - return 0; - case SIOCGIFCOUNT: - return dev_ifcount((unsigned int *) arg); - case SIOGIFNAME: - return dev_ifname((struct ifreq *)arg); - /* - * Ioctl calls that can be done by all. + * These ioctl calls: + * - can be done by all. + * - atomic and do not require locking. + * - return a value */ case SIOCGIFFLAGS: case SIOCGIFMETRIC: case SIOCGIFMTU: - case SIOCGIFMEM: case SIOCGIFHWADDR: case SIOCGIFSLAVE: case SIOCGIFMAP: - case SIOGIFINDEX: - return dev_ifsioc(arg, cmd); + case SIOCGIFINDEX: + ret = dev_ifsioc(&ifr, cmd); + if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return ret; /* - * Ioctl calls requiring the power of a superuser + * These ioctl calls: + * - require superuser power. + * - require strict serialization. 
+ * - do not return a value */ case SIOCSIFFLAGS: case SIOCSIFMETRIC: case SIOCSIFMTU: - case SIOCSIFMEM: - case SIOCSIFHWADDR: case SIOCSIFMAP: + case SIOCSIFHWADDR: case SIOCSIFSLAVE: case SIOCADDMULTI: case SIOCDELMULTI: + case SIOCSIFHWBROADCAST: if (!suser()) return -EPERM; - return dev_ifsioc(arg, cmd); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + return ret; + case SIOCGIFMEM: + /* Get the per device memory space. We can add this but currently + do not support it */ + case SIOCSIFMEM: + /* Set the per device memory buffer space. Not applicable in our case */ case SIOCSIFLINK: return -EINVAL; @@ -1577,16 +1439,29 @@ */ default: - if((cmd >= SIOCDEVPRIVATE) && - (cmd <= (SIOCDEVPRIVATE + 15))) { - return dev_ifsioc(arg, cmd); + if (cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) { + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return ret; } #ifdef CONFIG_NET_RADIO - if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST)) - { - if((IW_IS_SET(cmd)) && (!suser())) - return -EPERM; - return dev_ifsioc(arg, cmd); + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + if (IW_IS_SET(cmd)) { + if (!suser()) + return -EPERM; + rtnl_lock(); + } + ret = dev_ifsioc(&ifr, cmd); + if (IW_IS_SET(cmd)) + rtnl_unlock(); + if (!ret && IW_IS_GET(cmd) && + copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return ret; } #endif /* CONFIG_NET_RADIO */ return -EINVAL; @@ -1596,9 +1471,103 @@ int dev_new_index() { static int ifindex; - return ++ifindex; + for (;;) { + if (++ifindex <= 0) + ifindex=1; + if (dev_get_by_index(ifindex) == NULL) + return ifindex; + } } +static int dev_boot_phase = 1; + + +int register_netdevice(struct device *dev) +{ + struct device *d, **dp; + + if (dev_boot_phase) { + printk(KERN_INFO "early initialization of device %s is deferred\n", dev->name); + + /* Check for existence, and append to tail of chain */ + for (dp=&dev_base; 
(d=*dp) != NULL; dp=&d->next) { + if (d == dev || strcmp(d->name, dev->name) == 0) + return -EEXIST; + } + dev->next = NULL; + *dp = dev; + return 0; + } + + dev->iflink = -1; + + /* Init, if this function is available */ + if (dev->init && dev->init(dev) != 0) + return -EIO; + + /* Check for existence, and append to tail of chain */ + for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) { + if (d == dev || strcmp(d->name, dev->name) == 0) + return -EEXIST; + } + dev->next = NULL; + dev_init_scheduler(dev); + dev->ifindex = dev_new_index(); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; + *dp = dev; + + /* Notify protocols, that a new device appeared. */ + notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + + return 0; +} + +int unregister_netdevice(struct device *dev) +{ + struct device *d, **dp; + + if (dev_boot_phase == 0) { + /* If device is running, close it. + It is very bad idea, really we should + complain loudly here, but random hackery + in linux/drivers/net likes it. + */ + if (dev->flags & IFF_UP) + dev_close(dev); + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + + /* + * Flush the multicast chain + */ + dev_mc_discard(dev); + + /* To avoid pointers looking to nowhere, + we wait for end of critical section */ + dev_lock_wait(); + } + + /* And unlink it from device chain. */ + for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) { + if (d == dev) { + *dp = d->next; + d->next = NULL; + if (dev->destructor) + dev->destructor(dev); + return 0; + } + } + return -ENODEV; +} + + /* * Initialize the DEV module. 
At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not @@ -1613,7 +1582,9 @@ extern void dlci_setup(void); extern int pt_init(void); extern int sm_init(void); -extern int baycom_init(void); +extern int baycom_ser_fdx_init(void); +extern int baycom_ser_hdx_init(void); +extern int baycom_par_init(void); extern int lapbeth_init(void); extern void arcnet_init(void); @@ -1641,6 +1612,8 @@ { struct device *dev, **dp; + pktsched_init(); + /* * Initialise the packet receive queue. */ @@ -1660,6 +1633,7 @@ * * Some devices want to be initialized early.. */ + #if defined(CONFIG_LANCE) lance_init(); #endif @@ -1681,8 +1655,14 @@ #if defined(CONFIG_SDLA) sdla_setup(); #endif -#if defined(CONFIG_BAYCOM) - baycom_init(); +#if defined(CONFIG_BAYCOM_PAR) + baycom_par_init(); +#endif +#if defined(CONFIG_BAYCOM_SER_FDX) + baycom_ser_fdx_init(); +#endif +#if defined(CONFIG_BAYCOM_SER_HDX) + baycom_ser_hdx_init(); #endif #if defined(CONFIG_SOUNDMODEM) sm_init(); @@ -1706,6 +1686,7 @@ slhc_install(); #endif + /* * Add the devices. 
* If the call to dev->init fails, the dev is removed @@ -1716,11 +1697,7 @@ dp = &dev_base; while ((dev = *dp) != NULL) { - int i; - for (i = 0; i < DEV_NUMBUFFS; i++) { - skb_queue_head_init(dev->buffs + i); - } - + dev->iflink = -1; if (dev->init && dev->init(dev)) { /* @@ -1732,6 +1709,9 @@ { dp = &dev->next; dev->ifindex = dev_new_index(); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; + dev_init_scheduler(dev); } } @@ -1745,18 +1725,13 @@ #endif /* CONFIG_PROC_FS */ #endif /* CONFIG_NET_RADIO */ - /* - * Initialise net_alias engine - * - * - register net_alias device notifier - * - register proc entries: /proc/net/alias_types - * /proc/net/aliases - */ + init_bh(NET_BH, net_bh); -#ifdef CONFIG_NET_ALIAS - net_alias_init(); + dev_boot_phase = 0; + +#ifdef CONFIG_IP_PNP + ip_auto_config(); #endif - init_bh(NET_BH, net_bh); return 0; } diff -u --recursive --new-file v2.1.67/linux/net/core/dev_mcast.c linux/net/core/dev_mcast.c --- v2.1.67/linux/net/core/dev_mcast.c Sun Jan 19 05:47:27 1997 +++ linux/net/core/dev_mcast.c Sun Nov 30 14:00:39 1997 @@ -42,7 +42,6 @@ #include #include #include -#include /* @@ -70,19 +69,6 @@ return; /* - * An aliased device should end up with the combined - * multicast list of all its aliases. - * Really, multicasting with logical interfaces is very - * subtle question. Now we DO forward multicast packets - * to logical interfcases, that doubles multicast - * traffic but allows mrouted to work. 
- * Alas, mrouted does not understand aliases even - * in 4.4BSD --ANK - */ - - dev = net_alias_main_dev(dev); - - /* * Devices with no set multicast don't get set */ @@ -99,7 +85,6 @@ void dev_mc_delete(struct device *dev, void *addr, int alen, int all) { struct dev_mc_list **dmi; - dev = net_alias_main_dev(dev); for(dmi=&dev->mc_list;*dmi!=NULL;dmi=&(*dmi)->next) { @@ -136,8 +121,6 @@ { struct dev_mc_list *dmi; - dev = net_alias_main_dev(dev); - for(dmi=dev->mc_list;dmi!=NULL;dmi=dmi->next) { if(memcmp(dmi->dmi_addr,addr,dmi->dmi_addrlen)==0 && dmi->dmi_addrlen==alen) @@ -165,12 +148,12 @@ void dev_mc_discard(struct device *dev) { - if (net_alias_is(dev)) - return; while(dev->mc_list!=NULL) { struct dev_mc_list *tmp=dev->mc_list; dev->mc_list=dev->mc_list->next; + if (tmp->dmi_users) + printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); kfree_s(tmp,sizeof(*tmp)); } dev->mc_count=0; diff -u --recursive --new-file v2.1.67/linux/net/core/iovec.c linux/net/core/iovec.c --- v2.1.67/linux/net/core/iovec.c Sun Feb 2 05:18:49 1997 +++ linux/net/core/iovec.c Sun Nov 30 14:00:39 1997 @@ -192,69 +192,78 @@ * * ip_build_xmit must ensure that when fragmenting only the last * call to this function will be unaligned also. - * - * FIXME: add an error handling path when a copy/checksum from - * user space failed because of a invalid pointer. 
*/ -unsigned int csum_partial_copy_fromiovecend(unsigned char *kdata, - struct iovec *iov, int offset, - int len, int csum) +int csum_partial_copy_fromiovecend(unsigned char *kdata, + struct iovec *iov, int offset, + int len, int *csump) { - __u32 partial; - __u32 partial_cnt = 0; - - while(offset>0) - { - if (offset > iov->iov_len) - { - offset -= iov->iov_len; - - } - else - { - u8 *base; - int copy; - - base = iov->iov_base + offset; - copy = min(len, iov->iov_len - offset); - offset = 0; + int partial_cnt = 0; + int err = 0; + int csum; + + do { + int copy = iov->iov_len - offset; + + if (copy >= 0) { + u8 *base = iov->iov_base + offset; + + /* Normal case (single iov component) is fastly detected */ + if (len <= copy) { + *csump = csum_partial_copy_from_user(base, kdata, + len, *csump, &err); + return err; + } partial_cnt = copy % 4; - if (partial_cnt) - { + if (partial_cnt) { copy -= partial_cnt; - copy_from_user(&partial, base + copy, - partial_cnt); + err |= copy_from_user(kdata+copy, base+copy, partial_cnt); } - /* - * FIXME: add exception handling to the - * csum functions and set *err when an - * exception occurs. - */ - csum = csum_partial_copy_fromuser(base, kdata, - copy, csum); + *csump = csum_partial_copy_from_user(base, kdata, + copy, *csump, &err); len -= copy + partial_cnt; kdata += copy + partial_cnt; + iov++; + break; } - iov++; - } + iov++; + offset = -copy; + } while (offset > 0); + + csum = *csump; while (len>0) { u8 *base = iov->iov_base; - int copy=min(len, iov->iov_len); + int copy = min(len, iov->iov_len); + /* There is a remnant from previous iov. */ if (partial_cnt) { int par_len = 4 - partial_cnt; - copy_from_user(&partial, base + partial_cnt, par_len); - csum = csum_partial((u8*) &partial, 4, csum); + /* iov component is too short ... 
*/ + if (par_len > copy) { + err |= copy_from_user(kdata, base, copy); + base += copy; + partial_cnt += copy; + kdata += copy; + len -= copy; + iov++; + if (len) + continue; + *csump = csum_partial(kdata-partial_cnt, partial_cnt, csum); + return err; + } + err |= copy_from_user(kdata, base, par_len); + csum = csum_partial(kdata-partial_cnt, 4, csum); base += par_len; copy -= par_len; + len -= par_len; + kdata += par_len; partial_cnt = 0; } @@ -264,16 +273,15 @@ if (partial_cnt) { copy -= partial_cnt; - copy_from_user(&partial, base + copy, - partial_cnt); + err |= copy_from_user(kdata+copy, base + copy, partial_cnt); } } - csum = csum_partial_copy_fromuser(base, kdata, copy, csum); + csum = csum_partial_copy_from_user(base, kdata, copy, csum, &err); len -= copy + partial_cnt; kdata += copy + partial_cnt; iov++; } - - return csum; + *csump = csum; + return err; } diff -u --recursive --new-file v2.1.67/linux/net/core/net_alias.c linux/net/core/net_alias.c --- v2.1.67/linux/net/core/net_alias.c Thu Sep 4 17:07:32 1997 +++ linux/net/core/net_alias.c Wed Dec 31 16:00:00 1969 @@ -1,1464 +0,0 @@ -/* - * NET_ALIAS network device aliasing module. - * - * - * Version: @(#)net_alias.c 0.43 12/20/95 - * - * Authors: Juan Jose Ciarlante, - * Marcelo Fabian Roccasalva, - * - * Features: - * - AF_ independent: net_alias_type objects - * - AF_INET optimized - * - ACTUAL alias devices inserted in dev chain - * - fast hashed alias address lookup - * - net_alias_type objs registration/unreg., module-ables. - * - /proc/net/aliases & /proc/net/alias_types entries - * Fixes: - * JJC : several net_alias_type func. renamed. - * JJC : net_alias_type object methods now pass - * *this. - * JJC : xxx_rcv device selection based on - * addrs - * Andreas Schultz : Kerneld support. - * - * FIXME: - * - User calls sleep/wake_up locking. 
- * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef CONFIG_KERNELD -#include -#endif - -/* - * Only allow the following flags to pass from main device to aliases - */ - -#define NET_ALIAS_IFF_MASK (IFF_UP|IFF_RUNNING|IFF_NOARP|IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_MULTICAST) - -static struct net_alias_type * nat_getbytype(int type); -static int nat_attach_chg(struct net_alias_type *nat, int delta); -static int nat_bind(struct net_alias_type *nat,struct net_alias *alias, struct sockaddr *sa); -static int nat_unbind(struct net_alias_type *nat, struct net_alias *alias); - -static int net_alias_devinit(struct device *dev); -static int net_alias_hard_start_xmit(struct sk_buff *skb, struct device *dev); -static int net_alias_devsetup(struct net_alias *alias, struct net_alias_type *nat, struct sockaddr *sa); -static struct net_alias **net_alias_slow_findp(struct net_alias_info *alias_info, struct net_alias *alias); -static struct device *net_alias_dev_create(struct device *main_dev, int slot, int *err, struct sockaddr *sa, void *data); -static struct device *net_alias_dev_delete(struct device *main_dev, int slot, int *err); -static void net_alias_free(struct device *dev); - -/* - * net_alias_type base array, will hold net_alias_type obj hashed list - * heads. 
- */ - -struct net_alias_type *nat_base[16]; - - -/* - * Get net_alias_type ptr by type - */ - -extern __inline__ struct net_alias_type *nat_getbytype(int type) -{ - struct net_alias_type *nat; - for(nat = nat_base[type & 0x0f]; nat ; nat = nat->next) - { - if (nat->type == type) - return nat; - } - return NULL; -} - - -/* - * Get addr32 representation (pre-hashing) of address. - * If NULL nat->get_addr32, assume sockaddr_in struct (IP-ish). - */ - -extern __inline__ __u32 nat_addr32(struct net_alias_type *nat, struct sockaddr *sa) -{ - if (nat->get_addr32) - return nat->get_addr32(nat, sa); - else - return (*(struct sockaddr_in *)sa).sin_addr.s_addr; -} - - -/* - * Hashing code for alias_info->hash_tab entries - * 4 bytes -> 1/2 byte using xor complemented by af - */ - -extern __inline__ unsigned HASH(__u32 addr, int af) -{ - unsigned tmp = addr ^ (addr>>16); /* 4 -> 2 */ - tmp ^= (tmp>>8); /* 2 -> 1 */ - return (tmp^(tmp>>4)^af) & 0x0f; /* 1 -> 1/2 */ -} - - -/* - * get hash key for supplied net alias type and address - * nat must be !NULL - * the purpose here is to map a net_alias_type and a generic - * address to a hash code. 
- */ - -extern __inline__ int nat_hash_key(struct net_alias_type *nat, struct sockaddr *sa) -{ - return HASH(nat_addr32(nat,sa), sa->sa_family); -} - - -/* - * Change net_alias_type number of attachments (bindings) - */ - -static int nat_attach_chg(struct net_alias_type *nat, int delta) -{ - unsigned long flags; - int n_at; - if (!nat) - return -1; - save_flags(flags); - cli(); - n_at = nat->n_attach + delta; - if (n_at < 0) - { - restore_flags(flags); - printk(KERN_WARNING - "net_alias: tried to set n_attach < 0 for (family==%d) nat object.\n", - nat->type); - return -1; - } - nat->n_attach = n_at; - restore_flags(flags); - return 0; -} - - -/* - * Bind alias to its type (family) object and call initialization hook - */ - -extern __inline__ int nat_bind(struct net_alias_type *nat, - struct net_alias *alias, struct sockaddr *sa) -{ - if (nat->alias_init_1) - nat->alias_init_1(nat, alias, sa); - return nat_attach_chg(nat, +1); -} - - -/* - * Unbind alias from type object and call alias destructor - */ - -extern __inline__ int nat_unbind(struct net_alias_type *nat, - struct net_alias *alias) -{ - if (nat->alias_done_1) - nat->alias_done_1(nat, alias); - return nat_attach_chg(nat, -1); -} - - -/* - * Compare device address with given. if NULL nat->dev_addr_chk, - * compare dev->pa_addr with (sockaddr_in) 32 bits address (IP-ish) - */ - -static __inline__ int nat_dev_addr_chk_1(struct net_alias_type *nat, - struct device *dev, struct sockaddr *sa) -{ - if (nat->dev_addr_chk) - return nat->dev_addr_chk(nat, dev, sa); - else - return (dev->pa_addr == (*(struct sockaddr_in *)sa).sin_addr.s_addr); -} - - -/* - * Alias device init() - * do nothing. - */ - -static int net_alias_devinit(struct device *dev) -{ -#ifdef ALIAS_USER_LAND_DEBUG - printk("net_alias_devinit(%s) called.\n", dev->name); -#endif - return 0; -} - - -/* - * 2 options for multicast: - * 1) fake it for aliases. - * 2) allow aliases and actual device to set it. 
- * current choice: option 1 - */ -static void net_alias_setmulticast(struct device *dev) -{ -} - - -/* - * Hard_start_xmit() should not be called. - * ignore ... but shout!. - */ - -static int net_alias_hard_start_xmit(struct sk_buff *skb, struct device *dev) -{ - printk(KERN_WARNING "net_alias: net_alias_hard_start_xmit() for %s called (ignored)!!\n", dev->name); - dev_kfree_skb(skb, FREE_WRITE); - return 0; -} - - -static int net_alias_open(struct device * dev) -{ - return 0; -} - -static int net_alias_close(struct device * dev) -{ - return 0; -} - -/* - * setups a new (alias) device - */ - -static int net_alias_devsetup(struct net_alias *alias, - struct net_alias_type *nat, struct sockaddr *sa) -{ - struct device *main_dev; - struct device *dev; - int family; - int i; - - /* - * - * generic device setup based on main_dev info - * - * FIXME: is NULL bitwise 0 for all Linux platforms? - */ - - main_dev = alias->main_dev; - dev = &alias->dev; - memset(dev, '\0', sizeof(struct device)); - family = (sa)? 
sa->sa_family : main_dev->family; - - dev->alias_info = NULL; /* no aliasing recursion */ - dev->my_alias = alias; /* point to alias */ - dev->name = alias->name; - dev->type = main_dev->type; - dev->open = net_alias_open; - dev->stop = net_alias_close; - if (main_dev->set_multicast_list) - dev->set_multicast_list = net_alias_setmulticast; - dev->hard_header_len = main_dev->hard_header_len; - memcpy(dev->broadcast, main_dev->broadcast, MAX_ADDR_LEN); - memcpy(dev->dev_addr, main_dev->dev_addr, MAX_ADDR_LEN); - dev->addr_len = main_dev->addr_len; - dev->init = net_alias_devinit; - dev->hard_start_xmit = net_alias_hard_start_xmit; - dev->flags = main_dev->flags & NET_ALIAS_IFF_MASK & ~IFF_UP; - dev->ifindex = dev_new_index(); - - /* - * Only makes sense if same family (arguable) - */ - - if (family == main_dev->family) - { - dev->metric = main_dev->metric; - dev->mtu = main_dev->mtu; - dev->pa_alen = main_dev->pa_alen; - dev->hard_header = main_dev->hard_header; - dev->hard_header_cache = main_dev->hard_header_cache; - dev->header_cache_update = main_dev->header_cache_update; - dev->rebuild_header = main_dev->rebuild_header; - } - - /* - * Fill in the generic fields of the device structure. - * not actually used, avoids some dev.c #ifdef's - */ - - for (i = 0; i < DEV_NUMBUFFS; i++) - skb_queue_head_init(&dev->buffs[i]); - - dev->family = family; - return 0; -} - - -/* - * Slow alias find (parse the whole hash_tab) - * returns: alias' pointer address - */ - -static struct net_alias **net_alias_slow_findp(struct net_alias_info - *alias_info, struct net_alias *alias) -{ - unsigned idx, n_aliases; - struct net_alias **aliasp; - - /* - * For each alias_info's hash_tab entry, for every alias ... 
- */ - - n_aliases = alias_info->n_aliases; - for (idx=0; idx < 16 ; idx++) - { - for (aliasp = &alias_info->hash_tab[idx];*aliasp; - aliasp = &(*aliasp)->next) - { - if (*aliasp == alias) - return aliasp; - else - if (--n_aliases == 0) - break; /* faster give up */ - } - } - return NULL; -} - - -/* - * Create alias device for main_dev with given slot num. - * if sa==NULL will create a same_family alias device. - */ - -static struct device *net_alias_dev_create(struct device *main_dev, int slot, - int *err, struct sockaddr *sa, void *data) -{ - struct net_alias_info *alias_info; - struct net_alias *alias, **aliasp; - struct net_alias_type *nat; - struct device *dev; - unsigned long flags; - int family; - __u32 addr32; - - /* FIXME: lock */ - - alias_info = main_dev->alias_info; - - /* - * If NULL address given, take family from main_dev - */ - - family = (sa)? sa->sa_family : main_dev->family; - - /* - * Check if wanted family has a net_alias_type object registered - */ - - nat = nat_getbytype(family); - if (!nat) - { -#ifdef CONFIG_KERNELD - char modname[20]; - sprintf (modname,"netalias-%d", family); - request_module(modname); - - nat = nat_getbytype(family); - if (!nat) - { -#endif - printk(KERN_WARNING "net_alias_dev_create(%s:%d): unregistered family==%d\n", - main_dev->name, slot, family); - /* *err = -EAFNOSUPPORT; */ - *err = -EINVAL; - return NULL; -#ifdef CONFIG_KERNELD - } -#endif - } - - /* - * Do not allow creation over downed devices - */ - - *err = -EIO; - - if (! 
(main_dev->flags & IFF_UP) ) - return NULL; - - /* - * If first alias, must also create alias_info - */ - - *err = -ENOMEM; - - if (!alias_info) - { - alias_info = kmalloc(sizeof(struct net_alias_info), GFP_KERNEL); - if (!alias_info) - return NULL; /* ENOMEM */ - memset(alias_info, 0, sizeof(struct net_alias_info)); - } - - if (!(alias = kmalloc(sizeof(struct net_alias), GFP_KERNEL))) - return NULL; /* ENOMEM */ - - memset(alias, 0, sizeof(struct net_alias)); - alias->slot = slot; - alias->main_dev = main_dev; - alias->nat = nat; - alias->next = NULL; - alias->data = data; - sprintf(alias->name, "%s:%d", main_dev->name, slot); - - /* - * Initialise alias' device structure - */ - - net_alias_devsetup(alias, nat, sa); - - dev = &alias->dev; - - save_flags(flags); - cli(); - - /* - * bind alias to its object type - * nat_bind calls nat->alias_init_1 - */ - - nat_bind(nat, alias, sa); - - /* - * If no address passed, take from device (could have been - * set by nat->alias_init_1) - */ - - addr32 = (sa)? nat_addr32(nat, sa) : alias->dev.pa_addr; - - /* - * Store hash key in alias: will speed-up rehashing and deletion - */ - - alias->hash = HASH(addr32, family); - - /* - * Insert alias in hashed linked list - */ - - aliasp = &alias_info->hash_tab[alias->hash]; - alias->next = *aliasp; - *aliasp = alias; - - /* - * If first alias ... 
- */ - - if (!alias_info->n_aliases++) - { - alias_info->taildev = main_dev; - main_dev->alias_info = alias_info; - } - - /* - * add device at tail (just after last main_dev alias) - */ - - dev->next = alias_info->taildev->next; - alias_info->taildev->next = dev; - alias_info->taildev = dev; - restore_flags(flags); - return dev; -} - - -/* - * Delete one main_dev alias (referred by its slot num) - */ - -static struct device *net_alias_dev_delete(struct device *main_dev, int slot, - int *err) -{ - struct net_alias_info *alias_info; - struct net_alias *alias, **aliasp; - struct device *dev; - unsigned n_aliases; - unsigned long flags; - struct net_alias_type *nat; - struct device *prevdev; - - /* FIXME: lock */ - *err = -ENODEV; - - if (main_dev == NULL) - return NULL; - - /* - * Does main_dev have aliases? - */ - - alias_info = main_dev->alias_info; - if (!alias_info) - return NULL; /* ENODEV */ - - n_aliases = alias_info->n_aliases; - - /* - * Find device that holds the same slot number (could also - * be strcmp() ala dev_get). - */ - - for (prevdev=main_dev, alias = NULL; - prevdev->next && n_aliases; prevdev = prevdev->next) - { - if (!(alias = prevdev->next->my_alias)) - { - printk(KERN_ERR "net_alias_dev_delete(): incorrect non-alias device after maindev\n"); - continue; /* or should give up? 
*/ - } - if (alias->slot == slot) - break; - alias = NULL; - n_aliases--; - } - - if (!alias) - return NULL; /* ENODEV */ - - dev = &alias->dev; - - /* - * Find alias hashed entry - */ - - for(aliasp = &alias_info->hash_tab[alias->hash]; *aliasp; - aliasp = &(*aliasp)->next) - { - if(*aliasp == alias) - break; - } - - /* - * If not found (???), try a full search - */ - - if (*aliasp != alias) - { - if ((aliasp = net_alias_slow_findp(alias_info, alias))) - printk(KERN_WARNING "net_alias_dev_delete(%s): bad hashing recovered\n", alias->name); - else - { - printk(KERN_ERR "net_alias_dev_delete(%s): unhashed alias!\n",alias->name); - return NULL; /* ENODEV */ - } - } - nat = alias->nat; - - save_flags(flags); - cli(); - - /* - * Unbind alias from alias_type obj. - */ - - nat_unbind(nat, alias); - - /* - * Is alias at tail? - */ - - if ( dev == alias_info->taildev ) - alias_info->taildev = prevdev; - - /* - * Unlink and close device - */ - prevdev->next = dev->next; - dev_close(dev); - - /* - * Unlink alias - */ - - *aliasp = (*aliasp)->next; - if (--alias_info->n_aliases == 0) /* last alias */ - main_dev->alias_info = NULL; - - restore_flags(flags); - - /* - * Now free structures - */ - - kfree_s(alias, sizeof(struct net_alias)); - if (main_dev->alias_info == NULL) - kfree_s(alias_info, sizeof(struct net_alias_info)); - - /* - * Deletion ok (*err=0), NULL device returned. - */ - - *err = 0; - return NULL; -} - -/* - * Free all main device aliasing stuff - * will be called on dev_close(main_dev) - */ - -static void net_alias_free(struct device *main_dev) -{ - struct net_alias_info *alias_info; - struct net_alias *alias; - struct net_alias_type *nat; - struct device *dev; - unsigned long flags; - - /* - * Do I really have aliases? 
- */ - - if (!(alias_info = main_dev->alias_info)) - return; - - /* - * Fast device link "short-circuit": set main_dev->next to - * device after last alias - */ - - save_flags(flags); - cli(); - - dev = main_dev->next; - main_dev->next = alias_info->taildev->next; - main_dev->alias_info = NULL; - alias_info->taildev->next = NULL; - - restore_flags(flags); - - /* - * Loop over alias devices, free and dev_close() - */ - - while (dev) - { - if (net_alias_is(dev)) - { - alias = dev->my_alias; - if (alias->main_dev == main_dev) - { - /* - * unbind alias from alias_type object - */ - nat = alias->nat; - if (nat) - { - nat_unbind(nat, alias); - } /* else error/printk ??? */ - - dev_close(dev); - dev = dev->next; - - kfree_s(alias, sizeof(struct net_alias)); - continue; - } - else - printk(KERN_ERR "net_alias_free(%s): '%s' is not my alias\n", - main_dev->name, alias->name); - } - else - { - printk(KERN_ERR "net_alias_free(%s): found a non-alias after device!\n", - main_dev->name); - } - dev = dev->next; - } - - kfree_s(alias_info, sizeof(alias_info)); - return; -} - -/* - * dev_get() with added alias naming magic. - */ - -struct device *net_alias_dev_get(char *dev_name, int aliasing_ok, int *err, - struct sockaddr *sa, void *data) -{ - struct device *dev; - char *sptr,*eptr; - int slot = 0; - int delete = 0; - - *err = -ENODEV; - if ((dev=dev_get(dev_name))) - return dev; - - /* - * Want alias naming magic? 
- */ - - if (!aliasing_ok) - return NULL; - - if (!dev_name || !*dev_name) - return NULL; - - /* - * Find the first ':' , must be followed by, at least, 1 char - */ - - sptr=strchr(dev_name,':'); - if (sptr==NULL || !sptr[1]) - return NULL; - -#if 0 - for (sptr=dev_name ; *sptr ; sptr++) - if(*sptr==':') - break; - if (!*sptr || !*(sptr+1)) - return NULL; -#endif - /* - * Seems to be an alias name, fetch main device - */ - - *sptr='\0'; - if (!(dev=dev_get(dev_name))) - return NULL; - *sptr++=':'; - - /* - * Fetch slot number - */ - - slot = simple_strtoul(sptr,&eptr,10); - if (slot >= NET_ALIAS_MAX_SLOT) - return NULL; - - /* - * If last char is '-', it is a deletion request - */ - - if (eptr[0] == '-' && !eptr[1] ) - delete++; - else if (eptr[0]) - return NULL; - - /* - * Well... let's work. - */ - - if (delete) - return net_alias_dev_delete(dev, slot, err); - else - return net_alias_dev_create(dev, slot, err, sa, data); -} - - -/* - * Rehash alias device with address supplied. - */ - -int net_alias_dev_rehash(struct device *dev, struct sockaddr *sa) -{ - struct net_alias_info *alias_info; - struct net_alias *alias, **aliasp; - struct device *main_dev; - unsigned long flags; - struct net_alias_type *o_nat, *n_nat; - unsigned n_hash; - - /* - * Defensive ... - */ - - if (dev == NULL) - return -1; - if ( (alias = dev->my_alias) == NULL ) - return -1; - - if (!sa) - { - printk(KERN_ERR "net_alias_rehash(): NULL sockaddr passed\n"); - return -1; - } - - /* - * Defensive. should not happen. - */ - - if ( (main_dev = alias->main_dev) == NULL ) - { - printk(KERN_ERR "net_alias_rehash for %s: NULL maindev\n", alias->name); - return -1; - } - - /* - * Defensive. should not happen. - */ - - if (!(alias_info=main_dev->alias_info)) - { - printk(KERN_ERR "net_alias_rehash for %s: NULL alias_info\n", alias->name); - return -1; - } - - /* - * Will the request also change device family? 
- */ - - o_nat = alias->nat; - if (!o_nat) - { - printk(KERN_ERR "net_alias_rehash(%s): unbound alias.\n", alias->name); - return -1; - } - - /* - * Point to new alias_type obj. - */ - - if (o_nat->type == sa->sa_family) - n_nat = o_nat; - else - { - n_nat = nat_getbytype(sa->sa_family); - if (!n_nat) - { - printk(KERN_ERR "net_alias_rehash(%s): unreg family==%d.\n", alias->name, sa->sa_family); - return -1; - } - } - - /* - * New hash key. if same as old AND same type (family) return; - */ - - n_hash = nat_hash_key(n_nat, sa); - if (n_hash == alias->hash && o_nat == n_nat ) - return 0; - - /* - * Find alias in hashed list - */ - - for (aliasp = &alias_info->hash_tab[alias->hash]; *aliasp; - aliasp = &(*aliasp)->next) - { - if (*aliasp == alias) - break; - } - - /* - * Not found (???). try a full search - */ - - if(!*aliasp) - { - if ((aliasp = net_alias_slow_findp(alias_info, alias))) - { - printk(KERN_WARNING - "net_alias_rehash(%s): bad hashing recovered\n", alias->name); - } - else - { - printk(KERN_ERR "net_alias_rehash(%s): unhashed alias!\n", alias->name); - return -1; - } - } - - save_flags(flags); - cli(); - - /* - * If type (family) changed, unlink from old type object (o_nat) - * Will call o_nat->alias_done_1() - */ - - if (o_nat != n_nat) - nat_unbind(o_nat, alias); - - /* - * If diff hash key, change alias position in hashed list - */ - - if (n_hash != alias->hash) - { - *aliasp = (*aliasp)->next; - alias->hash = n_hash; - aliasp = &alias_info->hash_tab[n_hash]; - alias->next = *aliasp; - *aliasp = alias; - } - - /* - * If type (family) changed link to new type object (n_nat) - * will call n_nat->alias_init_1() - */ - - if (o_nat != n_nat) - nat_bind(n_nat, alias, sa); - - restore_flags(flags); - return 0; -} - - - - -/* - * Implements /proc/net/alias_types entry - * Shows net_alias_type objects registered. 
- */ - -int net_alias_types_getinfo(char *buffer, char **start, off_t offset, int length, int dummy) -{ - off_t pos=0, begin=0; - int len=0; - struct net_alias_type *nat; - unsigned idx; - len=sprintf(buffer,"type name n_attach\n"); - for (idx=0 ; idx < 16 ; idx++) - { - for (nat = nat_base[idx]; nat ; nat = nat->next) - { - len += sprintf(buffer+len, "%-7d %-15s %-7d\n", - nat->type, nat->name,nat->n_attach); - pos=begin+len; - if(posoffset+length) - break; - } - } - *start=buffer+(offset-begin); - len-=(offset-begin); - if(len>length) - len=length; - return len; -} - - -/* - * Implements /proc/net/aliases entry, shows alias devices. - * calls alias nat->alias_print_1 if not NULL and formats everything - * to a fixed rec. size without using local (stack) buffers - * - */ - -#define NET_ALIASES_RECSIZ 64 - -int net_alias_getinfo(char *buffer, char **start, off_t offset, - int length, int dummy) -{ - off_t pos=0, begin=0; - int len=0; - int dlen; - struct net_alias_type *nat; - struct net_alias *alias; - struct device *dev; - - len=sprintf(buffer,"%-*s\n",NET_ALIASES_RECSIZ-1,"device family address"); - for (dev = dev_base; dev ; dev = dev->next) - { - if (net_alias_is(dev)) - { - alias = dev->my_alias; - nat = alias->nat; - dlen=sprintf(buffer+len, "%-16s %-6d ", alias->name, alias->dev.family); - - /* - * Call alias_type specific print function. 
- */ - - if (nat->alias_print_1) - dlen += nat->alias_print_1(nat, alias, buffer+len+dlen, NET_ALIASES_RECSIZ - dlen); - else - dlen += sprintf(buffer+len+dlen, "-"); - - /* - * Fill with spaces if needed - */ - - if (dlen < NET_ALIASES_RECSIZ) - memset(buffer+len+dlen, ' ', NET_ALIASES_RECSIZ - dlen); - - /* - * Truncate to NET_ALIASES_RECSIZ - */ - - len += NET_ALIASES_RECSIZ; - buffer[len-1] = '\n'; - - pos=begin+len; - if(posoffset+length) - break; - } - } - *start=buffer+(offset-begin); - len-=(offset-begin); - if(len>length) - len=length; - return len; -} - - -/* - * Notifier for devices events - */ - -int net_alias_device_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct device *dev = ptr; - - if (event == NETDEV_DOWN) - { -#ifdef ALIAS_USER_LAND_DEBUG - printk("net_alias: NETDEV_DOWN for %s received\n", dev->name); -#endif - if (net_alias_has(dev)) - net_alias_free(dev); - } - - if (event == NETDEV_UP) - { -#ifdef ALIAS_USER_LAND_DEBUG - printk("net_alias: NETDEV_UP for %s received\n", dev->name); -#endif - dev->alias_info = 0; - } - - return NOTIFY_DONE; -} - - -/* - * Device aliases address comparison workhorse - * No checks for nat and alias_info, must be !NULL - */ - -extern __inline__ struct device *nat_addr_chk(struct net_alias_type *nat, - struct net_alias_info *alias_info, struct sockaddr *sa, int flags_on, int flags_off) -{ - struct net_alias *alias; - for(alias = alias_info->hash_tab[nat_hash_key(nat,sa)]; - alias; alias = alias->next) - { - if (alias->dev.family != sa->sa_family) - continue; - - /* - * Nat_dev_addr_chk_1 will call type specific address - * cmp function. - */ - - if (alias->dev.flags & flags_on && - !(alias->dev.flags & flags_off) && - nat_dev_addr_chk_1(nat,&alias->dev,sa)) - return &alias->dev; - } - return NULL; -} - -/* - * Nat_addr_chk enough for protocols whose addr is (fully) stored at - * pa_addr. Note that nat pointer is ignored because of static comparison. 
- */ - -extern __inline__ struct device *nat_addr_chk32(struct net_alias_type *nat, - struct net_alias_info *alias_info, int family, __u32 addr32, - int flags_on, int flags_off) -{ - struct net_alias *alias; - for (alias=alias_info->hash_tab[HASH(addr32,family)]; - alias; alias=alias->next) - { - if (alias->dev.family != family) - continue; - /* - * "hard" (static) comparison between addr32 and pa_addr. - */ - - if (alias->dev.flags & flags_on && !(alias->dev.flags & flags_off) && - addr32 == alias->dev.pa_addr) - return &alias->dev; - } - return NULL; -} - -/* - * Returns alias device with specified address AND flags_on AND flags_off, - * else NULL. - * Intended for main devices. - */ - -struct device *net_alias_dev_chk(struct device *main_dev, - struct sockaddr *sa,int flags_on, int flags_off) -{ - struct net_alias_info *alias_info = main_dev->alias_info; - struct net_alias_type *nat; - - /* - * Only if main_dev has aliases - */ - - if (!alias_info) - return NULL; - - /* - * Get alias_type object for sa->sa_family. - */ - - nat = nat_getbytype(sa->sa_family); - if (!nat) - return NULL; - - return nat_addr_chk(nat, alias_info, sa, flags_on, flags_off); -} - -/* - * net_alias_dev_chk enough for protocols whose addr is (fully) stored - * at pa_addr. - */ - -struct device *net_alias_dev_chk32(struct device *main_dev, int family, - __u32 addr32, int flags_on, int flags_off) -{ - struct net_alias_info *alias_info = main_dev->alias_info; - - /* - * only if main_dev has aliases - */ - - if (!alias_info) - return NULL; - return nat_addr_chk32(NULL, alias_info, family, addr32, - flags_on, flags_off); -} - - -/* - * Select closest (main or alias) device to addresses given. If - * there is no further info available, return main_dev (for easier - * calling arrangement). 
- * - * Should be called early at xxx_rcv() time for device selection - */ - -struct device *net_alias_dev_rcv_sel(struct device *main_dev, - struct sockaddr *sa_src, struct sockaddr *sa_dst) -{ - int family; - struct net_alias_type *nat; - struct net_alias_info *alias_info; - struct device *dev; - - if (main_dev == NULL) - return NULL; - - /* - * If not aliased, don't bother any more - */ - - if ((alias_info = main_dev->alias_info) == NULL) - return main_dev; - - /* - * Find out family - */ - - family = (sa_src)? sa_src->sa_family : - ((sa_dst)? sa_dst->sa_family : AF_UNSPEC); - - if (family == AF_UNSPEC) - return main_dev; - - /* - * Get net_alias_type object for this family - */ - - if ( (nat = nat_getbytype(family)) == NULL ) - return main_dev; - - /* - * First step: find out if dst addr is main_dev's or one of its - * aliases' - */ - - if (sa_dst) - { - if (nat_dev_addr_chk_1(nat, main_dev,sa_dst)) - return main_dev; - - dev = nat_addr_chk(nat, alias_info, sa_dst, IFF_UP, 0); - - if (dev != NULL) - return dev; - } - - /* - * Second step: find the rcv addr 'closest' alias through nat - * method call - */ - - if ( sa_src == NULL || nat->dev_select == NULL) - return main_dev; - - dev = nat->dev_select(nat, main_dev, sa_src); - - if (dev == NULL || dev->family != family) - return main_dev; - - /* - * Dev ok only if it is alias of main_dev - */ - - dev = net_alias_is(dev)? - ( (dev->my_alias->main_dev == main_dev)? dev : NULL) : NULL; - - /* - * Do not return NULL. - */ - - return (dev)? dev : main_dev; - -} - -/* - * dev_rcv_sel32: dev_rcv_sel for 'pa_addr' protocols. 
- */ - -struct device *net_alias_dev_rcv_sel32(struct device *main_dev, int family, - __u32 src, __u32 dst) -{ - struct net_alias_type *nat; - struct net_alias_info *alias_info; - struct sockaddr_in sin_src; - struct device *dev; - - if (main_dev == NULL) - return NULL; - - /* - * If not aliased, don't bother any more - */ - - if ((alias_info = main_dev->alias_info) == NULL) - return main_dev; - - /* - * Early return if dst is main_dev's address - */ - - if (dst == main_dev->pa_addr) - return main_dev; - - if (family == AF_UNSPEC) - return main_dev; - - /* - * Get net_alias_type object for this family - */ - - if ( (nat = nat_getbytype(family)) == NULL ) - return main_dev; - - /* - * First step: find out if dst address one of main_dev aliases' - */ - - if (dst) - { - dev = nat_addr_chk32(nat, alias_info, family, dst, IFF_UP, 0); - if (dev) - return dev; - } - - /* - * Second step: find the rcv addr 'closest' alias through nat - * method call - */ - - if ( src == 0 || nat->dev_select == NULL) - return main_dev; - - sin_src.sin_family = family; - sin_src.sin_addr.s_addr = src; - - dev = nat->dev_select(nat, main_dev, (struct sockaddr *)&sin_src); - - if (dev == NULL || dev->family != family) - return main_dev; - - /* - * Dev ok only if it is alias of main_dev - */ - - dev = net_alias_is(dev)? - ( (dev->my_alias->main_dev == main_dev)? dev : NULL) : NULL; - - /* - * Do not return NULL. - */ - - return (dev)? 
dev : main_dev; -} - - -/* - * Device event hook - */ - -static struct notifier_block net_alias_dev_notifier = -{ - net_alias_device_event, - NULL, - 0 -}; - -#ifndef ALIAS_USER_LAND_DEBUG -#ifdef CONFIG_PROC_FS -static struct proc_dir_entry proc_net_alias_types = { - PROC_NET_ALIAS_TYPES, 11, "alias_types", - S_IFREG | S_IRUGO, 1, 0, 0, - 0, &proc_net_inode_operations, - net_alias_types_getinfo -}; -static struct proc_dir_entry proc_net_aliases = { - PROC_NET_ALIASES, 7, "aliases", - S_IFREG | S_IRUGO, 1, 0, 0, - 0, &proc_net_inode_operations, - net_alias_getinfo -}; -#endif -#endif - -/* - * Net_alias initialisation called from net_dev_init(). - */ - -__initfunc(void net_alias_init(void)) -{ - - /* - * Register device events notifier - */ - - register_netdevice_notifier(&net_alias_dev_notifier); - - /* - * Register /proc/net entries - */ - -#ifndef ALIAS_USER_LAND_DEBUG -#ifdef CONFIG_PROC_FS - proc_net_register(&proc_net_alias_types); - proc_net_register(&proc_net_aliases); -#endif -#endif - -} - -/* - * Net_alias type object registering func. - */ - -int register_net_alias_type(struct net_alias_type *nat, int type) -{ - unsigned hash; - unsigned long flags; - if (!nat) - { - printk(KERN_ERR "register_net_alias_type(): NULL arg\n"); - return -EINVAL; - } - nat->type = type; - nat->n_attach = 0; - hash = nat->type & 0x0f; - save_flags(flags); - cli(); - nat->next = nat_base[hash]; - nat_base[hash] = nat; - restore_flags(flags); - return 0; -} - -/* - * Net_alias type object unreg. - */ - -int unregister_net_alias_type(struct net_alias_type *nat) -{ - struct net_alias_type **natp; - unsigned hash; - unsigned long flags; - - if (!nat) - { - printk(KERN_ERR "unregister_net_alias_type(): NULL arg\n"); - return -EINVAL; - } - - /* - * Only allow unregistration if it has no attachments - */ - - if (nat->n_attach) - { - printk(KERN_ERR "unregister_net_alias_type(): has %d attachments. 
failed\n", - nat->n_attach); - return -EINVAL; - } - hash = nat->type & 0x0f; - save_flags(flags); - cli(); - for (natp = &nat_base[hash]; *natp ; natp = &(*natp)->next) - { - if (nat==(*natp)) - { - *natp = nat->next; - restore_flags(flags); - return 0; - } - } - restore_flags(flags); - printk(KERN_ERR "unregister_net_alias_type(type=%d): not found!\n", nat->type); - return -EINVAL; -} - diff -u --recursive --new-file v2.1.67/linux/net/core/rtnetlink.c linux/net/core/rtnetlink.c --- v2.1.67/linux/net/core/rtnetlink.c Wed Dec 31 16:00:00 1969 +++ linux/net/core/rtnetlink.c Sun Nov 30 14:00:39 1997 @@ -0,0 +1,436 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Routing netlink socket interface: protocol independent part. + * + * Authors: Alexey Kuznetsov, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +atomic_t rtnl_rlockct; +struct wait_queue *rtnl_wait; + + +void rtnl_lock() +{ + rtnl_shlock(); + rtnl_exlock(); +} + +void rtnl_unlock() +{ + rtnl_exunlock(); + rtnl_shunlock(); +} + +#ifdef CONFIG_RTNETLINK +struct sock *rtnl; + +unsigned long rtnl_wlockct; + +struct rtnetlink_link * rtnetlink_links[NPROTO]; + +#define _S 1 /* superuser privileges required */ +#define _X 2 /* exclusive access to tables required */ +#define _G 4 /* GET request */ + +static unsigned char rtm_properties[RTM_MAX-RTM_BASE+1] = +{ + _S|_X, /* RTM_NEWLINK */ + _S|_X, /* RTM_DELLINK */ + _G, /* RTM_GETLINK */ + 0, + + _S|_X, /* RTM_NEWADDR */ + _S|_X, /* RTM_DELADDR */ + _G, /* RTM_GETADDR */ + 0, + + _S|_X, /* RTM_NEWROUTE */ + _S|_X, /* RTM_DELROUTE */ + _G, /* RTM_GETROUTE */ + 0, + + _S|_X, /* RTM_NEWNEIGH */ + _S|_X, /* RTM_DELNEIGH */ + _G, /* RTM_GETNEIGH */ + 0, + + _S|_X, /* RTM_NEWRULE */ + _S|_X, /* RTM_DELRULE */ + _G, /* RTM_GETRULE */ + 0 +}; + +static int rtnetlink_get_rta(struct kern_rta *rta, struct rtattr *attr, int attrlen) +{ + void **rta_data = (void**)rta; + + while (RTA_OK(attr, attrlen)) { + int type = attr->rta_type; + if (type != RTA_UNSPEC) { + if (type > RTA_MAX) + return -EINVAL; + rta_data[type-1] = RTA_DATA(attr); + } + attr = RTA_NEXT(attr, attrlen); + } + return 0; +} + +static int rtnetlink_get_ifa(struct kern_ifa *ifa, struct rtattr *attr, int attrlen) +{ + void **ifa_data = (void**)ifa; + + while (RTA_OK(attr, attrlen)) { + int type = attr->rta_type; + if (type != IFA_UNSPEC) { + if (type > IFA_MAX) + return -EINVAL; + ifa_data[type-1] = RTA_DATA(attr); + } + attr = RTA_NEXT(attr, attrlen); + } + return 0; +} + +void __rta_fill(struct 
sk_buff *skb, int attrtype, int attrlen, const void *data) +{ + struct rtattr *rta; + int size = RTA_LENGTH(attrlen); + + rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size)); + rta->rta_type = attrtype; + rta->rta_len = size; + memcpy(RTA_DATA(rta), data, attrlen); +} + +static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct device *dev, + int type, pid_t pid, u32 seq) +{ + struct ifinfomsg *r; + struct nlmsghdr *nlh; + + nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r)); + if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; + r = NLMSG_DATA(nlh); + r->ifi_addrlen = dev->addr_len; + r->ifi_address.sa_family = dev->type; + memcpy(&r->ifi_address.sa_data, dev->dev_addr, dev->addr_len); + r->ifi_broadcast.sa_family = dev->type; + memcpy(&r->ifi_broadcast.sa_data, dev->broadcast, dev->addr_len); + r->ifi_flags = dev->flags; + r->ifi_mtu = dev->mtu; + r->ifi_index = dev->ifindex; + r->ifi_link = dev->iflink; + strncpy(r->ifi_name, dev->name, IFNAMSIZ-1); + r->ifi_qdiscname[0] = 0; + r->ifi_qdisc = dev->qdisc_sleeping->handle; + if (dev->qdisc_sleeping->ops) + strcpy(r->ifi_qdiscname, dev->qdisc_sleeping->ops->id); + return skb->len; + +nlmsg_failure: + return -1; +} + +int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx; + int s_idx = cb->args[0]; + struct device *dev; + + for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + if (idx < s_idx) + continue; + if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq) <= 0) + break; + } + cb->args[0] = idx; + + return skb->len; +} + +void rtmsg_ifinfo(int type, struct device *dev) +{ + struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct ifinfomsg)); + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return; + + if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0) < 0) { + kfree_skb(skb, 0); + return; + } + NETLINK_CB(skb).dst_groups = RTMGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL); +} + +static int rtnetlink_done(struct 
netlink_callback *cb) +{ + if (NETLINK_CREDS(cb->skb)->uid == 0 && cb->nlh->nlmsg_flags&NLM_F_ATOMIC) + rtnl_shunlock(); + return 0; +} + +/* Process one rtnetlink message. */ + +extern __inline__ int +rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) +{ + union { + struct kern_rta rta; + struct kern_ifa ifa; + } u; + struct rtmsg *rtm; + struct ifaddrmsg *ifm; + int exclusive = 0; + int family; + int type; + int err; + + if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) + return 0; + type = nlh->nlmsg_type; + if (type < RTM_BASE) + return 0; + if (type > RTM_MAX) + goto err_inval; + + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) + return 0; + family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; + if (family > NPROTO || rtnetlink_links[family] == NULL) { + *errp = -EAFNOSUPPORT; + return -1; + } + if (rtm_properties[type-RTM_BASE]&_S) { + if (NETLINK_CREDS(skb)->uid) { + *errp = -EPERM; + return -1; + } + } + if (rtm_properties[type-RTM_BASE]&_G && nlh->nlmsg_flags&NLM_F_DUMP) { + if (rtnetlink_links[family][type-RTM_BASE].dumpit == NULL) + goto err_inval; + + /* Super-user locks all the tables to get atomic snapshot */ + if (NETLINK_CREDS(skb)->uid == 0 && nlh->nlmsg_flags&NLM_F_ATOMIC) + atomic_inc(&rtnl_rlockct); + if ((*errp = netlink_dump_start(rtnl, skb, nlh, + rtnetlink_links[family][type-RTM_BASE].dumpit, + rtnetlink_done)) != 0) { + if (NETLINK_CREDS(skb)->uid == 0 && nlh->nlmsg_flags&NLM_F_ATOMIC) + atomic_dec(&rtnl_rlockct); + return -1; + } + skb_pull(skb, NLMSG_ALIGN(nlh->nlmsg_len)); + return -1; + } + if (rtm_properties[type-RTM_BASE]&_X) { + if (rtnl_exlock_nowait()) { + *errp = 0; + return -1; + } + exclusive = 1; + } + + memset(&u, 0, sizeof(u)); + + switch (nlh->nlmsg_type) { + case RTM_NEWROUTE: + case RTM_DELROUTE: + case RTM_GETROUTE: + case RTM_NEWRULE: + case RTM_DELRULE: + case RTM_GETRULE: + rtm = NLMSG_DATA(nlh); + if (nlh->nlmsg_len < sizeof(*rtm)) + goto err_inval; + + if (rtm->rtm_optlen && + 
rtnetlink_get_rta(&u.rta, RTM_RTA(rtm), rtm->rtm_optlen) < 0) + goto err_inval; + break; + + case RTM_NEWADDR: + case RTM_DELADDR: + case RTM_GETADDR: + ifm = NLMSG_DATA(nlh); + if (nlh->nlmsg_len < sizeof(*ifm)) + goto err_inval; + + if (nlh->nlmsg_len > NLMSG_LENGTH(sizeof(*ifm)) && + rtnetlink_get_ifa(&u.ifa, IFA_RTA(ifm), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifm))) < 0) + goto err_inval; + break; + + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_GETLINK: + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + case RTM_GETNEIGH: + /* Not urgent and even not necessary */ + default: + goto err_inval; + } + + if (rtnetlink_links[family][type-RTM_BASE].doit == NULL) + goto err_inval; + err = rtnetlink_links[family][type-RTM_BASE].doit(skb, nlh, (void *)&u); + + if (exclusive) + rtnl_exunlock(); + *errp = err; + return err; + +err_inval: + if (exclusive) + rtnl_exunlock(); + *errp = -EINVAL; + return -1; +} + +/* + * Process one packet of messages. + * Malformed skbs with wrong lengths of messages are discarded silently. + */ + +extern __inline__ int rtnetlink_rcv_skb(struct sk_buff *skb) +{ + int err; + struct nlmsghdr * nlh; + + while (skb->len >= NLMSG_SPACE(0)) { + int rlen; + + nlh = (struct nlmsghdr *)skb->data; + if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) + return 0; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + if (rtnetlink_rcv_msg(skb, nlh, &err)) { + /* Not error, but we must interrupt processing here: + * Note, that in this case we do not pull message + * from skb, it will be processed later. + */ + if (err == 0) + return -1; + netlink_ack(skb, nlh, err); + } else if (nlh->nlmsg_flags&NLM_F_ACK) + netlink_ack(skb, nlh, 0); + skb_pull(skb, rlen); + } + + return 0; +} + +/* + * rtnetlink input queue processing routine: + * - try to acquire shared lock. If it is failed, defer processing. 
+ * - feed skbs to rtnetlink_rcv_skb, until it refuse a message, + * that will occur, when a dump started and/or acquisition of + * exclusive lock failed. + */ + +static void rtnetlink_rcv(struct sock *sk, int len) +{ + struct sk_buff *skb; + + if (rtnl_shlock_nowait()) + return; + + while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { + if (rtnetlink_rcv_skb(skb)) { + if (skb->len) + skb_queue_head(&sk->receive_queue, skb); + else + kfree_skb(skb, FREE_READ); + break; + } + kfree_skb(skb, FREE_READ); + } + + rtnl_shunlock(); +} + +static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct device *dev = ptr; + switch (event) { + case NETDEV_UNREGISTER: + rtmsg_ifinfo(RTM_DELLINK, dev); + break; + default: + rtmsg_ifinfo(RTM_NEWLINK, dev); + break; + } + return NOTIFY_DONE; +} + +struct notifier_block rtnetlink_dev_notifier = { + rtnetlink_event, + NULL, + 0 +}; + + +__initfunc(void rtnetlink_init(void)) +{ +#ifdef RTNL_DEBUG + printk("Initializing RT netlink socket\n"); +#endif + rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); + if (rtnl == NULL) + panic("rtnetlink_init: cannot initialize rtnetlink\n"); + register_netdevice_notifier(&rtnetlink_dev_notifier); +} + + + +#endif diff -u --recursive --new-file v2.1.67/linux/net/core/scm.c linux/net/core/scm.c --- v2.1.67/linux/net/core/scm.c Thu Jul 17 10:06:09 1997 +++ linux/net/core/scm.c Sun Nov 30 14:00:39 1997 @@ -205,25 +205,25 @@ return err; } -void put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) +int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { struct cmsghdr *cm = (struct cmsghdr*)msg->msg_control; + struct cmsghdr cmhdr; int cmlen = CMSG_LEN(len); int err; if (cm==NULL || msg->msg_controllen < sizeof(*cm)) { msg->msg_flags |= MSG_CTRUNC; - return; + return 0; /* XXX: return error? check spec. 
*/ } if (msg->msg_controllen < cmlen) { msg->msg_flags |= MSG_CTRUNC; cmlen = msg->msg_controllen; } - err = put_user(level, &cm->cmsg_level); - if (!err) - err = put_user(type, &cm->cmsg_type); - if (!err) - err = put_user(cmlen, &cm->cmsg_len); + cmhdr.cmsg_level = level; + cmhdr.cmsg_type = type; + cmhdr.cmsg_len = cmlen; + err = copy_to_user(cm, &cmhdr, sizeof cmhdr); if (!err) err = copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)); if (!err) { @@ -231,6 +231,7 @@ msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } + return err; } void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) diff -u --recursive --new-file v2.1.67/linux/net/core/skbuff.c linux/net/core/skbuff.c --- v2.1.67/linux/net/core/skbuff.c Thu May 15 16:48:05 1997 +++ linux/net/core/skbuff.c Sun Nov 30 14:00:39 1997 @@ -176,7 +176,7 @@ skb->dst = NULL; skb->destructor = NULL; memset(skb->cb, 0, sizeof(skb->cb)); - skb->priority = SOPRI_NORMAL; + skb->priority = 0; atomic_inc(&net_skbcount); atomic_set(&skb->users, 1); diff -u --recursive --new-file v2.1.67/linux/net/core/sock.c linux/net/core/sock.c --- v2.1.67/linux/net/core/sock.c Wed Sep 24 20:05:48 1997 +++ linux/net/core/sock.c Sun Nov 30 14:00:39 1997 @@ -101,6 +101,7 @@ #include #include #include +#include #include #include @@ -143,6 +144,7 @@ int valbool; int err; struct linger ling; + struct ifreq req; int ret = 0; /* @@ -241,7 +243,7 @@ break; case SO_PRIORITY: - if (val >= 0 && val < DEV_NUMBUFFS) + if (val >= 0 && val <= 7) sk->priority = val; else return(-EINVAL); @@ -317,6 +319,46 @@ return -EINVAL; break; #endif + case SO_BINDTODEVICE: + /* Bind this socket to a particular device like "eth0", + * as specified in an ifreq structure. If the device + * is "", socket is NOT bound to a device. + */ + + if (!valbool) { + sk->bound_dev_if = 0; + } + else { + if (copy_from_user(&req, optval, sizeof(req)) < 0) + return -EFAULT; + + /* Remove any cached route for this socket. 
*/ + if (sk->dst_cache) { + ip_rt_put((struct rtable*)sk->dst_cache); + sk->dst_cache = NULL; + } + + if (req.ifr_ifrn.ifrn_name[0] == '\0') { + sk->bound_dev_if = 0; + } + else { + struct device *dev = dev_get(req.ifr_ifrn.ifrn_name); + if (!dev) + return -EINVAL; + sk->bound_dev_if = dev->ifindex; + if (sk->daddr) { + int ret; + ret = ip_route_output((struct rtable**)&sk->dst_cache, + sk->daddr, sk->saddr, + sk->ip_tos, sk->bound_dev_if); + if (ret) + return ret; + } + } + } + return 0; + + /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ default: @@ -627,7 +669,7 @@ produce annoying no free page messages still.... */ skb = sock_wmalloc(sk, size, 0 , GFP_BUFFER); if(!skb) - skb=sock_wmalloc(sk, fallback, 0, GFP_KERNEL); + skb=sock_wmalloc(sk, fallback, 0, sk->allocation); } /* @@ -669,7 +711,7 @@ * In any case I'd delete this check at all, or * change it to: */ - if (atomic_read(&sk->wmem_alloc) + size >= sk->sndbuf) + if (atomic_read(&sk->wmem_alloc) >= sk->sndbuf) #endif { sk->socket->flags &= ~SO_NOSPACE; @@ -967,7 +1009,6 @@ sk->allocation = GFP_KERNEL; sk->rcvbuf = sysctl_rmem_default*2; sk->sndbuf = sysctl_wmem_default*2; - sk->priority = SOPRI_NORMAL; sk->state = TCP_CLOSE; sk->zapped = 1; sk->socket = sock; diff -u --recursive --new-file v2.1.67/linux/net/ethernet/eth.c linux/net/ethernet/eth.c --- v2.1.67/linux/net/ethernet/eth.c Tue May 13 22:41:21 1997 +++ linux/net/ethernet/eth.c Sun Nov 30 14:00:39 1997 @@ -232,6 +232,13 @@ return htons(ETH_P_802_2); } +int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) +{ + struct ethhdr *eth = skb->mac.ethernet; + memcpy(haddr, eth->h_source, ETH_ALEN); + return ETH_ALEN; +} + int eth_header_cache(struct dst_entry *dst, struct neighbour *neigh, struct hh_cache *hh) { diff -u --recursive --new-file v2.1.67/linux/net/ipv4/Config.in linux/net/ipv4/Config.in --- v2.1.67/linux/net/ipv4/Config.in Thu Jun 26 12:33:41 1997 +++ linux/net/ipv4/Config.in Sun Nov 30 14:00:39 1997 @@ -2,6 
+2,25 @@ # IP configuration # bool 'IP: multicasting' CONFIG_IP_MULTICAST +bool 'IP: advanced router' CONFIG_IP_ADVANCED_ROUTER +if [ "$CONFIG_IP_ADVANCED_ROUTER" = "y" ]; then + define_bool CONFIG_RTNETLINK y + bool 'IP: policy routing' CONFIG_IP_MULTIPLE_TABLES + bool 'IP: equal cost multipath' CONFIG_IP_ROUTE_MULTIPATH + bool 'IP: use TOS value as routing key' CONFIG_IP_ROUTE_TOS + bool 'IP: verbose route monitoring' CONFIG_IP_ROUTE_VERBOSE + bool 'IP: large routing tables' CONFIG_IP_ROUTE_LARGE_TABLES + if [ "$CONFIG_IP_MULTIPLE_TABLES" = "y" ]; then + bool 'IP: fast network address translation' CONFIG_IP_ROUTE_NAT + fi +fi +bool 'IP: kernel level autoconfiguration' CONFIG_IP_PNP +if [ "$CONFIG_IP_PNP" = "y" ]; then + bool ' BOOTP support' CONFIG_IP_PNP_BOOTP + bool ' RARP support' CONFIG_IP_PNP_RARP +# not yet ready.. +# bool ' ARP support' CONFIG_IP_PNP_ARP +fi if [ "$CONFIG_FIREWALL" = "y" ]; then bool 'IP: firewalling' CONFIG_IP_FIREWALL if [ "$CONFIG_IP_FIREWALL" = "y" ]; then @@ -9,23 +28,29 @@ bool 'IP: firewall packet netlink device' CONFIG_IP_FIREWALL_NETLINK fi bool 'IP: firewall packet logging' CONFIG_IP_FIREWALL_VERBOSE - bool 'IP: masquerading' CONFIG_IP_MASQUERADE - if [ "$CONFIG_IP_MASQUERADE" != "n" ]; then - comment 'Protocol-specific masquerading support will be built as modules.' - fi bool 'IP: transparent proxy support' CONFIG_IP_TRANSPARENT_PROXY bool 'IP: always defragment' CONFIG_IP_ALWAYS_DEFRAG fi fi bool 'IP: accounting' CONFIG_IP_ACCT +bool 'IP: masquerading' CONFIG_IP_MASQUERADE +if [ "$CONFIG_IP_MASQUERADE" != "n" ]; then + comment 'Protocol-specific masquerading support will be built as modules.' 
+fi bool 'IP: optimize as router not host' CONFIG_IP_ROUTER tristate 'IP: tunneling' CONFIG_NET_IPIP +tristate 'IP: GRE tunnels over IP' CONFIG_NET_IPGRE if [ "$CONFIG_IP_MULTICAST" = "y" ]; then + if [ "$CONFIG_NET_IPGRE" != "n" ]; then + bool 'IP: broadcast GRE over IP' CONFIG_NET_IPGRE_BROADCAST + fi bool 'IP: multicast routing' CONFIG_IP_MROUTE + if [ "$CONFIG_IP_MROUTE" = "y" ]; then + bool 'IP: PIM-SM version 1 support' CONFIG_IP_PIMSM_V1 + bool 'IP: PIM-SM version 2 support' CONFIG_IP_PIMSM_V2 + fi fi -if [ "$CONFIG_NET_ALIAS" = "y" ]; then - tristate 'IP: aliasing support' CONFIG_IP_ALIAS -fi +tristate 'IP: aliasing support' CONFIG_IP_ALIAS if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then if [ "$CONFIG_NETLINK" = "y" ]; then bool 'IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD @@ -33,9 +58,9 @@ fi bool 'IP: TCP syncookie support (not enabled per default) ' CONFIG_SYN_COOKIES comment '(it is safe to leave these untouched)' -bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP +#bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP tristate 'IP: Reverse ARP' CONFIG_INET_RARP -bool 'IP: Path MTU Discovery (normally enabled)' CONFIG_PATH_MTU_DISCOVERY +#bool 'IP: Path MTU Discovery (normally enabled)' CONFIG_PATH_MTU_DISCOVERY #bool 'IP: Disable NAGLE algorithm (normally enabled)' CONFIG_TCP_NAGLE_OFF bool 'IP: Drop source routed frames' CONFIG_IP_NOSR bool 'IP: Allow large windows (not recommended if <16Mb of memory)' CONFIG_SKB_LARGE diff -u --recursive --new-file v2.1.67/linux/net/ipv4/Makefile linux/net/ipv4/Makefile --- v2.1.67/linux/net/ipv4/Makefile Thu Jun 26 12:33:41 1997 +++ linux/net/ipv4/Makefile Sun Nov 30 14:00:39 1997 @@ -8,17 +8,25 @@ # Note 2! The CFLAGS definition is now in the main makefile... 
O_TARGET := ipv4.o -IPV4_OBJS := utils.o route.o proc.o timer.o protocol.o packet.o \ +IPV4_OBJS := utils.o route.o proc.o timer.o protocol.o \ ip_input.o ip_fragment.o ip_forward.o ip_options.o \ ip_output.o ip_sockglue.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o\ raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o ip_fw.o \ - sysctl_net_ipv4.o fib.o ip_nat_dumb.o + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o IPV4X_OBJS := MOD_LIST_NAME := IPV4_MODULES M_OBJS := +ifeq ($(CONFIG_IP_MULTIPLE_TABLES),y) +IPV4_OBJS += fib_rules.o +endif + +ifeq ($(CONFIG_IP_ROUTE_NAT),y) +IPV4_OBJS += ip_nat_dumb.o +endif + ifeq ($(CONFIG_IP_MROUTE),y) IPV4_OBJS += ipmr.o endif @@ -32,10 +40,18 @@ endif ifeq ($(CONFIG_NET_IPIP),y) -IPV4_OBJS += ipip.o +IPV4X_OBJS += ipip.o else ifeq ($(CONFIG_NET_IPIP),m) - M_OBJS += ipip.o + MX_OBJS += ipip.o + endif +endif + +ifeq ($(CONFIG_NET_IPGRE),y) +IPV4X_OBJS += ip_gre.o +else + ifeq ($(CONFIG_NET_IPGRE),m) + MX_OBJS += ip_gre.o endif endif @@ -44,17 +60,13 @@ M_OBJS += ip_masq_ftp.o ip_masq_irc.o ip_masq_raudio.o ip_masq_quake.o endif -ifeq ($(CONFIG_IP_ALIAS),y) -IPV4_OBJS += ip_alias.o -else - ifeq ($(CONFIG_IP_ALIAS),m) - M_OBJS += ip_alias.o - endif -endif - ifeq ($(CONFIG_SYN_COOKIES),y) IPV4_OBJS += syncookies.o # module not supported, because it would be too messy. +endif + +ifeq ($(CONFIG_IP_PNP),y) +IPV4_OBJS += ipconfig.o endif ifdef CONFIG_INET diff -u --recursive --new-file v2.1.67/linux/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c --- v2.1.67/linux/net/ipv4/af_inet.c Wed Sep 24 20:05:48 1997 +++ linux/net/ipv4/af_inet.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * AF_INET protocol family socket handler. * - * Version: @(#)af_inet.c (from sock.c) 1.0.17 06/02/93 + * Version: $Id: af_inet.c,v 1.58 1997/10/29 20:27:21 kuznet Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -77,6 +77,7 @@ #include #include #include +#include #include #include @@ -94,14 +95,15 @@ #include #include #include +#include #include #include +#ifdef CONFIG_IP_MROUTE +#include +#endif #ifdef CONFIG_IP_MASQUERADE #include #endif -#ifdef CONFIG_IP_ALIAS -#include -#endif #ifdef CONFIG_BRIDGE #include #endif @@ -115,13 +117,13 @@ #define min(a,b) ((a)<(b)?(a):(b)) extern int sysctl_core_destroy_delay; -extern struct proto packet_prot; + extern int raw_get_info(char *, char **, off_t, int, int); extern int snmp_get_info(char *, char **, off_t, int, int); extern int afinet_get_info(char *, char **, off_t, int, int); extern int tcp_get_info(char *, char **, off_t, int, int); extern int udp_get_info(char *, char **, off_t, int, int); - +extern void ip_mc_drop_socket(struct sock *sk); #ifdef CONFIG_DLCI extern int dlci_ioctl(unsigned int, void*); @@ -165,9 +167,8 @@ /* No longer exists. */ del_from_prot_sklist(sk); - /* This is gross, but needed for SOCK_PACKET -DaveM */ - if(sk->prot->unhash) - sk->prot->unhash(sk); + /* Remove from protocol hash chains. */ + sk->prot->unhash(sk); if(sk->opt) kfree(sk->opt); @@ -321,13 +322,24 @@ struct sock *sk; struct proto *prot; + /* Compatibility */ + if (sock->type == SOCK_PACKET) { + static int warned; + if (net_families[AF_PACKET]==NULL) + return -ESOCKTNOSUPPORT; + if (!warned++) + printk(KERN_INFO "%s uses obsolete (AF_INET,SOCK_PACKET)\n", current->comm); + return net_families[AF_PACKET]->create(sock, protocol); + } + sock->state = SS_UNCONNECTED; sk = sk_alloc(AF_INET, GFP_KERNEL); if (sk == NULL) goto do_oom; - /* Note for tcp that also wiped the dummy_th block for us. */ - if(sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET) { + switch (sock->type) { + case SOCK_STREAM: + /* Note for tcp that also wiped the dummy_th block for us. 
*/ if (protocol && protocol != IPPROTO_TCP) goto free_and_noproto; protocol = IPPROTO_TCP; @@ -338,7 +350,10 @@ sk->ip_pmtudisc = IP_PMTUDISC_WANT; prot = &tcp_prot; sock->ops = &inet_stream_ops; - } else if(sock->type == SOCK_DGRAM) { + break; + case SOCK_SEQPACKET: + goto free_and_badtype; + case SOCK_DGRAM: if (protocol && protocol != IPPROTO_UDP) goto free_and_noproto; protocol = IPPROTO_UDP; @@ -346,21 +361,26 @@ sk->ip_pmtudisc = IP_PMTUDISC_DONT; prot=&udp_prot; sock->ops = &inet_dgram_ops; - } else if(sock->type == SOCK_RAW || sock->type == SOCK_PACKET) { + break; + case SOCK_RAW: if (!suser()) goto free_and_badperm; if (!protocol) goto free_and_noproto; - prot = (sock->type == SOCK_RAW) ? &raw_prot : &packet_prot; + prot = &raw_prot; sk->reuse = 1; sk->ip_pmtudisc = IP_PMTUDISC_DONT; sk->num = protocol; sock->ops = &inet_dgram_ops; - } else { + if (protocol == IPPROTO_RAW) + sk->ip_hdrincl = 1; + break; + default: goto free_and_badtype; } sock_init_data(sock,sk); + sk->destruct = NULL; sk->zapped=0; @@ -378,11 +398,6 @@ sk->ip_ttl=ip_statistics.IpDefaultTTL; - if(sk->type==SOCK_RAW && protocol==IPPROTO_RAW) - sk->ip_hdrincl=1; - else - sk->ip_hdrincl=0; - sk->ip_mc_loop=1; sk->ip_mc_ttl=1; sk->ip_mc_index=0; @@ -398,11 +413,10 @@ * creation time automatically * shares. */ - sk->dummy_th.source = ntohs(sk->num); + sk->dummy_th.source = htons(sk->num); - /* This is gross, but needed for SOCK_PACKET -DaveM */ - if(sk->prot->hash) - sk->prot->hash(sk); + /* Add to protocol hash chains. */ + sk->prot->hash(sk); add_to_prot_sklist(sk); } @@ -482,7 +496,7 @@ unsigned short snum; int chk_addr_ret; - /* If the socket has its own bind function then use it. (RAW and PACKET) */ + /* If the socket has its own bind function then use it. 
(RAW) */ if(sk->prot->bind) return sk->prot->bind(sk, uaddr, addr_len); @@ -503,12 +517,12 @@ if (snum < PROT_SOCK && !suser()) return(-EACCES); - chk_addr_ret = __ip_chk_addr(addr->sin_addr.s_addr); - if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && - chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST) { + chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + if (addr->sin_addr.s_addr != 0 && chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) { #ifdef CONFIG_IP_TRANSPARENT_PROXY /* Superuser may bind to any address to allow transparent proxying. */ - if(!suser()) + if(chk_addr_ret != RTN_UNICAST || !suser()) #endif return -EADDRNOTAVAIL; /* Source address MUST be ours! */ } @@ -521,7 +535,7 @@ * which case the sending device address is used. */ sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; - if(chk_addr_ret == IS_MULTICAST || chk_addr_ret == IS_BROADCAST) + if(chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) sk->saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. 
*/ @@ -529,7 +543,7 @@ return -EADDRINUSE; sk->num = snum; - sk->dummy_th.source = ntohs(snum); + sk->dummy_th.source = htons(snum); sk->daddr = 0; sk->dummy_th.dest = 0; sk->prot->rehash(sk); @@ -868,9 +882,6 @@ case SIOCDARP: case SIOCGARP: case SIOCSARP: - case OLD_SIOCDARP: - case OLD_SIOCGARP: - case OLD_SIOCSARP: return(arp_ioctl(cmd,(void *) arg)); case SIOCDRARP: case SIOCGRARP: @@ -889,10 +900,12 @@ case SIOCSIFNETMASK: case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: + case SIOCSIFPFLAGS: + case SIOCGIFPFLAGS: + case SIOCSIFFLAGS: return(devinet_ioctl(cmd,(void *) arg)); case SIOCGIFCONF: case SIOCGIFFLAGS: - case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: case SIOCGIFMETRIC: @@ -908,9 +921,10 @@ case SIOCGIFMAP: case SIOCSIFSLAVE: case SIOCGIFSLAVE: - case SIOGIFINDEX: - case SIOGIFNAME: - case SIOCGIFCOUNT: + case SIOCGIFINDEX: + case SIOCGIFNAME: + case SIOCGIFCOUNT: + case SIOCSIFHWBROADCAST: return(dev_ioctl(cmd,(void *) arg)); case SIOCGIFBR: @@ -1105,6 +1119,16 @@ icmp_init(&inet_family_ops); + /* I wish inet_add_protocol had no constructor hook... 
+ I had to move IPIP from net/ipv4/protocol.c :-( --ANK + */ +#ifdef CONFIG_NET_IPIP + ipip_init(); +#endif +#ifdef CONFIG_NET_IPGRE + ipgre_init(); +#endif + /* * Set the firewalling up */ @@ -1114,20 +1138,12 @@ #ifdef CONFIG_IP_MASQUERADE ip_masq_init(); #endif - + /* * Initialise the multicast router */ #if defined(CONFIG_IP_MROUTE) ip_mr_init(); -#endif - - /* - * Initialise AF_INET alias type (register net_alias_type) - */ - -#if defined(CONFIG_IP_ALIAS) - ip_alias_init(); #endif #ifdef CONFIG_INET_RARP diff -u --recursive --new-file v2.1.67/linux/net/ipv4/arp.c linux/net/ipv4/arp.c --- v2.1.67/linux/net/ipv4/arp.c Tue Sep 23 16:48:50 1997 +++ linux/net/ipv4/arp.c Sun Nov 30 14:00:39 1997 @@ -1,5 +1,7 @@ /* linux/net/inet/arp.c * + * Version: $Id: arp.c,v 1.56 1997/11/24 12:51:47 freitag Exp $ + * * Copyright (C) 1994 by Florian La Roche * * This module implements the Address Resolution Protocol ARP (RFC 826), @@ -58,6 +60,8 @@ * folded into the mainstream FDDI code. * Ack spit, Linus how did you allow that * one in... + * Jes Sorensen : Make FDDI work again in 2.1.x and + * clean up the APFDDI & gen. FDDI bits. 
*/ /* RFC1122 Status: @@ -105,7 +109,6 @@ #include #endif #endif -#include #ifdef CONFIG_ARPD #include #endif @@ -251,6 +254,7 @@ #ifdef CONFIG_ARPD static int arpd_not_running; static int arpd_stamp; +struct sock *arpd_sk; #endif static void arp_check_expire (unsigned long); @@ -428,8 +432,6 @@ static __inline__ void arpd_update(u32 ip, struct device *dev, char *ha) { - if (arpd_not_running) - return; arpd_send(ARPD_UPDATE, ip, dev, ha, jiffies); } @@ -440,8 +442,6 @@ static __inline__ void arpd_lookup(u32 addr, struct device * dev) { - if (arpd_not_running) - return; arpd_send(ARPD_LOOKUP, addr, dev, NULL, 0); } @@ -451,13 +451,11 @@ static __inline__ void arpd_flush(struct device * dev) { - if (arpd_not_running) - return; arpd_send(ARPD_FLUSH, 0, dev, NULL, 0); } -static int arpd_callback(int minor, struct sk_buff *skb) +static int arpd_callback(struct sk_buff *skb, struct sock *sk) { struct device * dev; struct arpd_request *retreq; @@ -484,7 +482,9 @@ /* * Invalid mapping: drop it and send ARP broadcast. */ - arp_send(ARPOP_REQUEST, ETH_P_ARP, retreq->ip, dev, dev->pa_addr, NULL, + arp_send(ARPOP_REQUEST, ETH_P_ARP, retreq->ip, dev, + inet_select_addr(dev, retreq->ip, RT_SCOPE_LINK), + NULL, dev->dev_addr, NULL); } else @@ -658,8 +658,8 @@ entry->timer.expires = jiffies + ARP_CONFIRM_TIMEOUT; add_timer(&entry->timer); arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, - dev, dev->pa_addr, entry->u.neigh.ha, - dev->dev_addr, NULL); + dev, inet_select_addr(dev, entry->ip, RT_SCOPE_LINK), + entry->u.neigh.ha, dev->dev_addr, NULL); #if RT_CACHE_DEBUG >= 2 printk("arp_expire: %08x requires confirmation\n", entry->ip); #endif @@ -710,7 +710,8 @@ /* Set new timer. */ entry->timer.expires = jiffies + sysctl_arp_res_time; add_timer(&entry->timer); - arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev, dev->pa_addr, + arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev, + inet_select_addr(dev, entry->ip, RT_SCOPE_LINK), entry->retries > sysctl_arp_max_tries ? 
entry->u.neigh.ha : NULL, dev->dev_addr, NULL); return; @@ -749,7 +750,8 @@ entry->timer.expires = jiffies + sysctl_arp_dead_res_time; add_timer(&entry->timer); - arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev, dev->pa_addr, + arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev, + inet_select_addr(dev, entry->ip, RT_SCOPE_LINK), NULL, dev->dev_addr, NULL); return; } @@ -797,9 +799,7 @@ entry = (struct arp_table *)neigh_alloc(sizeof(struct arp_table), &arp_neigh_ops); - - if (entry != NULL) - { + if (entry != NULL) { atomic_set(&entry->u.neigh.refcnt, 1); if (how) @@ -953,19 +953,19 @@ for (entry = arp_tables[HASH(paddr)]; entry != NULL; entry = entry->u.next) if (entry->ip == paddr && entry->u.neigh.dev == dev) - return entry; - return NULL; + break; + return entry; } static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, struct device * dev) { switch (addr_hint) { - case IS_MYADDR: + case RTN_LOCAL: printk(KERN_DEBUG "ARP: arp called for own IP address\n"); memcpy(haddr, dev->dev_addr, dev->addr_len); return 1; - case IS_MULTICAST: + case RTN_MULTICAST: if(dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802 || dev->type==ARPHRD_FDDI) { @@ -985,7 +985,7 @@ * If a device does not support multicast broadcast the stuff (eg AX.25 for now) */ - case IS_BROADCAST: + case RTN_BROADCAST: memcpy(haddr, dev->broadcast, dev->addr_len); return 1; } @@ -1007,11 +1007,17 @@ else #endif arp_send(ARPOP_REQUEST, ETH_P_ARP, entry->ip, dev, - dev->pa_addr, NULL, dev->dev_addr, NULL); + inet_select_addr(dev, entry->ip, RT_SCOPE_LINK), NULL, + dev->dev_addr, NULL); } /* * Create a new unresolved entry. + * + * NOTE: Always make sure no possibility of sleeping is introduced here, + * since nearly all callers are inside of BH atomic. Don't let + * the arp_alloc() fool you, at neigh_alloc() it is using GFP_ATOMIC + * always. 
*/ struct arp_table * arp_new_entry(u32 paddr, struct device *dev, struct sk_buff *skb) @@ -1049,7 +1055,6 @@ struct device *dev = skb->dev; u32 paddr; struct arp_table *entry; - unsigned long hash; if (!skb->dst) { printk(KERN_DEBUG "arp_find called with dst==NULL\n"); @@ -1058,14 +1063,11 @@ paddr = ((struct rtable*)skb->dst)->rt_gateway; - if (arp_set_predefined(__ip_chk_addr(paddr), haddr, paddr, dev)) { - if (skb) - skb->arp = 1; + if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev)) { + skb->arp = 1; return 0; } - hash = HASH(paddr); - start_bh_atomic(); /* @@ -1079,8 +1081,7 @@ { entry->u.neigh.lastused = jiffies; memcpy(haddr, entry->u.neigh.ha, dev->addr_len); - if (skb) - skb->arp = 1; + skb->arp = 1; end_bh_atomic(); return 0; } @@ -1090,24 +1091,17 @@ * queue the packet with the previous attempt */ - if (skb != NULL) - { - if (entry->last_updated) - { - if (entry->u.neigh.arp_queue.qlen < ARP_MAX_UNRES_PACKETS) - skb_queue_tail(&entry->u.neigh.arp_queue, skb); - else - kfree_skb(skb, FREE_WRITE); - } - /* - * If last_updated==0 host is dead, so - * drop skb's and set socket error. - */ + if (entry->last_updated) { + if (entry->u.neigh.arp_queue.qlen < ARP_MAX_UNRES_PACKETS) + skb_queue_tail(&entry->u.neigh.arp_queue, skb); else - { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); kfree_skb(skb, FREE_WRITE); - } + } else { + /* If last_updated==0 host is dead, so + * drop skb's and set socket error. 
+ */ + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + kfree_skb(skb, FREE_WRITE); } end_bh_atomic(); return 1; @@ -1115,7 +1109,7 @@ entry = arp_new_entry(paddr, dev, skb); - if (skb != NULL && !entry) + if (entry == NULL) kfree_skb(skb, FREE_WRITE); end_bh_atomic(); @@ -1129,12 +1123,13 @@ struct device *dev = dst->dev; u32 paddr = rt->rt_gateway; struct arp_table *entry; - unsigned long hash; if (!neigh) { - if ((rt->rt_flags & RTF_MULTICAST) && - (dev->type==ARPHRD_ETHER || dev->type==ARPHRD_IEEE802)) + if (rt->rt_type == RTN_MULTICAST && + (dev->type == ARPHRD_ETHER || + dev->type == ARPHRD_IEEE802 || + dev->type == ARPHRD_FDDI)) { u32 taddr; haddr[0]=0x01; @@ -1148,12 +1143,12 @@ haddr[3]=taddr&0x7f; return 1; } - if (rt->rt_flags & (RTF_BROADCAST|RTF_MULTICAST)) + if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) { memcpy(haddr, dev->broadcast, dev->addr_len); return 1; } - if (rt->rt_flags & RTF_LOCAL) + if (rt->rt_flags & RTCF_LOCAL) { printk(KERN_DEBUG "ARP: arp called for own IP address\n"); memcpy(haddr, dev->dev_addr, dev->addr_len); @@ -1162,8 +1157,6 @@ return 0; } - hash = HASH(paddr); - start_bh_atomic(); entry = (struct arp_table*)neigh; @@ -1187,17 +1180,14 @@ struct device *dev = rt->u.dst.dev; u32 paddr = rt->rt_gateway; struct arp_table *entry; - unsigned long hash; if (dst->ops->family != AF_INET) return NULL; if ((dev->flags & (IFF_LOOPBACK|IFF_NOARP)) || - (rt->rt_flags & (RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST))) + (rt->rt_flags & (RTCF_LOCAL|RTCF_BROADCAST|RTCF_MULTICAST))) return NULL; - hash = HASH(paddr); - start_bh_atomic(); /* @@ -1213,8 +1203,10 @@ return (struct neighbour*)entry; } - if (!resolve) + if (!resolve) { + end_bh_atomic(); return NULL; + } entry = arp_new_entry(paddr, dev, NULL); @@ -1256,17 +1248,19 @@ */ skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) - + dev->hard_header_len, GFP_ATOMIC); + + dev->hard_header_len + 15, GFP_ATOMIC); if (skb == NULL) { printk(KERN_DEBUG "ARP: no memory to send an 
arp packet\n"); return; } - skb_reserve(skb, dev->hard_header_len); + + skb_reserve(skb, (dev->hard_header_len+15)&~15); + skb->nh.raw = skb->data; arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); skb->arp = 1; skb->dev = dev; - skb->protocol = htons (ETH_P_ARP); + skb->protocol = __constant_htons (ETH_P_ARP); /* * Fill the device header for the ARP frame @@ -1295,7 +1289,7 @@ arp->ar_pro = (dev->type != ARPHRD_AX25) ? htons(ETH_P_IP) : htons(AX25_P_IP); #endif #else - arp->ar_pro = htons(ETH_P_IP); + arp->ar_pro = __constant_htons(ETH_P_IP); #endif arp->ar_hln = dev->addr_len; arp->ar_pln = 4; @@ -1319,6 +1313,20 @@ dev_queue_xmit(skb); } +static __inline__ int arp_check_published(u32 tip, struct device *dev) +{ + struct arp_table *entry; + + for (entry = arp_proxy_list; entry; entry = entry->u.next) { + if (!((entry->ip^tip)&entry->mask) && + ((!entry->u.neigh.dev && + (!(entry->flags & ATF_COM) || entry->hatype == dev->type)) + || entry->u.neigh.dev == dev) ) + break; + } + + return entry && !(entry->flags & ATF_DONTPUB); +} /* * Receive an arp request by the device layer. @@ -1331,6 +1339,7 @@ struct rtable *rt; unsigned char *sha, *tha; u32 sip, tip; + u16 dev_type = dev->type; /* * The hardware length of the packet should match the hardware length @@ -1339,45 +1348,38 @@ * is not from an IP number. We can't currently handle this, so toss * it. */ -#if defined(CONFIG_FDDI) || defined(CONFIG_AP1000) - if (dev->type == ARPHRD_FDDI) +#if defined(CONFIG_FDDI) + if (dev_type == ARPHRD_FDDI) { /* * According to RFC 1390, FDDI devices should accept ARP hardware types * of 1 (Ethernet). However, to be more robust, we'll accept hardware * types of either 1 (Ethernet) or 6 (IEEE 802.2). 
*/ + if (arp->ar_hln != dev->addr_len || ((ntohs(arp->ar_hrd) != ARPHRD_ETHER) && (ntohs(arp->ar_hrd) != ARPHRD_IEEE802)) || dev->flags & IFF_NOARP || skb->pkt_type == PACKET_OTHERHOST || arp->ar_pln != 4) - { - kfree_skb(skb, FREE_READ); - return 0; - } + goto out; } else { if (arp->ar_hln != dev->addr_len || - dev->type != ntohs(arp->ar_hrd) || + dev_type != ntohs(arp->ar_hrd) || dev->flags & IFF_NOARP || skb->pkt_type == PACKET_OTHERHOST || arp->ar_pln != 4) - { - kfree_skb(skb, FREE_READ); - return 0; - } + goto out; } #else if (arp->ar_hln != dev->addr_len || - dev->type != ntohs(arp->ar_hrd) || + dev_type != ntohs(arp->ar_hrd) || dev->flags & IFF_NOARP || skb->pkt_type == PACKET_OTHERHOST || - arp->ar_pln != 4) { - kfree_skb(skb, FREE_READ); - return 0; - } + arp->ar_pln != 4) + goto out; #endif /* @@ -1387,24 +1389,18 @@ * problem, so toss the packet. */ - switch (dev->type) + switch (dev_type) { #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) case ARPHRD_AX25: if(arp->ar_pro != htons(AX25_P_IP)) - { - kfree_skb(skb, FREE_READ); - return 0; - } + goto out; break; #endif #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) case ARPHRD_NETROM: if(arp->ar_pro != htons(AX25_P_IP)) - { - kfree_skb(skb, FREE_READ); - return 0; - } + goto out; break; #endif case ARPHRD_ETHER: @@ -1412,23 +1408,19 @@ case ARPHRD_METRICOM: case ARPHRD_IEEE802: case ARPHRD_FDDI: + case ARPHRD_IPGRE: if(arp->ar_pro != htons(ETH_P_IP)) - { - kfree_skb(skb, FREE_READ); - return 0; - } + goto out; break; default: printk(KERN_ERR "ARP: dev->type mangled!\n"); - kfree_skb(skb, FREE_READ); - return 0; + goto out; } /* * Extract fields */ - sha=arp_ptr; arp_ptr += dev->addr_len; memcpy(&sip, arp_ptr, 4); @@ -1440,21 +1432,8 @@ * Check for bad requests for 127.x.x.x and requests for multicast * addresses. If this is one such, delete it. 
*/ - if (LOOPBACK(tip) || MULTICAST(tip)) { - kfree_skb(skb, FREE_READ); - return 0; - } - if (ip_route_input(skb, tip, sip, 0, dev)) { - kfree_skb(skb, FREE_READ); - return 0; - } - dev = skb->dev; - rt = (struct rtable*)skb->dst; - if (dev->type != ntohs(arp->ar_hrd) || dev->flags&IFF_NOARP || - rt->rt_flags&RTF_BROADCAST) { - kfree_skb(skb, FREE_READ); - return 0; - } + if (LOOPBACK(tip) || MULTICAST(tip)) + goto out; /* * Process entry. The idea here is we want to send a reply if it is a @@ -1472,31 +1451,31 @@ * and in the case of requests for us we add the requester to the arp * cache. */ - if (arp->ar_op == htons(ARPOP_REQUEST)) { - struct arp_table *entry; - - for (entry = arp_proxy_list; entry; entry = entry->u.next) { - if (!((entry->ip^tip)&entry->mask) && - ((!entry->u.neigh.dev && - (!(entry->flags & ATF_COM) || entry->hatype == dev->type)) - || entry->u.neigh.dev == dev) ) - break; - } + int addr_type; + struct in_device *in_dev = dev->ip_ptr; - if (entry && !(entry->flags & ATF_DONTPUB)) { - char *ha = (entry->flags & ATF_COM) ? 
entry->u.neigh.ha : dev->dev_addr; - - if (rt->rt_flags&(RTF_LOCAL|RTF_NAT) || - (!(rt->rt_flags&RTCF_DOREDIRECT) && - rt->u.dst.dev != dev)) - arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,ha,sha); - } + if (ip_route_input(skb, tip, sip, 0, dev)) + goto out; + rt = (struct rtable*)skb->dst; + addr_type = rt->rt_type; + + if (addr_type == RTN_LOCAL || (rt->rt_flags&RTCF_DNAT) || + (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && + ((in_dev && IN_DEV_PROXY_ARP(in_dev) && IN_DEV_FORWARD(in_dev)) || + arp_check_published(tip, dev)))) + arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); + } else { + if (arp->ar_op != htons(ARPOP_REPLY) || + inet_addr_type(sip) != RTN_UNICAST) + goto out; } start_bh_atomic(); - arp_update(sip, sha, dev, 0, !RT_LOCALADDR(rt->rt_flags) && dev->type != ARPHRD_METRICOM); + arp_update(sip, sha, dev, 0, arp->ar_op == htons(ARPOP_REPLY)); end_bh_atomic(); + +out: kfree_skb(skb, FREE_READ); return 0; } @@ -1554,13 +1533,13 @@ if ((r->arp_flags & ATF_PERM) && !(r->arp_flags & ATF_COM)) return -EINVAL; - err = ip_route_output(&rt, ip, 0, 1, dev); + err = ip_route_output(&rt, ip, 0, 1, dev ? 
dev->ifindex : 0); if (err) return err; if (!dev) dev = rt->u.dst.dev; - if (rt->rt_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTCF_NAT)) { - if (rt->rt_flags&RTF_BROADCAST && + if (rt->rt_flags&(RTCF_LOCAL|RTCF_BROADCAST|RTCF_MULTICAST|RTCF_DNAT)) { + if (rt->rt_flags&RTCF_BROADCAST && dev->type == ARPHRD_METRICOM && r->arp_ha.sa_family == ARPHRD_METRICOM) { memcpy(dev->broadcast, r->arp_ha.sa_data, dev->addr_len); @@ -1578,7 +1557,7 @@ if (dev && r->arp_ha.sa_family != dev->type) return -EINVAL; - + start_bh_atomic(); if (!(r->arp_flags & ATF_PUBL)) @@ -1991,7 +1970,7 @@ #endif #ifdef CONFIG_ARPD - netlink_attach(NETLINK_ARPD, arpd_callback); + arpd_sk = netlink_kernel_create(NETLINK_ARPD, arpd_callback); #endif } diff -u --recursive --new-file v2.1.67/linux/net/ipv4/devinet.c linux/net/ipv4/devinet.c --- v2.1.67/linux/net/ipv4/devinet.c Mon Jun 16 16:36:01 1997 +++ linux/net/ipv4/devinet.c Sun Nov 30 14:00:39 1997 @@ -1,6 +1,8 @@ /* * NET3 IP device support routines. * + * Version: $Id: devinet.c,v 1.14 1997/10/10 22:40:44 davem Exp $ + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -13,9 +15,13 @@ * * Additional Authors: * Alan Cox, + * Alexey Kuznetsov, + * + * Changes: + * Alexey Kuznetsov: pa_* fields are replaced with ifaddr lists. */ -#include /* For CONFIG_IP_CLASSLESS */ +#include #include #include @@ -34,72 +40,336 @@ #include #include #include -#include -#include -#include -#include -#include #include -#include -#include +#include +#include #include -#include +#include +#include #ifdef CONFIG_KERNELD #include #endif -extern struct notifier_block *netdev_chain; +#include +#include +#include -/* - * Determine a default network mask, based on the IP address. 
+#ifdef CONFIG_RTNETLINK +static void rtmsg_ifa(int event, struct in_ifaddr *); +#else +#define rtmsg_ifa(a,b) do { } while(0) +#endif + +static struct notifier_block *inetaddr_chain; +static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); + + +int inet_ifa_count; +int inet_dev_count; + +static struct in_ifaddr * inet_alloc_ifa(void) +{ + struct in_ifaddr *ifa; + + ifa = kmalloc(sizeof(*ifa), GFP_KERNEL); + if (ifa) { + memset(ifa, 0, sizeof(*ifa)); + inet_ifa_count++; + } + + return ifa; +} + +static __inline__ void inet_free_ifa(struct in_ifaddr *ifa) +{ + kfree_s(ifa, sizeof(*ifa)); + inet_ifa_count--; +} + +struct in_device *inetdev_init(struct device *dev) +{ + struct in_device *in_dev; + + in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL); + if (!in_dev) + return NULL; + inet_dev_count++; + memset(in_dev, 0, sizeof(*in_dev)); + in_dev->dev = dev; + dev->ip_ptr = in_dev; + ip_mc_init_dev(in_dev); + return in_dev; +} + +static void inetdev_destroy(struct in_device *in_dev) +{ + struct in_ifaddr *ifa; + + ip_mc_destroy_dev(in_dev); + + while ((ifa = in_dev->ifa_list) != NULL) { + inet_del_ifa(in_dev, &in_dev->ifa_list, 0); + inet_free_ifa(ifa); + } + + in_dev->dev->ip_ptr = NULL; + kfree(in_dev); +} + +struct in_ifaddr * inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) +{ + for_primary_ifa(in_dev) { + if (inet_ifa_match(a, ifa)) { + if (!b || inet_ifa_match(b, ifa)) + return ifa; + } + } endfor_ifa(in_dev); + return NULL; +} + +static void +inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy) +{ + struct in_ifaddr *ifa1 = *ifap; + struct in_ifaddr *ifa; + + /* 1. Unlink it */ + + *ifap = ifa1->ifa_next; + + /* 2. 
Deleting primary ifaddr forces deletion all secondaries */ + + if (!(ifa1->ifa_flags&IFA_F_SECONDARY)) { + while ((ifa=*ifap) != NULL) { + if (ifa1->ifa_mask != ifa->ifa_mask || + !inet_ifa_match(ifa1->ifa_address, ifa)) { + ifap = &ifa->ifa_next; + continue; + } + *ifap = ifa->ifa_next; + rtmsg_ifa(RTM_DELADDR, ifa); + notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); + inet_free_ifa(ifa); + } + } + + /* 3. Announce address deletion */ + + /* Send message first, then call notifier. + At first sight, FIB update triggered by notifier + will refer to already deleted ifaddr, that could confuse + netlink listeners. It is not true: look, gated sees + that route deleted and if it still thinks that ifaddr + is valid, it will try to restore deleted routes... Grr. + So that, this order is correct. + */ + rtmsg_ifa(RTM_DELADDR, ifa1); + notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); + if (destroy) { + inet_free_ifa(ifa1); + if (in_dev->ifa_list == NULL) + inetdev_destroy(in_dev); + } +} + +static int +inet_insert_ifa(struct in_device *in_dev, struct in_ifaddr *ifa) +{ + struct in_ifaddr *ifa1, **ifap, **last_primary; + + if (ifa->ifa_local == 0) { + inet_free_ifa(ifa); + return 0; + } + + ifa->ifa_flags &= ~IFA_F_SECONDARY; + last_primary = &in_dev->ifa_list; + + for (ifap=&in_dev->ifa_list; (ifa1=*ifap)!=NULL; ifap=&ifa1->ifa_next) { + if (!(ifa1->ifa_flags&IFA_F_SECONDARY) && ifa->ifa_scope <= ifa1->ifa_scope) + last_primary = &ifa1->ifa_next; + if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) { + if (ifa1->ifa_local == ifa->ifa_local) { + inet_free_ifa(ifa); + return -EEXIST; + } + if (ifa1->ifa_scope != ifa->ifa_scope) { + inet_free_ifa(ifa); + return -EINVAL; + } + ifa->ifa_flags |= IFA_F_SECONDARY; + } + } + + if (!(ifa->ifa_flags&IFA_F_SECONDARY)) + ifap = last_primary; + + cli(); + ifa->ifa_next = *ifap; + *ifap = ifa; + sti(); + + /* Send message first, then call notifier. 
+ Notifier will trigger FIB update, so that + listeners of netlink will know about new ifaddr */ + rtmsg_ifa(RTM_NEWADDR, ifa); + notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); + + return 0; +} + +static int +inet_set_ifa(struct device *dev, struct in_ifaddr *ifa) +{ + struct in_device *in_dev = dev->ip_ptr; + + if (in_dev == NULL) { + in_dev = inetdev_init(dev); + if (in_dev == NULL) { + inet_free_ifa(ifa); + return -ENOBUFS; + } + } + ifa->ifa_dev = in_dev; + if (LOOPBACK(ifa->ifa_local)) + ifa->ifa_scope = RT_SCOPE_HOST; + return inet_insert_ifa(in_dev, ifa); +} + +struct in_device *inetdev_by_index(int ifindex) +{ + struct device *dev; + dev = dev_get_by_index(ifindex); + if (dev) + return dev->ip_ptr; + return NULL; +} + +struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask) +{ + for_primary_ifa(in_dev) { + if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa)) + return ifa; + } endfor_ifa(in_dev); + return NULL; +} + +#ifdef CONFIG_RTNETLINK + +/* rtm_{add|del} functions are not reenterable, so that + this structure can be made static */ -static unsigned long ip_get_mask(unsigned long addr) +int +inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - unsigned long dst; + struct kern_ifa *k_ifa = arg; + struct in_device *in_dev; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in_ifaddr *ifa, **ifap; + + if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL) + return -EADDRNOTAVAIL; + + for (ifap=&in_dev->ifa_list; (ifa=*ifap)!=NULL; ifap=&ifa->ifa_next) { + if ((k_ifa->ifa_local && memcmp(k_ifa->ifa_local, &ifa->ifa_local, 4)) || + (k_ifa->ifa_label && strcmp(k_ifa->ifa_label, ifa->ifa_label)) || + (k_ifa->ifa_address && + (ifm->ifa_prefixlen != ifa->ifa_prefixlen || + !inet_ifa_match(*(u32*)k_ifa->ifa_address, ifa)))) + continue; + inet_del_ifa(in_dev, ifap, 1); + return 0; + } - if (ZERONET(addr)) - return(0L); /* special case */ + return -EADDRNOTAVAIL; +} - dst = ntohl(addr); - if 
(IN_CLASSA(dst)) - return(htonl(IN_CLASSA_NET)); - if (IN_CLASSB(dst)) - return(htonl(IN_CLASSB_NET)); - if (IN_CLASSC(dst)) - return(htonl(IN_CLASSC_NET)); - - /* - * Something else, probably a multicast. - */ - - return(0); +int +inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct kern_ifa *k_ifa = arg; + struct device *dev; + struct in_device *in_dev; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in_ifaddr *ifa; + + if (ifm->ifa_prefixlen > 32 || k_ifa->ifa_local == NULL) + return -EINVAL; + + if ((dev = dev_get_by_index(ifm->ifa_index)) == NULL) + return -ENODEV; + + if ((in_dev = dev->ip_ptr) == NULL) { + in_dev = inetdev_init(dev); + if (!in_dev) + return -ENOBUFS; + } + + if ((ifa = inet_alloc_ifa()) == NULL) + return -ENOBUFS; + + if (k_ifa->ifa_address == NULL) + k_ifa->ifa_address = k_ifa->ifa_local; + memcpy(&ifa->ifa_local, k_ifa->ifa_local, 4); + memcpy(&ifa->ifa_address, k_ifa->ifa_address, 4); + ifa->ifa_prefixlen = ifm->ifa_prefixlen; + ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); + if (k_ifa->ifa_broadcast) + memcpy(&ifa->ifa_broadcast, k_ifa->ifa_broadcast, 4); + if (k_ifa->ifa_anycast) + memcpy(&ifa->ifa_anycast, k_ifa->ifa_anycast, 4); + ifa->ifa_flags = ifm->ifa_flags; + ifa->ifa_scope = ifm->ifa_scope; + ifa->ifa_dev = in_dev; + if (k_ifa->ifa_label) + memcpy(ifa->ifa_label, k_ifa->ifa_label, IFNAMSIZ); + else + memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + + return inet_insert_ifa(in_dev, ifa); } +#endif -/* - * This checks bitmasks for the ioctl calls for devices. +/* + * Determine a default network mask, based on the IP address. 
*/ - -static inline int bad_mask(__u32 mask, __u32 addr) + +static __inline__ int inet_abc_len(u32 addr) { - if (addr & (mask = ~mask)) - return 1; - mask = ntohl(mask); - if (mask & (mask+1)) - return 1; - return 0; + if (ZERONET(addr)) + return 0; + + addr = ntohl(addr); + if (IN_CLASSA(addr)) + return 8; + if (IN_CLASSB(addr)) + return 16; + if (IN_CLASSC(addr)) + return 24; + + /* + * Something else, probably a multicast. + */ + + return -1; } - + int devinet_ioctl(unsigned int cmd, void *arg) { struct ifreq ifr; + struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; + struct in_device *in_dev; + struct in_ifaddr **ifap = NULL; + struct in_ifaddr *ifa = NULL; struct device *dev; - __u32 addr; -#ifdef CONFIG_NET_ALIAS - int err; +#ifdef CONFIG_IP_ALIAS + char *colon; #endif + int exclusive = 0; + int ret = 0; /* * Fetch the caller's info block into kernel space @@ -107,191 +377,483 @@ if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; + ifr.ifr_name[IFNAMSIZ-1] = 0; + +#ifdef CONFIG_IP_ALIAS + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; +#endif - /* - * See which interface the caller is talking about. - */ - - /* - * - * net_alias_dev_get(): dev_get() with added alias naming magic. - * only allow alias creation/deletion if (getset==SIOCSIFADDR) - * - */ - #ifdef CONFIG_KERNELD dev_load(ifr.ifr_name); -#endif +#endif -#ifdef CONFIG_NET_ALIAS - if ((dev = net_alias_dev_get(ifr.ifr_name, cmd == SIOCSIFADDR, &err, NULL, NULL)) == NULL) - return(err); -#else - if ((dev = dev_get(ifr.ifr_name)) == NULL) - return(-ENODEV); + switch(cmd) { + case SIOCGIFADDR: /* Get interface address */ + case SIOCGIFBRDADDR: /* Get the broadcast address */ + case SIOCGIFDSTADDR: /* Get the destination address */ + case SIOCGIFNETMASK: /* Get the netmask for the interface */ + case SIOCGIFPFLAGS: /* Get per device sysctl controls */ + /* Note that this ioctls will not sleep, + so that we do not impose a lock. 
+ One day we will be forced to put shlock here (I mean SMP) + */ + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + break; + + case SIOCSIFFLAGS: + if (!suser()) + return -EACCES; + rtnl_lock(); + exclusive = 1; + break; + case SIOCSIFADDR: /* Set interface address (and family) */ + case SIOCSIFBRDADDR: /* Set the broadcast address */ + case SIOCSIFDSTADDR: /* Set the destination address */ + case SIOCSIFNETMASK: /* Set the netmask for the interface */ + case SIOCSIFPFLAGS: /* Set per device sysctl controls */ + if (!suser()) + return -EACCES; + if (sin->sin_family != AF_INET) + return -EINVAL; + rtnl_lock(); + exclusive = 1; + break; + default: + return -EINVAL; + } + + + if ((dev = dev_get(ifr.ifr_name)) == NULL) { + ret = -ENODEV; + goto done; + } + +#ifdef CONFIG_IP_ALIAS + if (colon) + *colon = ':'; #endif - if (cmd != SIOCSIFADDR && dev->family != AF_INET) - return(-EINVAL); + if ((in_dev=dev->ip_ptr) != NULL) { + for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) + if (strcmp(ifr.ifr_name, ifa->ifa_label) == 0) + break; + } + + if (ifa == NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) { + ret = -EADDRNOTAVAIL; + goto done; + } + + switch(cmd) { + case SIOCGIFADDR: /* Get interface address */ + sin->sin_addr.s_addr = ifa->ifa_local; + goto rarok; - switch(cmd) - { - case SIOCGIFADDR: /* Get interface address (and family) */ - if (ifr.ifr_addr.sa_family == AF_UNSPEC) - { - memcpy(ifr.ifr_hwaddr.sa_data, dev->dev_addr, MAX_ADDR_LEN); - ifr.ifr_hwaddr.sa_family = dev->type; - } - else - { - (*(struct sockaddr_in *) - &ifr.ifr_addr).sin_addr.s_addr = dev->pa_addr; - (*(struct sockaddr_in *) - &ifr.ifr_addr).sin_family = dev->family; - (*(struct sockaddr_in *) - &ifr.ifr_addr).sin_port = 0; + case SIOCGIFBRDADDR: /* Get the broadcast address */ + sin->sin_addr.s_addr = ifa->ifa_broadcast; + goto rarok; + + case SIOCGIFDSTADDR: /* Get the destination address */ + sin->sin_addr.s_addr = ifa->ifa_address; + goto rarok; + + case 
SIOCGIFNETMASK: /* Get the netmask for the interface */ + sin->sin_addr.s_addr = ifa->ifa_mask; + goto rarok; + + case SIOCGIFPFLAGS: + ifr.ifr_flags = in_dev->flags; + goto rarok; + + case SIOCSIFFLAGS: +#ifdef CONFIG_IP_ALIAS + if (colon) { + if (ifa == NULL) { + ret = -EADDRNOTAVAIL; + break; + } + if (!(ifr.ifr_flags&IFF_UP)) + inet_del_ifa(in_dev, ifap, 1); + break; } +#endif + ret = dev_change_flags(dev, ifr.ifr_flags); break; - case SIOCSIFADDR: /* Set interface address (and family) */ - - if (!suser()) - return -EPERM; + case SIOCSIFPFLAGS: + in_dev->flags = ifr.ifr_flags; + break; - /* - * BSDism. SIOCSIFADDR family=AF_UNSPEC sets the - * physical address. We can cope with this now. - */ - - if(ifr.ifr_addr.sa_family==AF_UNSPEC) - { - int ret; - if(dev->set_mac_address==NULL) - return -EOPNOTSUPP; - ret = dev->set_mac_address(dev,&ifr.ifr_addr); - if (!ret) - notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); - return ret; + case SIOCSIFADDR: /* Set interface address (and family) */ + if (inet_abc_len(sin->sin_addr.s_addr) < 0) { + ret = -EINVAL; + break; } - if(ifr.ifr_addr.sa_family!=AF_INET) - return -EINVAL; - addr = (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr; - - dev_lock_wait(); - dev_lock_list(); - - if (dev->family == AF_INET && addr == dev->pa_addr) { - dev_unlock_list(); - return 0; + if (!ifa) { + if ((ifa = inet_alloc_ifa()) == NULL) { + ret = -ENOBUFS; + break; + } +#ifdef CONFIG_IP_ALIAS + if (colon) + memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); + else +#endif + memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + } else { + ret = 0; + if (ifa->ifa_local == sin->sin_addr.s_addr) + break; + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_broadcast = 0; + ifa->ifa_anycast = 0; + ifa->ifa_prefixlen = 32; + ifa->ifa_mask = inet_make_mask(32); } - if (dev->flags & IFF_UP) - notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); - - /* - * if dev is an alias, must rehash to update - * address change - */ + ifa->ifa_address = + 
ifa->ifa_local = sin->sin_addr.s_addr; -#ifdef CONFIG_NET_ALIAS - if (net_alias_is(dev)) - net_alias_dev_rehash(dev, &ifr.ifr_addr); -#endif - dev->pa_addr = addr; - dev->ip_flags |= IFF_IP_ADDR_OK; - dev->ip_flags &= ~(IFF_IP_BRD_OK|IFF_IP_MASK_OK); - dev->family = AF_INET; - if (dev->flags & IFF_POINTOPOINT) { - dev->pa_mask = 0xFFFFFFFF; - dev->pa_brdaddr = 0xFFFFFFFF; - } else { - dev->pa_mask = ip_get_mask(dev->pa_addr); - dev->pa_brdaddr = dev->pa_addr|~dev->pa_mask; + if (!(dev->flags&IFF_POINTOPOINT)) { + ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); + ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); + if ((dev->flags&IFF_BROADCAST) && ifa->ifa_prefixlen < 31) + ifa->ifa_broadcast = ifa->ifa_address|~ifa->ifa_mask; } - if (dev->flags & IFF_UP) - notifier_call_chain(&netdev_chain, NETDEV_UP, dev); - dev_unlock_list(); - return 0; - - case SIOCGIFBRDADDR: /* Get the broadcast address */ - (*(struct sockaddr_in *) - &ifr.ifr_broadaddr).sin_addr.s_addr = dev->pa_brdaddr; - (*(struct sockaddr_in *) - &ifr.ifr_broadaddr).sin_family = dev->family; - (*(struct sockaddr_in *) - &ifr.ifr_broadaddr).sin_port = 0; + ret = inet_set_ifa(dev, ifa); break; case SIOCSIFBRDADDR: /* Set the broadcast address */ - if (!suser()) - return -EPERM; - - addr = (*(struct sockaddr_in *)&ifr.ifr_broadaddr).sin_addr.s_addr; - - if (dev->flags & IFF_UP) - ip_rt_change_broadcast(dev, addr); - dev->pa_brdaddr = addr; - dev->ip_flags |= IFF_IP_BRD_OK; - return 0; - - case SIOCGIFDSTADDR: /* Get the destination address (for point-to-point links) */ - (*(struct sockaddr_in *) - &ifr.ifr_dstaddr).sin_addr.s_addr = dev->pa_dstaddr; - (*(struct sockaddr_in *) - &ifr.ifr_dstaddr).sin_family = dev->family; - (*(struct sockaddr_in *) - &ifr.ifr_dstaddr).sin_port = 0; + if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_broadcast = sin->sin_addr.s_addr; + inet_insert_ifa(in_dev, ifa); + } break; - case SIOCSIFDSTADDR: /* Set the destination 
address (for point-to-point links) */ - if (!suser()) - return -EPERM; - addr = (*(struct sockaddr_in *)&ifr.ifr_dstaddr).sin_addr.s_addr; - if (addr == dev->pa_dstaddr) - return 0; - if (dev->flags & IFF_UP) - ip_rt_change_dstaddr(dev, addr); - dev->pa_dstaddr = addr; - return 0; - - case SIOCGIFNETMASK: /* Get the netmask for the interface */ - (*(struct sockaddr_in *) - &ifr.ifr_netmask).sin_addr.s_addr = dev->pa_mask; - (*(struct sockaddr_in *) - &ifr.ifr_netmask).sin_family = dev->family; - (*(struct sockaddr_in *) - &ifr.ifr_netmask).sin_port = 0; + case SIOCSIFDSTADDR: /* Set the destination address */ + if (ifa->ifa_address != sin->sin_addr.s_addr) { + if (inet_abc_len(sin->sin_addr.s_addr) < 0) { + ret = -EINVAL; + break; + } + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_address = sin->sin_addr.s_addr; + inet_insert_ifa(in_dev, ifa); + } break; case SIOCSIFNETMASK: /* Set the netmask for the interface */ - if (!suser()) - return -EPERM; - addr = (*(struct sockaddr_in *)&ifr.ifr_netmask).sin_addr.s_addr; - - if (addr == dev->pa_mask) { - dev->ip_flags |= IFF_IP_MASK_OK; - return 0; - } /* * The mask we set must be legal. 
*/ - if (bad_mask(addr, 0)) - return -EINVAL; - if (addr == htonl(0xFFFFFFFE)) - return -EINVAL; - if (dev->flags & IFF_UP) - ip_rt_change_netmask(dev, addr); - dev->pa_mask = addr; - dev->ip_flags |= IFF_IP_MASK_OK; - dev->ip_flags &= ~IFF_IP_BRD_OK; - return 0; - default: - return -EINVAL; - + if (bad_mask(sin->sin_addr.s_addr, 0)) { + ret = -EINVAL; + break; + } + + if (ifa->ifa_mask != sin->sin_addr.s_addr) { + inet_del_ifa(in_dev, ifap, 0); + ifa->ifa_mask = sin->sin_addr.s_addr; + ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); + inet_set_ifa(dev, ifa); + } + break; } +done: + if (exclusive) + rtnl_unlock(); + return ret; + +rarok: if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; return 0; +} + +static int +inet_gifconf(struct device *dev, char *buf, int len) +{ + struct in_device *in_dev = dev->ip_ptr; + struct in_ifaddr *ifa; + struct ifreq ifr; + int done=0; + + if (in_dev==NULL || (ifa=in_dev->ifa_list)==NULL) + return 0; + + for ( ; ifa; ifa = ifa->ifa_next) { + if (!buf) { + done += sizeof(ifr); + continue; + } + if (len < sizeof(ifr)) + return done; + memset(&ifr, 0, sizeof(struct ifreq)); + if (ifa->ifa_label) + strcpy(ifr.ifr_name, ifa->ifa_label); + else + strcpy(ifr.ifr_name, dev->name); + + (*(struct sockaddr_in *) &ifr.ifr_addr).sin_family = AF_INET; + (*(struct sockaddr_in *) &ifr.ifr_addr).sin_addr.s_addr = ifa->ifa_local; + + if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) + return -EFAULT; + buf += sizeof(struct ifreq); + len -= sizeof(struct ifreq); + done += sizeof(struct ifreq); + } + return done; +} + +u32 inet_select_addr(struct device *dev, u32 dst, int scope) +{ + u32 addr = 0; + struct in_device *in_dev = dev->ip_ptr; + + if (in_dev == NULL) + return 0; + + for_primary_ifa(in_dev) { + if (ifa->ifa_scope > scope) + continue; + addr = ifa->ifa_local; + if (!dst || inet_ifa_match(dst, ifa)) + return addr; + } endfor_ifa(in_dev); + + return addr; +} + +/* + * Device notifier + */ + +int 
register_inetaddr_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&inetaddr_chain, nb); +} + +int unregister_inetaddr_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&inetaddr_chain,nb); +} + +static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct device *dev = ptr; + struct in_device *in_dev = dev->ip_ptr; + + if (in_dev == NULL) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_REGISTER: + if (in_dev) + printk(KERN_DEBUG "inetdev_event: bug\n"); + dev->ip_ptr = NULL; + break; + case NETDEV_UP: + if (dev == &loopback_dev) { + struct in_ifaddr *ifa; + if ((ifa = inet_alloc_ifa()) != NULL) { + ifa->ifa_local = + ifa->ifa_address = htonl(INADDR_LOOPBACK); + ifa->ifa_prefixlen = 8; + ifa->ifa_mask = inet_make_mask(8); + ifa->ifa_dev = in_dev; + ifa->ifa_scope = RT_SCOPE_HOST; + inet_insert_ifa(in_dev, ifa); + } + } + ip_mc_up(in_dev); + break; + case NETDEV_DOWN: + ip_mc_down(in_dev); + break; + case NETDEV_UNREGISTER: + inetdev_destroy(in_dev); + break; + } + + return NOTIFY_DONE; +} + +struct notifier_block ip_netdev_notifier={ + inetdev_event, + NULL, + 0 +}; + +#ifdef CONFIG_RTNETLINK + +static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, + pid_t pid, u32 seq, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); + ifm = NLMSG_DATA(nlh); + ifm->ifa_family = AF_INET; + ifm->ifa_prefixlen = ifa->ifa_prefixlen; + ifm->ifa_flags = ifa->ifa_flags; + ifm->ifa_scope = ifa->ifa_scope; + ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (ifa->ifa_prefixlen) + RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address); + if (ifa->ifa_local) + RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local); + if (ifa->ifa_broadcast) + RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast); + if (ifa->ifa_anycast) + RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast); + if (ifa->ifa_label[0]) + 
RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_put(skb, b - skb->tail); + return -1; +} + +static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx, ip_idx; + int s_idx, s_ip_idx; + struct device *dev; + struct in_device *in_dev; + struct in_ifaddr *ifa; + + s_idx = cb->args[0]; + s_ip_idx = ip_idx = cb->args[1]; + for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + if (idx < s_idx) + continue; + if (idx > s_idx) + s_ip_idx = 0; + if ((in_dev = dev->ip_ptr) == NULL) + continue; + for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; + ifa = ifa->ifa_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWADDR) <= 0) + goto done; + } + } +done: + cb->args[0] = idx; + cb->args[1] = ip_idx; + + return skb->len; +} + +static void rtmsg_ifa(int event, struct in_ifaddr * ifa) +{ + struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128); + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) { + netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS); + return; + } + if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { + kfree_skb(skb, 0); + netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL); + return; + } + NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL); +} + + +static struct rtnetlink_link inet_rtnetlink_table[RTM_MAX-RTM_BASE+1] = +{ + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, rtnetlink_dump_ifinfo, }, + { NULL, NULL, }, + + { inet_rtm_newaddr, NULL, }, + { inet_rtm_deladdr, NULL, }, + { NULL, inet_dump_ifaddr, }, + { NULL, NULL, }, + + { inet_rtm_newroute, NULL, }, + { inet_rtm_delroute, NULL, }, + { inet_rtm_getroute, inet_dump_fib, }, + { NULL, NULL, }, + + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + +#ifdef CONFIG_IP_MULTIPLE_TABLES + 
{ inet_rtm_newrule, NULL, }, + { inet_rtm_delrule, NULL, }, + { NULL, inet_dump_rules, }, + { NULL, NULL, }, +#else + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, + { NULL, NULL, }, +#endif +}; + +#endif /* CONFIG_RTNETLINK */ + +#ifdef CONFIG_IP_PNP_BOOTP + +/* + * Addition and deletion of fake interface addresses + * for sending of BOOTP packets. In this case, we must + * set the local address to zero which is not permitted + * otherwise. + */ + +__initfunc(int inet_add_bootp_addr(struct device *dev)) +{ + struct in_device *in_dev = dev->ip_ptr; + struct in_ifaddr *ifa; + + if (!in_dev && !(in_dev = inetdev_init(dev))) + return -ENOBUFS; + if (!(ifa = inet_alloc_ifa())) + return -ENOBUFS; + ifa->ifa_dev = in_dev; + in_dev->ifa_list = ifa; + rtmsg_ifa(RTM_NEWADDR, ifa); + notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); + return 0; +} + +__initfunc(void inet_del_bootp_addr(struct device *dev)) +{ + if (dev->ip_ptr) + inetdev_destroy(dev->ip_ptr); +} + +#endif + +__initfunc(void devinet_init(void)) +{ + register_gifconf(AF_INET, inet_gifconf); + register_netdevice_notifier(&ip_netdev_notifier); +#ifdef CONFIG_RTNETLINK + rtnetlink_links[AF_INET] = inet_rtnetlink_table; +#endif } diff -u --recursive --new-file v2.1.67/linux/net/ipv4/fib.c linux/net/ipv4/fib.c --- v2.1.67/linux/net/ipv4/fib.c Mon Aug 18 18:19:47 1997 +++ linux/net/ipv4/fib.c Wed Dec 31 16:00:00 1969 @@ -1,2077 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * IPv4 Forwarding Information Base. - * - * Authors: Alexey Kuznetsov, - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- * - * - * NOTE: This file is scheduled to be removed from kernel. - * The natural place for router FIB is user level - * routing daemon (it has to keep its copy in any case) - * - * Kernel should keep only interface routes and, - * if host is not router, default gateway. - * - * We have good proof that it is feasible and efficient - - * multicast routing. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct fib_class local_class = {RT_CLASS_LOCAL, }; -static struct fib_class default_class = {RT_CLASS_DEFAULT, }; -static struct fib_class main_class = {RT_CLASS_MAIN, }; -static struct fib_class *fib_classes[RT_CLASS_MAX+1]; - -static struct fib_rule *fib_rules; - -static struct fib_info *fib_info_list; - -static int fib_stamp; - -static int rtmsg_process(struct nlmsghdr *n, struct in_rtmsg *r); - - -#ifdef CONFIG_RTNETLINK - -static unsigned rt_nl_flags; -static int rt_nl_owner = -1; - -/* - * Default mode is delayed for 0.5sec batch delivery. - * If someone starts to use user->level calls, - * we turn on synchronous message passing. 
- */ - -#define RTMSG_DELAY (HZ/2) - -static struct nlmsg_ctl rtmsg_ctl = { - { NULL, NULL, 0, 0L, NULL }, - NULL, - NETLINK_ROUTE, - RTMSG_DELAY, - NLMSG_GOODSIZE, - 0, 0, 0, 0 -}; - -static void __rtmsg_ack(struct nlmsghdr *n, int err); - -static __inline__ void rtmsg_ack(struct nlmsghdr *n, int err) -{ - if (n->nlmsg_seq && rt_nl_flags&RTCTL_ACK) - __rtmsg_ack(n, err); -} - -static void rtmsg_fib(unsigned long type, struct fib_node *f, int logmask, - struct fib_class *class, struct nlmsghdr *n); -static void rtmsg_dev(unsigned long type, struct device *dev, struct nlmsghdr *n); -#define rtmsg_kick() ({ if (rtmsg_ctl.nlmsg_skb) nlmsg_transmit(&rtmsg_ctl); }) - -#else -#define rtmsg_fib(a,b,c,d,e) -#define rtmsg_dev(a,b,c) -#define rtmsg_ack(a,b) -#define rtmsg_kick() -#endif - - -/* - * FIB locking. - */ - -static struct wait_queue *fib_wait; -static atomic_t fib_users = ATOMIC_INIT(0); - -static void fib_lock(void) -{ - while (atomic_read(&fib_users)) - sleep_on(&fib_wait); - atomic_inc(&fib_users); - dev_lock_list(); -} - -static void fib_unlock(void) -{ - dev_unlock_list(); - if (atomic_dec_and_test(&fib_users)) { - rtmsg_kick(); - wake_up(&fib_wait); - } -} - -/* - * Check if a mask is acceptable. - */ - -static __inline__ int bad_mask(u32 mask, u32 addr) -{ - if (addr & (mask = ~mask)) - return 1; - mask = ntohl(mask); - if (mask & (mask+1)) - return 1; - return 0; -} - -/* - * Evaluate mask length. - */ - -static __inline__ int fib_logmask(u32 mask) -{ - if (!(mask = ntohl(mask))) - return 32; - return ffz(~mask); -} - -/* - * Create mask from mask length. 
- */ - -static __inline__ u32 fib_mask(int logmask) -{ - if (logmask >= 32) - return 0; - return htonl(~((1<cl_id = id; - fib_classes[id] = class; - return class; -} - -static struct fib_class *fib_empty_class(void) -{ - int id; - for (id = 1; id <= RT_CLASS_MAX; id++) - if (fib_classes[id] == NULL) - return fib_alloc_class(id); - return NULL; -} - -static int fib_rule_delete(struct in_rtrulemsg *r, struct device *dev, struct nlmsghdr *n) -{ - u32 src = r->rtrmsg_src.s_addr; - u32 dst = r->rtrmsg_dst.s_addr; - u32 srcmask = fib_netmask(r->rtrmsg_srclen); - u32 dstmask = fib_netmask(r->rtrmsg_dstlen); - struct fib_rule *cl, **clp; - - for (clp=&fib_rules; (cl=*clp) != NULL; clp=&cl->cl_next) { - if (src == cl->cl_src && - srcmask == cl->cl_srcmask && - dst == cl->cl_dst && - dstmask == cl->cl_dstmask && - r->rtrmsg_tos == cl->cl_tos && - dev == cl->cl_dev && - r->rtrmsg_action == cl->cl_action && - (!r->rtrmsg_preference || r->rtrmsg_preference == cl->cl_preference) && - (!r->rtrmsg_class || (cl && r->rtrmsg_class == cl->cl_class->cl_id))) { - cli(); - *clp = cl->cl_next; - sti(); - if (cl->cl_class) - cl->cl_class->cl_users--; - kfree(cl); - return 0; - } - } - return -ESRCH; -} - -static int fib_rule_add(struct in_rtrulemsg *r, struct device *dev, struct nlmsghdr *n) -{ - u32 src = r->rtrmsg_src.s_addr; - u32 dst = r->rtrmsg_dst.s_addr; - u32 srcmask = fib_netmask(r->rtrmsg_srclen); - u32 dstmask = fib_netmask(r->rtrmsg_dstlen); - - struct fib_rule *cl, *new_cl, **clp; - struct fib_class *class = NULL; - - if ((src&~srcmask) || (dst&~dstmask)) - return -EINVAL; - if (dev && net_alias_main_dev(dev) != dev) - return -ENODEV; - - if (!r->rtrmsg_class) { - if (r->rtrmsg_action==RTP_GO || r->rtrmsg_action==RTP_NAT - || r->rtrmsg_action==RTP_MASQUERADE) { - if ((class = fib_empty_class()) == NULL) - return -ENOMEM; - class->cl_auto = 1; - } else if (r->rtrmsg_rtmsgs) - return -EINVAL; - } else if ((class = fib_alloc_class(r->rtrmsg_class)) == NULL) - return -ENOMEM; - - 
new_cl = kmalloc(sizeof(*new_cl), GFP_KERNEL); - if (!new_cl) - return -ENOMEM; - new_cl->cl_src = src; - new_cl->cl_srcmask = srcmask; - new_cl->cl_dst = dst; - new_cl->cl_dstmask = dstmask; - new_cl->cl_dev = dev; - new_cl->cl_srcmap = r->rtrmsg_srcmap.s_addr; - new_cl->cl_tos = r->rtrmsg_tos; - new_cl->cl_action = r->rtrmsg_action; - new_cl->cl_flags = r->rtrmsg_flags; - new_cl->cl_preference = r->rtrmsg_preference; - new_cl->cl_class = class; - if (class) - class->cl_users++; - - clp = &fib_rules; - - if (!new_cl->cl_preference) { - cl = fib_rules; - if (cl && (cl = cl->cl_next) != NULL) { - clp = &fib_rules->cl_next; - if (cl->cl_preference) - new_cl->cl_preference = cl->cl_preference - 1; - } - } - - while ( (cl = *clp) != NULL ) { - if (cl->cl_preference >= new_cl->cl_preference) - break; - clp = &cl->cl_next; - } - - new_cl->cl_next = cl; - cli(); - *clp = new_cl; - sti(); - - if (r->rtrmsg_rtmsgs) { - n->nlmsg_type = RTMSG_NEWROUTE; - r->rtrmsg_rtmsg->rtmsg_class = class->cl_id; - return rtmsg_process(n, r->rtrmsg_rtmsg); - } - return 0; -} - - -#define FZ_MAX_DIVISOR 1024 - -static __inline__ u32 fib_hash(u32 key, u32 mask) -{ - u32 h; - h = key^(key>>20); - h = h^(h>>10); - h = h^(h>>5); - return h & mask; -} - -static __inline__ struct fib_node ** fz_hash_p(u32 key, struct fib_zone *fz) -{ - return &fz->fz_hash[fib_hash(key, fz->fz_hashmask)]; -} - -static __inline__ struct fib_node * fz_hash(u32 key, struct fib_zone *fz) -{ - return fz->fz_hash[fib_hash(key, fz->fz_hashmask)]; -} - -/* - * Free FIB node. - */ - -static void fib_free_node(struct fib_node * f) -{ - struct fib_info * fi = f->fib_info; - if (fi && !--fi->fib_refcnt) { -#if RT_CACHE_DEBUG >= 2 - printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev ? 
fi->fib_dev->name : "null"); -#endif - if (fi->fib_next) - fi->fib_next->fib_prev = fi->fib_prev; - if (fi->fib_prev) - fi->fib_prev->fib_next = fi->fib_next; - if (fi == fib_info_list) - fib_info_list = fi->fib_next; - } - kfree_s(f, sizeof(struct fib_node)); -} - -static __inline__ int fib_flags_trans(unsigned flags) -{ - if (flags & RTF_BROADCAST) - return IS_BROADCAST; - if (flags & RTF_MULTICAST) - return IS_MULTICAST; - if (flags & RTF_LOCAL) - return IS_MYADDR; - return 0; -} - -unsigned ip_fib_chk_addr(u32 addr) -{ - struct fib_zone * fz; - struct fib_node * f; - - /* - * Accept both `all ones' and `all zeros' as BROADCAST. - * (Support old BSD in other words). This old BSD - * support will go very soon as it messes other things - * up. - */ - - if (addr == INADDR_ANY || addr == 0xFFFFFFFF) - return RTF_LOCAL|RTF_BROADCAST; - - if ((addr & htonl(0x7F000000L)) == htonl(0x7F000000L)) - return RTF_LOCAL|RTF_INTERFACE; - - if (MULTICAST(addr)) - return RTF_MULTICAST; - - addr = ntohl(addr); - for (fz = local_class.fib_zone_list; fz; fz = fz->fz_next) { - u32 key = (addr&fz->fz_mask)>>fz->fz_logmask; - for (f = fz_hash(key, fz); f; f = f->fib_next) { - if (key != f->fib_key || (f->fib_flag & FIBFLG_DOWN)) - continue; - if (!f->fib_info) - return 0; - return f->fib_info->fib_flags&RTF_ADDRCLASSMASK; - } - } - - return 0; -} - -int __ip_chk_addr(unsigned long addr) -{ - return fib_flags_trans(ip_fib_chk_addr(addr)); -} - -/* - * Find the first device with a given source address. 
- */ - -struct device *ip_dev_find(unsigned long addr, char *name) -{ - struct fib_zone * fz = local_class.fib_zones[0]; - u32 key; - struct fib_node * f; - - key = (ntohl(addr)&fz->fz_mask)>>fz->fz_logmask; - for (f = fz_hash(key, fz); f; f = f->fib_next) { - if (key == f->fib_key && - !(f->fib_flag & (FIBFLG_DOWN|FIBFLG_REJECT|FIBFLG_THROW)) && - f->fib_info->fib_flags == (RTF_IFLOCAL&~RTF_UP)) { - if (!name || strcmp(name, f->fib_info->fib_dev->name) == 0) - return f->fib_info->fib_dev; - } - } - - return NULL; -} - -/* - * Find tunnel with a given source and destination. - */ - -struct device *ip_dev_find_tunnel(u32 daddr, u32 saddr) -{ - struct fib_zone * fz = local_class.fib_zones[0]; - u32 key; - struct fib_node * f; - - key = (ntohl(daddr)&fz->fz_mask)>>fz->fz_logmask; - for (f = fz_hash(key, fz); f; f = f->fib_next) { - if (key == f->fib_key && - !(f->fib_flag & (FIBFLG_DOWN|FIBFLG_REJECT|FIBFLG_THROW)) && - f->fib_info->fib_flags == (RTF_IFLOCAL&~RTF_UP)) { - struct device *dev = f->fib_info->fib_dev; - if (dev->type == ARPHRD_TUNNEL && - dev->pa_dstaddr == saddr) - return dev; - } - if (!f->fib_info) - return NULL; - } - - return NULL; -} - - -int ip_fib_chk_default_gw(u32 addr, struct device *dev) -{ - struct fib_rule *cl; - struct fib_node * f; - - for (cl = fib_rules; cl; cl = cl->cl_next) { - if (cl->cl_srcmask || cl->cl_dstmask || cl->cl_tos || - cl->cl_dev || cl->cl_action != RTP_GO || !cl->cl_class || - !cl->cl_class->fib_zones[32]) - continue; - for (f = cl->cl_class->fib_zones[32]->fz_hash[0]; f; f = f->fib_next) { - struct fib_info *fi = f->fib_info; - if (!(f->fib_flag & (FIBFLG_DOWN|FIBFLG_REJECT|FIBFLG_THROW)) && - fi->fib_gateway == addr && - fi->fib_dev == dev && - fi->fib_flags&RTF_GATEWAY) - return 0; - } - } - return -1; -} - - -/* - * Main lookup routine. 
- */ - - -int -fib_lookup(struct fib_result *res, u32 daddr, u32 src, u8 tos, - struct device *devin, struct device *devout) -{ - struct fib_node * f; - struct fib_rule * cl; - u32 dst; - int local = tos & 1; - - tos &= IPTOS_TOS_MASK; - dst = ntohl(daddr); - - for (cl = fib_rules; cl; cl=cl->cl_next) { - struct fib_zone * fz; - - if (((src^cl->cl_src) & cl->cl_srcmask) || - ((daddr^cl->cl_dst) & cl->cl_dstmask) || - (cl->cl_tos && cl->cl_tos != tos) || - (cl->cl_dev && cl->cl_dev != devin)) - continue; - - switch (cl->cl_action) { - case RTP_GO: - case RTP_NAT: - case RTP_MASQUERADE: - default: - break; - case RTP_UNREACHABLE: - return -ENETUNREACH; - case RTP_DROP: - return -EINVAL; - case RTP_PROHIBIT: - return -EACCES; - } - - for (fz = cl->cl_class->fib_zone_list; fz; fz = fz->fz_next) { - u32 key = (dst&fz->fz_mask)>>fz->fz_logmask; - - for (f = fz_hash(key, fz); f; f = f->fib_next) { - if (key != f->fib_key || - (f->fib_flag & FIBFLG_DOWN) || - (f->fib_tos && f->fib_tos != tos)) - continue; - if (f->fib_flag & FIBFLG_THROW) - goto next_class; - if (f->fib_flag & FIBFLG_REJECT) - return -ENETUNREACH; - if (devout && f->fib_info->fib_dev != devout) - continue; - if (!local || !(f->fib_info->fib_flags&RTF_GATEWAY)) { - res->f = f; - res->fr = cl; - res->fm = fz->fz_logmask; - return 0; - } - } - } -next_class: - } - return -ENETUNREACH; -} - -static int fib_autopublish(int op, struct fib_node *f, int logmask) -{ - struct fib_zone *fz; - struct fib_node *f1; - struct arpreq r; - u32 addr = htonl(f->fib_key<fib_flag || LOOPBACK(addr) || - (!RT_LOCALADDR(f->fib_info->fib_flags) && - !(f->fib_info->fib_flags&RTF_NAT))) - return 0; - - memset(&r, 0, sizeof(struct arpreq)); - r.arp_flags = ATF_PUBL|ATF_PERM|ATF_MAGIC; - if (logmask) - r.arp_flags |= ATF_NETMASK; - ((struct sockaddr_in*)&r.arp_pa)->sin_family = AF_INET; - ((struct sockaddr_in*)&r.arp_pa)->sin_addr.s_addr = addr; - ((struct sockaddr_in*)&r.arp_netmask)->sin_family = AF_INET; - ((struct 
sockaddr_in*)&r.arp_netmask)->sin_addr.s_addr = fib_mask(logmask); - - if (op) - return arp_req_set(&r, NULL); - - fz = local_class.fib_zones[logmask]; - - for (f1 = fz_hash(f->fib_key, fz); f1; f1=f1->fib_next) { - if (f->fib_key != f1->fib_key || f1->fib_flag || - (!RT_LOCALADDR(f1->fib_info->fib_flags) && - !(f1->fib_info->fib_flags&RTF_NAT))) - continue; - return 0; - } - - return arp_req_delete(&r, NULL); -} - -#define FIB_SCAN(f, fp) \ -for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fib_next) - -#define FIB_SCAN_KEY(f, fp, key) \ -for ( ; ((f) = *(fp)) != NULL && (f)->fib_key == (key); (fp) = &(f)->fib_next) - -#define FIB_CONTINUE(f, fp) \ -{ \ - fp = &f->fib_next; \ - continue; \ -} - -static int fib_delete(struct in_rtmsg * r, struct device *dev, - struct fib_class *class, struct nlmsghdr *n) -{ - struct fib_node **fp, *f; - struct fib_zone *fz = class->fib_zones[32-r->rtmsg_prefixlen]; - int logmask = 32 - r->rtmsg_prefixlen; - u32 dst = ntohl(r->rtmsg_prefix.s_addr); - u32 gw = r->rtmsg_gateway.s_addr; - short metric = r->rtmsg_metric; - u8 tos = r->rtmsg_tos; - u8 fibflg = 0; - int found=0; - unsigned flags; - u32 key; - - flags = r->rtmsg_flags; - if (flags & RTF_REJECT) - fibflg |= FIBFLG_REJECT; - else if (flags & RTF_THROW) - fibflg |= FIBFLG_THROW; - flags &= ~(RTF_UP|RTF_REJECT|RTF_THROW); - - if (fz != NULL) { - key = (dst&fz->fz_mask)>>logmask; - fp = fz_hash_p(key, fz); - - FIB_SCAN(f, fp) { - if (f->fib_key == key) - break; - } - FIB_SCAN_KEY(f, fp, key) { - if (f->fib_tos == tos) - break; - } - - while ((f = *fp) != NULL && f->fib_key == key && f->fib_tos == tos) { - struct fib_info * fi = f->fib_info; - - /* - * If metric was not specified (<0), match all metrics. 
- */ - if (metric >= 0 && f->fib_metric != metric) - FIB_CONTINUE(f, fp); - - if (flags & RTF_MAGIC) { - /* "Magic" deletions require exact match */ - if (!fi || (fi->fib_flags^flags) || - fi->fib_dev != dev || - fi->fib_gateway != gw) - FIB_CONTINUE(f, fp); - } else { - /* - * Device, gateway, reject and throw are - * also checked if specified. - */ - if ((dev && fi && fi->fib_dev != dev) || - (gw && fi && fi->fib_gateway != gw) || - (fibflg && (f->fib_flag^fibflg)&~FIBFLG_DOWN)) - FIB_CONTINUE(f, fp); - } - cli(); - /* It's interesting, can this operation be not atomic? */ - *fp = f->fib_next; - sti(); - if (class == &local_class) - fib_autopublish(0, f, logmask); - rtmsg_fib(RTMSG_DELROUTE, f, logmask, class, n); - fib_free_node(f); - found++; - } - fz->fz_nent -= found; - } - - if (found) { - fib_stamp++; - rt_cache_flush(0); - rtmsg_ack(n, 0); - return 0; - } - rtmsg_ack(n, ESRCH); - return -ESRCH; -} - -static struct fib_info * fib_create_info(struct device * dev, struct in_rtmsg *r) -{ - struct fib_info * fi; - unsigned flags = r->rtmsg_flags; - u32 gw = r->rtmsg_gateway.s_addr; - unsigned short mtu; - unsigned short irtt; - unsigned long window; - - mtu = dev ? dev->mtu : 0; - if (flags&RTF_MSS && r->rtmsg_mtu < mtu && r->rtmsg_mtu >= 68) - mtu = r->rtmsg_mtu; - window = (flags & RTF_WINDOW) ? r->rtmsg_window : 0; - irtt = (flags & RTF_IRTT) ? r->rtmsg_rtt : TCP_TIMEOUT_INIT; - - flags &= RTF_FIB; - - for (fi=fib_info_list; fi; fi = fi->fib_next) { - if (fi->fib_gateway != gw || - fi->fib_dev != dev || - fi->fib_flags != flags || - fi->fib_mtu != mtu || - fi->fib_window != window || - fi->fib_irtt != irtt) - continue; - fi->fib_refcnt++; -#if RT_CACHE_DEBUG >= 2 - printk("fib_create_info: fi %08x/%s/%04x is duplicate\n", fi->fib_gateway, fi->fib_dev ? 
fi->fib_dev->name : "null", fi->fib_flags); -#endif - return fi; - } - fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL); - if (!fi) - return NULL; - memset(fi, 0, sizeof(struct fib_info)); - fi->fib_flags = flags; - fi->fib_dev = dev; - fi->fib_gateway = gw; - fi->fib_mtu = mtu; - fi->fib_window = window; - fi->fib_refcnt++; - fi->fib_next = fib_info_list; - fi->fib_prev = NULL; - fi->fib_irtt = irtt; - if (fib_info_list) - fib_info_list->fib_prev = fi; - fib_info_list = fi; -#if RT_CACHE_DEBUG >= 2 - printk("fib_create_info: fi %08x/%s/%04x is created\n", fi->fib_gateway, fi->fib_dev ? fi->fib_dev->name : "null", fi->fib_flags); -#endif - return fi; -} - -static __inline__ void fib_rebuild_zone(struct fib_zone *fz, - struct fib_node **old_ht, - int old_divisor) -{ - int i; - struct fib_node **ht = fz->fz_hash; - u32 hashmask = fz->fz_hashmask; - struct fib_node *f, **fp, *next; - unsigned hash; - - for (i=0; ifib_next; - f->fib_next = NULL; - hash = fib_hash(f->fib_key, hashmask); - for (fp = &ht[hash]; *fp; fp = &(*fp)->fib_next) - /* NONE */; - *fp = f; - } - } -} - -static void fib_rehash_zone(struct fib_zone *fz) -{ - struct fib_node **ht, **old_ht; - int old_divisor, new_divisor; - u32 new_hashmask; - - old_divisor = fz->fz_divisor; - - switch (old_divisor) { - case 16: - new_divisor = 256; - new_hashmask = 0xFF; - break; - case 256: - new_divisor = 1024; - new_hashmask = 0x3FF; - break; - default: - printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); - return; - } -#if RT_CACHE_DEBUG >= 2 - printk("fib_rehash_zone: hash for zone %d grows from %d\n", fz->fz_logmask, old_divisor); -#endif - - ht = kmalloc(new_divisor*sizeof(struct rtable*), GFP_KERNEL); - - if (ht) { - memset(ht, 0, new_divisor*sizeof(struct fib_node*)); - start_bh_atomic(); - old_ht = fz->fz_hash; - fz->fz_hash = ht; - fz->fz_hashmask = new_hashmask; - fz->fz_divisor = new_divisor; - fib_rebuild_zone(fz, old_ht, old_divisor); - fib_stamp++; - end_bh_atomic(); - 
kfree(old_ht); - } -} - -static struct fib_zone * -fib_new_zone(struct fib_class *class, int logmask) -{ - int i; - struct fib_zone *fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL); - if (!fz) - return NULL; - - memset(fz, 0, sizeof(struct fib_zone)); - if (logmask < 32) { - fz->fz_divisor = 16; - fz->fz_hashmask = 0xF; - } else { - fz->fz_divisor = 1; - fz->fz_hashmask = 0; - } - fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL); - if (!fz->fz_hash) { - kfree(fz); - return NULL; - } - memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*)); - fz->fz_logmask = logmask; - fz->fz_mask = ntohl(fib_mask(logmask)); - for (i=logmask-1; i>=0; i--) - if (class->fib_zones[i]) - break; - start_bh_atomic(); - if (i<0) { - fz->fz_next = class->fib_zone_list; - class->fib_zone_list = fz; - } else { - fz->fz_next = class->fib_zones[i]->fz_next; - class->fib_zones[i]->fz_next = fz; - } - class->fib_zones[logmask] = fz; - fib_stamp++; - end_bh_atomic(); - return fz; -} - -static int fib_create(struct in_rtmsg *r, struct device *dev, - struct fib_class *class, struct nlmsghdr *n) -{ - struct fib_node *f, *f1, **fp; - struct fib_node **dup_fp = NULL; - struct fib_zone * fz; - struct fib_info * fi; - - long logmask = 32L - r->rtmsg_prefixlen; /* gcc bug work-around: must be "L" and "long" */ - u32 dst = ntohl(r->rtmsg_prefix.s_addr); - u32 gw = r->rtmsg_gateway.s_addr; - short metric = r->rtmsg_metric; - unsigned flags = r->rtmsg_flags; - u8 tos = r->rtmsg_tos; - u8 fibflg = 0; - u32 key; - - /* - * Allocate an entry and fill it in. 
- */ - - f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL); - if (f == NULL) { - rtmsg_ack(n, ENOMEM); - return -ENOMEM; - } - - memset(f, 0, sizeof(struct fib_node)); - - if (!(flags & RTF_UP)) - fibflg = FIBFLG_DOWN; - if (flags & RTF_REJECT) - fibflg |= FIBFLG_REJECT; - else if (flags & RTF_THROW) - fibflg |= FIBFLG_THROW; - - flags &= ~(RTF_UP|RTF_REJECT|RTF_THROW); - r->rtmsg_flags = flags; - - fi = NULL; - if (!(fibflg & (FIBFLG_REJECT|FIBFLG_THROW))) { - if ((fi = fib_create_info(dev, r)) == NULL) { - kfree_s(f, sizeof(struct fib_node)); - rtmsg_ack(n, ENOMEM); - return -ENOMEM; - } - f->fib_info = fi; - flags = fi->fib_flags; - } - - f->fib_key = key = dst>>logmask; - f->fib_metric = metric; - f->fib_tos = tos; - f->fib_flag = fibflg; - fz = class->fib_zones[logmask]; - - if (!fz && !(fz = fib_new_zone(class, logmask))) { - fib_free_node(f); - rtmsg_ack(n, ENOMEM); - return -ENOMEM; - } - - if (fz->fz_nent > (fz->fz_divisor<<2) && - fz->fz_divisor < FZ_MAX_DIVISOR && - (!logmask || (1<<(32-logmask)) > fz->fz_divisor)) - fib_rehash_zone(fz); - - fp = fz_hash_p(key, fz); - - /* - * Scan list to find the first route with the same destination - */ - FIB_SCAN(f1, fp) { - if (f1->fib_key == key) - break; - } - - /* - * Find route with the same destination and tos. - */ - FIB_SCAN_KEY(f1, fp, dst) { - if (f1->fib_tos <= tos) - break; - } - - /* - * Find route with the same destination/tos and less (or equal) metric. - * "Magic" additions go to the end of list. - */ - for ( ; (f1 = *fp) != NULL && f1->fib_key == key && f1->fib_tos == tos; - fp = &f1->fib_next) { - if (f1->fib_metric >= metric && metric != MAGIC_METRIC) - break; - - /* - * Record route with the same destination/tos/gateway/dev, - * but less metric. 
- */ - if (!dup_fp) { - struct fib_info *fi1 = f1->fib_info; - - if ((fibflg^f1->fib_flag) & ~FIBFLG_DOWN) - continue; - if (fi == fi1 || - (fi && fi1 && - fi->fib_dev == fi1->fib_dev && - fi->fib_gateway == fi1->fib_gateway && - !(flags&RTF_MAGIC))) - dup_fp = fp; - } - } - - /* - * Is it already present? - */ - - if (f1 && f1->fib_key == key && f1->fib_tos == tos && - f1->fib_metric == metric && f1->fib_info == fi) { - fib_free_node(f); - - if (fibflg == f1->fib_flag) { - rtmsg_ack(n, EEXIST); - return -EEXIST; - } else { - fib_stamp++; - f1->fib_flag = fibflg; - rt_cache_flush(0); - rtmsg_ack(n, 0); - return 0; - } - } - - /* - * Do not add "magic" route, if better one is already present. - */ - if ((flags & RTF_MAGIC) && dup_fp) { - fib_free_node(f); - rtmsg_ack(n, EEXIST); - return -EEXIST; - } - - /* - * Insert new entry to the list. - */ - - cli(); - f->fib_next = f1; - *fp = f; - sti(); - fz->fz_nent++; - if (class == &local_class && !dup_fp) - fib_autopublish(1, f, logmask); - rtmsg_fib(RTMSG_NEWROUTE, f, logmask, class, n); - - if (flags & RTF_MAGIC) { - fib_stamp++; - rt_cache_flush(0); - rtmsg_ack(n, 0); - return 0; - } - - /* - * Clean routes with the same destination,tos,gateway and device, - * but different metric. - */ - fp = dup_fp ? 
: &f->fib_next; - - while ((f1 = *fp) != NULL && f1->fib_key == key && f1->fib_tos == tos) { - if (f1 == f || ((f1->fib_flag^fibflg)&~FIBFLG_DOWN)) - FIB_CONTINUE(f1, fp); - - if (f1->fib_info != fi && - (!fi || !f1->fib_info || - f1->fib_info->fib_gateway != gw || - f1->fib_info->fib_dev != dev)) - FIB_CONTINUE(f1, fp); - - cli(); - *fp = f1->fib_next; - sti(); - fz->fz_nent--; - rtmsg_fib(RTMSG_DELROUTE, f1, logmask, class, n); - fib_free_node(f1); - } - fib_stamp++; - rt_cache_flush(0); - rtmsg_ack(n, 0); - return 0; -} - -static int fib_flush_list(struct fib_node ** fp, struct device *dev, - int logmask, struct fib_class *class) -{ - int found = 0; - struct fib_node *f; - - while ((f = *fp) != NULL) { - if (!f->fib_info || f->fib_info->fib_dev != dev) - FIB_CONTINUE(f, fp); - cli(); - *fp = f->fib_next; - sti(); - if (class == &local_class) - fib_autopublish(0, f, logmask); -#ifdef CONFIG_RTNETLINK - if (rt_nl_flags&RTCTL_FLUSH) - rtmsg_fib(RTMSG_DELROUTE, f, logmask, class, 0); -#endif - fib_free_node(f); - found++; - } - return found; -} - -static void fib_flush(struct device *dev) -{ - struct fib_class *class; - struct fib_rule *cl, **clp; - struct fib_zone *fz; - int found = 0; - int i, tmp, cl_id; - - - for (cl_id = RT_CLASS_MAX; cl_id>=0; cl_id--) { - if ((class = fib_classes[cl_id])==NULL) - continue; - for (fz = class->fib_zone_list; fz; fz = fz->fz_next) { - tmp = 0; - for (i=fz->fz_divisor-1; i>=0; i--) - tmp += fib_flush_list(&fz->fz_hash[i], dev, - fz->fz_logmask, class); - fz->fz_nent -= tmp; - found += tmp; - } - } - - clp = &fib_rules; - while ( (cl=*clp) != NULL) { - if (cl->cl_dev != dev) { - clp = &cl->cl_next; - continue; - } - found++; - cli(); - *clp = cl->cl_next; - sti(); - kfree(cl); - } - - if (found) { - fib_stamp++; - rt_cache_flush(1); - } -} - -#ifdef CONFIG_PROC_FS - -static unsigned __inline__ fib_flag_trans(u8 fibflg) -{ - unsigned ret = RTF_UP; - if (!fibflg) - return ret; - if (fibflg & FIBFLG_DOWN) - ret &= ~RTF_UP; - if 
(fibflg & FIBFLG_REJECT) - ret |= RTF_REJECT; - if (fibflg & FIBFLG_THROW) - ret |= RTF_THROW; - return ret; -} - -/* - * Called from the PROCfs module. This outputs /proc/net/route. - * - * We preserve the old format but pad the buffers out. This means that - * we can spin over the other entries as we read them. Remember the - * gated BGP4 code could need to read 60,000+ routes on occasion (that's - * about 7Mb of data). To do that ok we will need to also cache the - * last route we got to (reads will generally be following on from - * one another without gaps). - */ - -static int fib_get_info(char *buffer, char **start, off_t offset, int length, int dummy) -{ - struct fib_class *class; - struct fib_zone *fz; - struct fib_node *f; - int len=0; - off_t pos=0; - char temp[129]; - int i; - int cl_id; - - pos = 128; - - if (offset<128) - { - sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\tTOS\tClass"); - len = 128; - } - - fib_lock(); - - for (cl_id=RT_CLASS_MAX-1; cl_id >= 0; cl_id--) { - class = fib_classes[cl_id]; - if (!class) - continue; - for (fz=class->fib_zone_list; fz; fz = fz->fz_next) - { - int maxslot; - struct fib_node ** fp; - - if (fz->fz_nent == 0) - continue; - - if (pos + 128*fz->fz_nent <= offset) { - pos += 128*fz->fz_nent; - len = 0; - continue; - } - - maxslot = fz->fz_divisor; - fp = fz->fz_hash; - - for (i=0; i < maxslot; i++, fp++) { - - for (f = *fp; f; f = f->fib_next) - { - struct fib_info * fi; - unsigned flags; - - /* - * Spin through entries until we are ready - */ - pos += 128; - - if (pos <= offset) - { - len=0; - continue; - } - - fi = f->fib_info; - flags = fib_flag_trans(f->fib_flag); - - if (fi) - flags |= fi->fib_flags; - sprintf(temp, "%s\t%08lX\t%08X\t%04X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%02x\t%02x", - fi && fi->fib_dev ? fi->fib_dev->name : "*", htonl(f->fib_key<fz_logmask), fi ? fi->fib_gateway : 0, - flags, 0, 0, f->fib_metric, - htonl(fz->fz_mask), fi ? 
(int)fi->fib_mtu : 0, fi ? fi->fib_window : 0, fi ? (int)fi->fib_irtt : 0, f->fib_tos, class->cl_id); - sprintf(buffer+len,"%-127s\n",temp); - - len += 128; - if (pos >= offset+length) - goto done; - } - } - } - } - -done: - fib_unlock(); - - *start = buffer+len-(pos-offset); - len = pos - offset; - if (len>length) - len = length; - return len; -} - -static int fib_local_get_info(char *buffer, char **start, off_t offset, int length, int dummy) -{ - struct fib_zone *fz; - struct fib_node *f; - int len=0; - off_t pos=0; - char temp[129]; - int i; - - pos = 128; - - if (offset<128) - { - sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\tTOS\tClass"); - len = 128; - } - - fib_lock(); - - for (fz=local_class.fib_zone_list; fz; fz = fz->fz_next) - { - int maxslot; - struct fib_node ** fp; - - if (fz->fz_nent == 0) - continue; - - if (pos + 128*fz->fz_nent <= offset) - { - pos += 128*fz->fz_nent; - len = 0; - continue; - } - - maxslot = fz->fz_divisor; - fp = fz->fz_hash; - - for (i=0; i < maxslot; i++, fp++) - { - - for (f = *fp; f; f = f->fib_next) - { - unsigned flags; - struct fib_info * fi; - - /* - * Spin through entries until we are ready - */ - pos += 128; - - if (pos <= offset) - { - len=0; - continue; - } - - fi = f->fib_info; - flags = fib_flag_trans(f->fib_flag); - - if (fi) - flags |= fi->fib_flags; - sprintf(temp, "%s\t%08lX\t%08X\t%X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%02x\t%02x", - fi && fi->fib_dev ? fi->fib_dev->name : "*", - htonl(f->fib_key<fz_logmask), - fi ? fi->fib_gateway : 0, - flags, 0, 0, f->fib_metric, - htonl(fz->fz_mask), fi ? (int)fi->fib_mtu : 0, fi ? fi->fib_window : 0, fi ? 
(int)fi->fib_irtt : 0, f->fib_tos, RT_CLASS_LOCAL); - sprintf(buffer+len,"%-127s\n",temp); - - len += 128; - if (pos >= offset+length) - goto done; - } - } - } - -done: - fib_unlock(); - - *start = buffer+len-(pos-offset); - len = pos - offset; - if (len>length) - len = length; - return len; -} - -static int fib_rules_get_info(char *buffer, char **start, off_t offset, int length, int dummy) -{ - int len=0; - off_t pos=0; - char temp[129]; - struct fib_rule *cl; - - pos = 128; - - if (offset<128) { - sprintf(buffer,"%-127s\n","Pref\tSource\t\tSrcMask\t\tDst\t\tDstMask\t\tIface\tTOS\tClass\tFlags\tSrcMap\n"); - len = 128; - } - - - fib_lock(); - - for (cl = fib_rules; cl; cl = cl->cl_next) { - /* - * Spin through entries until we are ready - */ - pos += 128; - - if (pos <= offset) { - len = 0; - continue; - } - - sprintf(temp, "%d\t%08X\t%08X\t%08X\t%08X\t%s\t%02X\t%02x\t%02X\t%02X\t%08X", - cl->cl_preference, - cl->cl_src, cl->cl_srcmask, - cl->cl_dst, cl->cl_dstmask, - cl->cl_dev ? cl->cl_dev->name : "*", - cl->cl_tos, cl->cl_class ? 
cl->cl_class->cl_id : 0, - cl->cl_flags, cl->cl_action, cl->cl_srcmap - ); - sprintf(buffer+len,"%-127s\n",temp); - len += 128; - if (pos >= offset+length) - goto done; - } - -done: - fib_unlock(); - - *start = buffer+len-(pos-offset); - len = pos-offset; - if (len>length) - len = length; - return len; -} - -static int fib_class_get_info(char *buffer, char **start, off_t offset, int length, int dummy) -{ - int len=0; - off_t pos=0; - char temp[129]; - int i; - struct fib_class *cl; - - pos = 128; - - if (offset<128) - { - sprintf(buffer,"%-127s\n","Class\tSize\n"); - len = 128; - } - - - fib_lock(); - - for (i = RT_CLASS_MAX; i>=0; i--) - { - int sz = 0; - struct fib_zone *fz; - - if ((cl=fib_classes[i])==NULL) - continue; - - for (fz=cl->fib_zone_list; fz; fz=fz->fz_next) - sz += fz->fz_nent; - - /* - * Spin through entries until we are ready - */ - pos += 128; - - if (pos <= offset) - { - len = 0; - continue; - } - - sprintf(temp, "%d\t%d\n", cl->cl_id, sz); - sprintf(buffer+len,"%-127s\n",temp); - len += 128; - if (pos >= offset+length) - goto done; - } - -done: - fib_unlock(); - - *start = buffer+len-(pos-offset); - len = pos-offset; - if (len>length) - len = length; - return len; -} - -#endif - -static int rtmsg_process(struct nlmsghdr *n, struct in_rtmsg *r) -{ - unsigned long cmd=n->nlmsg_type; - struct device * dev = NULL; - struct fib_class *class; - - if ((cmd != RTMSG_NEWROUTE && cmd != RTMSG_DELROUTE) || - (r->rtmsg_flags & (RTF_MAGIC|RTF_XRESOLVE|RTF_REINSTATE)) || - r->rtmsg_prefixlen > 32 || - (r->rtmsg_tos & ~IPTOS_TOS_MASK)) { - rtmsg_ack(n, EINVAL); - return -EINVAL; - } - - /* Reject/throw directives have no interface/gateway specification */ - - if (r->rtmsg_flags & (RTF_REJECT|RTF_THROW)) { - r->rtmsg_ifindex = 0; - r->rtmsg_gateway.s_addr = 0; - r->rtmsg_flags &= ~RTF_GATEWAY; - } - - /* Silly metric hack, it is preserved for "compatibility", - * though I do not know any program using it. 
- */ - - r->rtmsg_metric--; - if (cmd == RTMSG_NEWROUTE && r->rtmsg_metric < 0) - r->rtmsg_metric = 0; - - if (cmd == RTMSG_DELROUTE) - r->rtmsg_flags &= RTF_FIB; - - if (r->rtmsg_ifindex) { - dev = dev_get_by_index(r->rtmsg_ifindex); - if (!dev) { - rtmsg_ack(n, ENODEV); - return -ENODEV; - } - } - - if (r->rtmsg_gateway.s_addr && !(r->rtmsg_flags&RTF_NAT)) { - struct fib_info *fi; - - fi = fib_lookup_info(r->rtmsg_gateway.s_addr, 0, 1, - &loopback_dev, dev); - if (fi) { - if (fi->fib_flags&(RTF_BROADCAST|RTF_MULTICAST) && - cmd != RTMSG_DELROUTE) - return -EINVAL; - dev = fi->fib_dev; - if (fi->fib_flags&RTF_LOCAL) { - r->rtmsg_flags &= ~RTF_GATEWAY; - r->rtmsg_gateway.s_addr = 0; - } - } else if (cmd != RTMSG_DELROUTE) - return -ENETUNREACH; - - /* If gateway is not found in routing table, - * we could assume that user knows that he does. - * It is link layer problem to decide reachable - * this gateway or not. Good example is tunnel interface. - * Another example is ethernet, ARP could (in theory) - * resolve addresses, even if we had no routes. - */ - } - - if (dev && (dev->flags&IFF_LOOPBACK)) { - if (r->rtmsg_flags&RTF_GATEWAY) - return -EINVAL; - /* - * Loopback routes: we declare them local addresses. - * It is the only reasonable solution to avoid - * loopback routing loops. 
- */ - r->rtmsg_flags |= RTF_LOCAL|RTF_INTERFACE; - } - - if (r->rtmsg_flags&RTF_GATEWAY) { - if (!dev && cmd != RTMSG_DELROUTE) { - rtmsg_ack(n, ENETUNREACH); - return -ENETUNREACH; - } - } else { - if (!dev && !(r->rtmsg_flags & (RTF_NAT|RTF_REJECT|RTF_THROW)) && - cmd != RTMSG_DELROUTE) { - rtmsg_ack(n, ENODEV); - return -ENODEV; - } - } - - if (dev && dev->family != AF_INET) - { - rtmsg_ack(n, ENODEV); - return -ENODEV; - } - - if (r->rtmsg_class == 0) { - if (r->rtmsg_flags&(RTF_LOCAL|RTF_NAT)) - r->rtmsg_class = RT_CLASS_LOCAL; - else if ((r->rtmsg_flags&RTF_GATEWAY) && - (ipv4_config.fib_model==2 || - (ipv4_config.fib_model==1 && !r->rtmsg_prefixlen))) - r->rtmsg_class = RT_CLASS_DEFAULT; - else - r->rtmsg_class = RT_CLASS_MAIN; - } - - if ((class = fib_classes[r->rtmsg_class]) == NULL) - { - rtmsg_ack(n, EINVAL); - return -EINVAL; - } - - return (cmd == RTMSG_NEWROUTE ? fib_create : fib_delete)(r, dev, class, n); -} - - -static int rtrulemsg_process(struct nlmsghdr *n, struct in_rtrulemsg *r) -{ - unsigned long cmd=n->nlmsg_type; - struct device * dev = NULL; - - if ((cmd != RTMSG_NEWRULE && cmd != RTMSG_DELRULE) || - r->rtrmsg_srclen > 32 || r->rtrmsg_dstlen > 32 || - (r->rtrmsg_tos & ~IPTOS_TOS_MASK)) - return -EINVAL; - - if (r->rtrmsg_ifindex) { - dev = dev_get_by_index(r->rtrmsg_ifindex); - if (!dev) - return -ENODEV; - if (dev->family != AF_INET) - return -ENODEV; - } - - if (cmd == RTMSG_DELRULE) - return fib_rule_delete(r, dev, n); - - return fib_rule_add(r, dev, n); -} - - -static int ifmsg_process(struct nlmsghdr *n, struct in_ifmsg *r) -{ - unsigned long cmd=n->nlmsg_type; - - if (cmd != RTMSG_NEWDEVICE && cmd != RTMSG_DELDEVICE) { - rtmsg_ack(n, EINVAL); - return -EINVAL; - } - rtmsg_ack(n, EINVAL); - return -EINVAL; -} - -static int rtcmsg_process(struct nlmsghdr *n, struct in_rtctlmsg *r) -{ -#ifdef CONFIG_RTNETLINK - if (r->rtcmsg_flags&RTCTL_DELAY) - rtmsg_ctl.nlmsg_delay = r->rtcmsg_delay; - if (r->rtcmsg_flags&RTCTL_OWNER) - rt_nl_owner = 
n->nlmsg_pid; - rt_nl_flags = r->rtcmsg_flags; - return 0; -#else - return -EINVAL; -#endif -} - -static int get_rt_from_user(struct in_rtmsg *rtm, void *arg) -{ - struct rtentry r; - - if (copy_from_user(&r, arg, sizeof(struct rtentry))) - return -EFAULT; - if (r.rt_dev) { - struct device *dev; - char devname[16]; - - if (copy_from_user(devname, r.rt_dev, 15)) - return -EFAULT; - devname[15] = 0; - dev = dev_get(devname); - if (!dev) - return -ENODEV; - rtm->rtmsg_ifindex = dev->ifindex; - } - - rtm->rtmsg_flags = r.rt_flags; - - if (r.rt_dst.sa_family != AF_INET) - return -EAFNOSUPPORT; - rtm->rtmsg_prefix = ((struct sockaddr_in*)&r.rt_dst)->sin_addr; - - if (rtm->rtmsg_flags&RTF_HOST) { - rtm->rtmsg_flags &= ~RTF_HOST; - rtm->rtmsg_prefixlen = 32; - } else { - u32 mask = ((struct sockaddr_in*)&r.rt_genmask)->sin_addr.s_addr; - if (r.rt_genmask.sa_family != AF_INET) { - printk(KERN_DEBUG "%s forgot to specify route netmask.\n", current->comm); - if (r.rt_genmask.sa_family) - return -EAFNOSUPPORT; - } - if (bad_mask(mask, rtm->rtmsg_prefix.s_addr)) - return -EINVAL; - rtm->rtmsg_prefixlen = 32 - fib_logmask(mask); - } - if ((rtm->rtmsg_flags & RTF_GATEWAY) && - r.rt_gateway.sa_family != AF_INET) - return -EAFNOSUPPORT; - rtm->rtmsg_gateway = ((struct sockaddr_in*)&r.rt_gateway)->sin_addr; - rtm->rtmsg_rtt = r.rt_irtt; - rtm->rtmsg_window = r.rt_window; - rtm->rtmsg_mtu = r.rt_mtu; - rtm->rtmsg_class = r.rt_class; - rtm->rtmsg_metric = r.rt_metric; - rtm->rtmsg_tos = r.rt_tos; - return 0; -} - - -/* - * Handle IP routing ioctl calls. 
These are used to manipulate the routing tables - */ - -int ip_rt_ioctl(unsigned int cmd, void *arg) -{ - int err; - union - { - struct in_rtmsg rtmsg; - struct in_ifmsg ifmsg; - struct in_rtrulemsg rtrmsg; - struct in_rtctlmsg rtcmsg; - } m; - struct nlmsghdr dummy_nlh; - - memset(&m, 0, sizeof(m)); - dummy_nlh.nlmsg_seq = 0; - dummy_nlh.nlmsg_pid = current->pid; - - switch (cmd) - { - case SIOCADDRT: /* Add a route */ - case SIOCDELRT: /* Delete a route */ - if (!suser()) - return -EPERM; - err = get_rt_from_user(&m.rtmsg, arg); - if (err) - return err; - fib_lock(); - dummy_nlh.nlmsg_type = cmd == SIOCDELRT ? RTMSG_DELROUTE - : RTMSG_NEWROUTE; - err = rtmsg_process(&dummy_nlh, &m.rtmsg); - fib_unlock(); - return err; - case SIOCRTMSG: - if (!suser()) - return -EPERM; - if (copy_from_user(&dummy_nlh, arg, sizeof(dummy_nlh))) - return -EFAULT; - switch (dummy_nlh.nlmsg_type) - { - case RTMSG_NEWROUTE: - case RTMSG_DELROUTE: - if (dummy_nlh.nlmsg_len < sizeof(m.rtmsg) + sizeof(dummy_nlh)) - return -EINVAL; - if (copy_from_user(&m.rtmsg, arg+sizeof(dummy_nlh), sizeof(m.rtmsg))) - return -EFAULT; - fib_lock(); - err = rtmsg_process(&dummy_nlh, &m.rtmsg); - fib_unlock(); - return err; - case RTMSG_NEWRULE: - case RTMSG_DELRULE: - if (dummy_nlh.nlmsg_len < sizeof(m.rtrmsg) + sizeof(dummy_nlh)) - return -EINVAL; - if (copy_from_user(&m.rtrmsg, arg+sizeof(dummy_nlh), sizeof(m.rtrmsg))) - return -EFAULT; - fib_lock(); - err = rtrulemsg_process(&dummy_nlh, &m.rtrmsg); - fib_unlock(); - return err; - case RTMSG_NEWDEVICE: - case RTMSG_DELDEVICE: - if (dummy_nlh.nlmsg_len < sizeof(m.ifmsg) + sizeof(dummy_nlh)) - return -EINVAL; - if (copy_from_user(&m.ifmsg, arg+sizeof(dummy_nlh), sizeof(m.ifmsg))) - return -EFAULT; - fib_lock(); - err = ifmsg_process(&dummy_nlh, &m.ifmsg); - fib_unlock(); - return err; - case RTMSG_CONTROL: - if (dummy_nlh.nlmsg_len < sizeof(m.rtcmsg) + sizeof(dummy_nlh)) - return -EINVAL; - if (copy_from_user(&m.rtcmsg, arg+sizeof(dummy_nlh), 
sizeof(m.rtcmsg))) - return -EFAULT; - fib_lock(); - err = rtcmsg_process(&dummy_nlh, &m.rtcmsg); - fib_unlock(); - return err; - default: - return -EINVAL; - } - } - - return -EINVAL; -} - -#ifdef CONFIG_RTNETLINK - -/* - * Netlink hooks for IP - */ - - -static void -rtmsg_fib(unsigned long type, struct fib_node *f, int logmask, - struct fib_class *class, struct nlmsghdr *n) -{ - struct in_rtmsg *r; - struct fib_info *fi; - - if (n && !(rt_nl_flags&RTCTL_ECHO) && rt_nl_owner == n->nlmsg_pid) - return; - - start_bh_atomic(); - r = nlmsg_send(&rtmsg_ctl, type, sizeof(*r), n ? n->nlmsg_seq : 0, - n ? n->nlmsg_pid : 0); - if (r) { - r->rtmsg_prefix.s_addr = htonl(f->fib_key<rtmsg_prefixlen = 32 - logmask; - r->rtmsg_metric= f->fib_metric; - r->rtmsg_tos = f->fib_tos; - r->rtmsg_class=class->cl_id; - r->rtmsg_flags = fib_flag_trans(f->fib_flag); - - if ((fi = f->fib_info) != NULL) { - r->rtmsg_gateway.s_addr = fi->fib_gateway; - r->rtmsg_flags |= fi->fib_flags; - r->rtmsg_mtu = fi->fib_mtu; - r->rtmsg_window = fi->fib_window; - r->rtmsg_rtt = fi->fib_irtt; - r->rtmsg_ifindex = fi->fib_dev ? fi->fib_dev->ifindex : 0; - } - } - end_bh_atomic(); -} - -static void -__rtmsg_ack(struct nlmsghdr *n, int err) -{ - nlmsg_ack(&rtmsg_ctl, n->nlmsg_seq, n->nlmsg_pid, err); -} - - -static void -rtmsg_dev(unsigned long type, struct device *dev, struct nlmsghdr *n) -{ - struct in_ifmsg *r; - - start_bh_atomic(); - r = nlmsg_send(&rtmsg_ctl, type, sizeof(*r), n ? n->nlmsg_seq : 0, - n ? 
n->nlmsg_pid : 0); - if (r) - { - memset(r, 0, sizeof(*r)); - r->ifmsg_lladdr.sa_family = dev->type; - memcpy(&r->ifmsg_lladdr.sa_data, dev->dev_addr, dev->addr_len); - r->ifmsg_prefix.s_addr = dev->pa_addr; - if (dev->flags & IFF_POINTOPOINT || dev->type == ARPHRD_TUNNEL) - r->ifmsg_brd.s_addr = dev->pa_dstaddr; - else - r->ifmsg_brd.s_addr = dev->pa_brdaddr; - r->ifmsg_flags = dev->flags; - r->ifmsg_mtu = dev->mtu; - r->ifmsg_metric = dev->metric; - r->ifmsg_prefixlen = 32 - fib_logmask(dev->pa_mask); - r->ifmsg_index = dev->ifindex; - strcpy(r->ifmsg_name, dev->name); - } - end_bh_atomic(); -} - -static int fib_netlink_call(int minor, struct sk_buff *skb) -{ - struct nlmsghdr *nlh; - int totlen = 0; - int err = 0; - - fib_lock(); - while (skb->len >= sizeof(*nlh)) { - int rlen; - nlh = (struct nlmsghdr *)skb->data; - rlen = NLMSG_ALIGN(nlh->nlmsg_len); - if (skb->len < rlen) - break; - totlen += rlen; - err = 0; - skb_pull(skb, rlen); - switch (nlh->nlmsg_type) { - case RTMSG_NEWROUTE: - case RTMSG_DELROUTE: - if (nlh->nlmsg_len < sizeof(*nlh)+sizeof(struct in_rtmsg)) { - rtmsg_ack(nlh, EINVAL); - err = -EINVAL; - break; - } - err = rtmsg_process(nlh, (struct in_rtmsg*)nlh->nlmsg_data); - break; - case RTMSG_NEWRULE: - case RTMSG_DELRULE: - if (nlh->nlmsg_len < sizeof(*nlh)+sizeof(struct in_rtrulemsg)) { - rtmsg_ack(nlh, EINVAL); - err = -EINVAL; - break; - } - err = rtrulemsg_process(nlh, (struct in_rtrulemsg*)nlh->nlmsg_data); - break; - case RTMSG_NEWDEVICE: - case RTMSG_DELDEVICE: - if (nlh->nlmsg_len < sizeof(*nlh)+sizeof(struct in_ifmsg)) { - rtmsg_ack(nlh, EINVAL); - err = -EINVAL; - break; - } - err = ifmsg_process(nlh, (struct in_ifmsg*)nlh->nlmsg_data); - break; - case RTMSG_CONTROL: - if (nlh->nlmsg_len < sizeof(*nlh)+sizeof(struct in_rtctlmsg)) { - rtmsg_ack(nlh, EINVAL); - err = -EINVAL; - break; - } - err = rtcmsg_process(nlh, (struct in_rtctlmsg*)nlh->nlmsg_data); - break; - default: - break; - } - } - kfree_skb(skb, FREE_READ); - fib_unlock(); - 
if (!err || rt_nl_flags&RTCTL_ACK) - return totlen; - return err; -} - -#endif - - -static int fib_magic(int op, unsigned flags, u32 dst, u32 mask, struct device *dev) -{ - struct nlmsghdr n; - struct in_rtmsg r; - memset(&r, 0, sizeof(r)); - n.nlmsg_seq=0; - n.nlmsg_pid=0; - r.rtmsg_metric = MAGIC_METRIC; - r.rtmsg_prefix.s_addr = dst; - if (dev->flags&IFF_LOOPBACK) - flags |= RTF_LOCAL; - r.rtmsg_flags = flags; - r.rtmsg_prefixlen = 32 - fib_logmask(mask); - - return (op == RTMSG_NEWROUTE ? fib_create : fib_delete) - (&r, dev, (flags&RTF_LOCAL) ? &local_class : &main_class, &n); -} - -static void ip_rt_del_broadcasts(struct device *dev) -{ - u32 net = dev->pa_addr&dev->pa_mask; - - fib_magic(RTMSG_DELROUTE, RTF_IFBRD, dev->pa_brdaddr, ~0, dev); - fib_magic(RTMSG_DELROUTE, RTF_IFBRD, net, ~0, dev); - fib_magic(RTMSG_DELROUTE, RTF_IFBRD, net|~dev->pa_mask, ~0, dev); -} - -static void ip_rt_add_broadcasts(struct device *dev, u32 brd, u32 mask) -{ - u32 net = dev->pa_addr&mask; - - if (dev->flags&IFF_BROADCAST) - fib_magic(RTMSG_NEWROUTE, RTF_IFBRD, brd, ~0, dev); - - if (net && !(mask&htonl(1))) { - fib_magic(RTMSG_NEWROUTE, RTF_IFBRD, net, ~0, dev); - fib_magic(RTMSG_NEWROUTE, RTF_IFBRD, net|~mask, ~0, dev); - } -} - -void ip_rt_change_broadcast(struct device *dev, u32 new_brd) -{ - fib_lock(); - printk(KERN_DEBUG "%s changes brd %08X -> %08X\n", - dev->name, (u32)dev->pa_brdaddr, new_brd); - if (!ZERONET(dev->pa_addr) && dev->flags&IFF_BROADCAST) { - fib_magic(RTMSG_DELROUTE, RTF_IFBRD, dev->pa_brdaddr, ~0, dev); - rtmsg_dev(RTMSG_DELDEVICE, dev, NULL); - rtmsg_dev(RTMSG_NEWDEVICE, dev, NULL); - ip_rt_add_broadcasts(dev, new_brd, dev->pa_mask); - } - fib_unlock(); -} - -void ip_rt_change_dstaddr(struct device *dev, u32 dstaddr) -{ - fib_lock(); - if (!ZERONET(dev->pa_addr) && (dev->flags&IFF_POINTOPOINT) && dev->type != ARPHRD_TUNNEL) { - printk(KERN_DEBUG "%s changes dst %08X -> %08X\n", - dev->name, (u32)dev->pa_dstaddr, dstaddr); - fib_magic(RTMSG_DELROUTE, 
RTF_IFPREFIX, dev->pa_dstaddr, ~0, dev); - rtmsg_dev(RTMSG_DELDEVICE, dev, NULL); - rtmsg_dev(RTMSG_NEWDEVICE, dev, NULL); - if (dstaddr) - fib_magic(RTMSG_NEWROUTE, RTF_IFPREFIX, dstaddr, ~0, dev); - } - fib_unlock(); -} - -void ip_rt_change_netmask(struct device *dev, u32 mask) -{ - u32 net; - - fib_lock(); - printk(KERN_DEBUG "%s changes netmask %08X -> %08X\n", - dev->name, (u32)dev->pa_mask, mask); - if (ZERONET(dev->pa_addr)) { - fib_unlock(); - return; - } - net = dev->pa_addr&dev->pa_mask; - fib_magic(RTMSG_DELROUTE, RTF_IFPREFIX, net, dev->pa_mask, dev); - ip_rt_del_broadcasts(dev); - if (mask != 0xFFFFFFFF && dev->flags&IFF_POINTOPOINT) - fib_magic(RTMSG_DELROUTE, RTF_IFPREFIX, dev->pa_dstaddr, ~0, dev); - rtmsg_dev(RTMSG_DELDEVICE, dev, NULL); - - if (mask != 0xFFFFFFFF) - dev->flags &= ~IFF_POINTOPOINT; - - rtmsg_dev(RTMSG_NEWDEVICE, dev, NULL); - net = dev->pa_addr&mask; - if (net) - fib_magic(RTMSG_NEWROUTE, RTF_IFPREFIX, net, mask, dev); - ip_rt_add_broadcasts(dev, dev->pa_addr, mask); - fib_unlock(); -} - -int ip_rt_event(int event, struct device *dev) -{ - fib_lock(); - if (event == NETDEV_DOWN) { - fib_flush(dev); - rtmsg_dev(RTMSG_DELDEVICE, dev, NULL); - fib_unlock(); - return NOTIFY_DONE; - } - if (event == NETDEV_CHANGE) { - printk(KERN_DEBUG "%s(%s) changes state fl=%08x pa=%08X/%08X brd=%08X dst=%08X\n", - dev->name, current->comm, dev->flags, (u32)dev->pa_addr, (u32)dev->pa_mask, - (u32)dev->pa_brdaddr, (u32)dev->pa_dstaddr); - if (!(dev->flags&IFF_BROADCAST)) - fib_magic(RTMSG_DELROUTE, RTF_IFBRD, dev->pa_brdaddr, ~0, dev); - if (!(dev->flags&IFF_POINTOPOINT)) - fib_magic(RTMSG_DELROUTE, RTF_IFPREFIX, dev->pa_dstaddr, ~0, dev); - else { - u32 net = dev->pa_addr&dev->pa_mask; - fib_magic(RTMSG_DELROUTE, RTF_IFPREFIX, net, dev->pa_mask, dev); - ip_rt_del_broadcasts(dev); - } - rtmsg_dev(RTMSG_DELDEVICE, dev, NULL); - } - - if ((event == NETDEV_UP || event == NETDEV_CHANGE) && !ZERONET(dev->pa_addr)) { - if (dev->flags&IFF_POINTOPOINT) { - 
dev->pa_mask = 0xFFFFFFFF; - dev->ip_flags &= ~IFF_IP_MASK_OK; - dev->flags &= ~IFF_BROADCAST; - dev->pa_brdaddr = 0; - } - - if (event == NETDEV_UP) - printk(KERN_DEBUG "%s UP fl=%08x pa=%08X/%08X brd=%08X dst=%08X\n", - dev->name, dev->flags, (u32)dev->pa_addr, - (u32)dev->pa_mask, (u32)dev->pa_brdaddr, (u32)dev->pa_dstaddr); - - rtmsg_dev(RTMSG_NEWDEVICE, dev, NULL); - - if (dev->flags&IFF_POINTOPOINT) { - if (dev->pa_dstaddr && dev->type != ARPHRD_TUNNEL) - fib_magic(RTMSG_NEWROUTE, RTF_IFPREFIX, dev->pa_dstaddr, ~0, dev); - } else { - u32 net = dev->pa_addr&dev->pa_mask; - - if (net) - fib_magic(RTMSG_NEWROUTE, RTF_IFPREFIX, net, dev->pa_mask, dev); - ip_rt_add_broadcasts(dev, dev->pa_brdaddr, dev->pa_mask); - } - fib_magic(RTMSG_NEWROUTE, RTF_IFLOCAL, dev->pa_addr, ~0, dev); - if (dev == &loopback_dev) { - if (dev->pa_addr != htonl(INADDR_LOOPBACK)) { - u32 mask = htonl(0xFF000000); - fib_magic(RTMSG_NEWROUTE, RTF_IFPREFIX, - htonl(INADDR_LOOPBACK)&mask, - mask, dev); - fib_magic(RTMSG_NEWROUTE, RTF_IFLOCAL, - htonl(INADDR_LOOPBACK), - mask, dev); - } - } - } - if (event == NETDEV_CHANGEMTU || event == NETDEV_CHANGEADDR) - rtmsg_dev(RTMSG_NEWDEVICE, dev, NULL); - fib_unlock(); - return NOTIFY_DONE; -} - - -__initfunc(void ip_fib_init(void)) -{ - struct in_rtrulemsg r; - -#ifdef CONFIG_PROC_FS - proc_net_register(&(struct proc_dir_entry) { - PROC_NET_ROUTE, 5, "route", - S_IFREG | S_IRUGO, 1, 0, 0, - 0, &proc_net_inode_operations, - fib_get_info - }); - proc_net_register(&(struct proc_dir_entry) { - PROC_NET_RTCLASSES, 10, "rt_classes", - S_IFREG | S_IRUGO, 1, 0, 0, - 0, &proc_net_inode_operations, - fib_class_get_info - }); - proc_net_register(&(struct proc_dir_entry) { - PROC_NET_RTLOCAL, 8, "rt_local", - S_IFREG | S_IRUGO, 1, 0, 0, - 0, &proc_net_inode_operations, - fib_local_get_info - }); - proc_net_register(&(struct proc_dir_entry) { - PROC_NET_RTRULES, 8, "rt_rules", - S_IFREG | S_IRUGO, 1, 0, 0, - 0, &proc_net_inode_operations, - fib_rules_get_info - 
}); -#endif /* CONFIG_PROC_FS */ - - fib_classes[RT_CLASS_LOCAL] = &local_class; - fib_classes[RT_CLASS_MAIN] = &main_class; - fib_classes[RT_CLASS_DEFAULT] = &default_class; - - memset(&r, 0, sizeof(r)); - r.rtrmsg_class = RT_CLASS_LOCAL; - r.rtrmsg_preference = 0; - fib_rule_add(&r, NULL, NULL); - - memset(&r, 0, sizeof(r)); - r.rtrmsg_class = RT_CLASS_DEFAULT; - r.rtrmsg_preference = 255; - fib_rule_add(&r, NULL, NULL); - - memset(&r, 0, sizeof(r)); - r.rtrmsg_class = RT_CLASS_MAIN; - r.rtrmsg_preference = 254; - fib_rule_add(&r, NULL, NULL); - -#ifdef CONFIG_RTNETLINK - netlink_attach(NETLINK_ROUTE, fib_netlink_call); -#endif -} diff -u --recursive --new-file v2.1.67/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c --- v2.1.67/linux/net/ipv4/fib_frontend.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/fib_frontend.c Sun Nov 30 14:00:39 1997 @@ -0,0 +1,572 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 Forwarding Information Base: FIB frontend. + * + * Version: $Id: fib_frontend.c,v 1.4 1997/11/09 20:05:23 kuznet Exp $ + * + * Authors: Alexey Kuznetsov, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define FFprint(a...) 
printk(KERN_DEBUG a) + +#ifndef CONFIG_IP_MULTIPLE_TABLES + +#define RT_TABLE_MIN RT_TABLE_MAIN + +struct fib_table *local_table; +struct fib_table *main_table; + +#else + +#define RT_TABLE_MIN 1 + +struct fib_table *fib_tables[RT_TABLE_MAX+1]; + +struct fib_table *__fib_new_table(int id) +{ + struct fib_table *tb; + + tb = fib_hash_init(id); + if (!tb) + return NULL; + fib_tables[id] = tb; + return tb; +} + + +#endif /* CONFIG_IP_MULTIPLE_TABLES */ + + +void fib_flush(void) +{ + int flushed = 0; +#ifdef CONFIG_IP_MULTIPLE_TABLES + struct fib_table *tb; + int id; + + for (id = RT_TABLE_MAX; id>0; id--) { + if ((tb = fib_get_table(id))==NULL) + continue; + flushed += tb->tb_flush(tb); + } +#else /* CONFIG_IP_MULTIPLE_TABLES */ + flushed += main_table->tb_flush(main_table); + flushed += local_table->tb_flush(local_table); +#endif /* CONFIG_IP_MULTIPLE_TABLES */ + + if (flushed) + rt_cache_flush(RT_FLUSH_DELAY); +} + + +#ifdef CONFIG_PROC_FS + +/* + * Called from the PROCfs module. This outputs /proc/net/route. + * + * It always works in backward compatibility mode. + * The format of the file is not supposed to be changed. + */ + +static int +fib_get_procinfo(char *buffer, char **start, off_t offset, int length, int dummy) +{ + int first = offset/128; + char *ptr = buffer; + int count = (length+127)/128; + int len; + + *start = buffer + offset%128; + + if (--first < 0) { + sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT"); + --count; + ptr += 128; + first = 0; + } + + /* rtnl_shlock(); -- it is pointless at the moment --ANK */ + if (main_table && count > 0) { + int n = main_table->tb_get_info(main_table, ptr, first, count); + count -= n; + ptr += n*128; + } + /* rtnl_shunlock(); */ + len = ptr - *start; + if (len >= length) + return length; + if (len >= 0) + return len; + return 0; +} + +#endif /* CONFIG_PROC_FS */ + +/* + * Find the first device with a given source address. 
+ */ + +struct device * ip_dev_find(u32 addr) +{ + struct rt_key key; + struct fib_result res; + + memset(&key, 0, sizeof(key)); + key.dst = addr; + key.scope = RT_SCOPE_UNIVERSE; + + if (!local_table || local_table->tb_lookup(local_table, &key, &res) + || res.type != RTN_LOCAL) + return NULL; + + return FIB_RES_DEV(res); +} + +unsigned inet_addr_type(u32 addr) +{ + struct rt_key key; + struct fib_result res; + + if (ZERONET(addr) || BADCLASS(addr)) + return RTN_BROADCAST; + if (MULTICAST(addr)) + return RTN_MULTICAST; + + memset(&key, 0, sizeof(key)); + key.dst = addr; + + if (local_table) { + if (local_table->tb_lookup(local_table, &key, &res) == 0) + return res.type; + return RTN_UNICAST; + } + return RTN_BROADCAST; +} + +/* Given (packet source, input interface) and optional (dst, oif, tos): + - (main) check, that source is valid i.e. not broadcast or our local + address. + - figure out what "logical" interface this packet arrived + and calculate "specific destination" address. + - check, that packet arrived from expected physical interface. 
+ */ + +int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, + struct device *dev, u32 *spec_dst) +{ + struct in_device *in_dev = dev->ip_ptr; + struct rt_key key; + struct fib_result res; + + key.dst = src; + key.src = dst; + key.tos = tos; + key.oif = 0; + key.iif = oif; + key.scope = RT_SCOPE_UNIVERSE; + + if (in_dev == NULL) + return -EINVAL; + if (fib_lookup(&key, &res)) + goto last_resort; + if (res.type != RTN_UNICAST) + return -EINVAL; + *spec_dst = FIB_RES_PREFSRC(res); +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) +#else + if (FIB_RES_DEV(res) == dev) +#endif + return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + + if (in_dev->ifa_list == NULL) + goto last_resort; + if (IN_DEV_RPFILTER(in_dev)) + return -EINVAL; + key.oif = dev->ifindex; + if (fib_lookup(&key, &res) == 0 && res.type == RTN_UNICAST) { + *spec_dst = FIB_RES_PREFSRC(res); + return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + } + return 0; + +last_resort: + if (IN_DEV_RPFILTER(in_dev)) + return -EINVAL; + *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); + return 0; +} + +#ifndef CONFIG_IP_NOSIOCRT + +/* + * Handle IP routing ioctl calls. 
These are used to manipulate the routing tables + */ + +int ip_rt_ioctl(unsigned int cmd, void *arg) +{ + int err; + struct kern_rta rta; + struct rtentry r; + struct { + struct nlmsghdr nlh; + struct rtmsg rtm; + } req; + + switch (cmd) { + case SIOCADDRT: /* Add a route */ + case SIOCDELRT: /* Delete a route */ + if (!suser()) + return -EPERM; + if (copy_from_user(&r, arg, sizeof(struct rtentry))) + return -EFAULT; + rtnl_lock(); + err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, arg); + if (err == 0) { + if (cmd == SIOCDELRT) { + struct fib_table *tb = fib_get_table(req.rtm.rtm_table); + err = -ESRCH; + if (tb) + err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + } else { + struct fib_table *tb = fib_new_table(req.rtm.rtm_table); + err = -ENOBUFS; + if (tb) + err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + } + } + rtnl_unlock(); + return err; + } + return -EINVAL; +} + +#else + +int ip_rt_ioctl(unsigned int cmd, void *arg) +{ + return -EINVAL; +} + +#endif + +#ifdef CONFIG_RTNETLINK + +int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_table * tb; + struct kern_rta *rta = arg; + struct rtmsg *r = NLMSG_DATA(nlh); + + tb = fib_get_table(r->rtm_table); + if (tb) + return tb->tb_delete(tb, r, rta, nlh, &NETLINK_CB(skb)); + return -ESRCH; +} + +int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_table * tb; + struct kern_rta *rta = arg; + struct rtmsg *r = NLMSG_DATA(nlh); + + tb = fib_new_table(r->rtm_table); + if (tb) + return tb->tb_insert(tb, r, rta, nlh, &NETLINK_CB(skb)); + return -ENOBUFS; +} + +int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) +{ + int t; + int s_t; + struct fib_table *tb; + + s_t = cb->args[0]; + if (s_t == 0) + s_t = cb->args[0] = RT_TABLE_MIN; + + for (t=s_t; t<=RT_TABLE_MAX; t++) { + if (t < s_t) continue; + if (t > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int)); + if ((tb = fib_get_table(t))==NULL) 
+ continue; + if (tb->tb_dump(tb, skb, cb) < 0) + break; + } + + cb->args[0] = t; + + return skb->len; +} + +#endif + +/* Prepare and feed intra-kernel routing request. + Really, it should be netlink message, but :-( netlink + can be not configured, so that we feed it directly + to fib engine. It is legal, because all events occur + only when netlink is already locked. + */ + +static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) +{ + struct fib_table * tb; + struct { + struct nlmsghdr nlh; + struct rtmsg rtm; + } req; + struct kern_rta rta; + + memset(&req.rtm, 0, sizeof(req.rtm)); + memset(&rta, 0, sizeof(rta)); + + if (type == RTN_UNICAST) + tb = fib_new_table(RT_TABLE_MAIN); + else + tb = fib_new_table(RT_TABLE_LOCAL); + + if (tb == NULL) + return; + + req.nlh.nlmsg_len = sizeof(req); + req.nlh.nlmsg_type = cmd; + req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; + req.nlh.nlmsg_pid = 0; + req.nlh.nlmsg_seq = 0; + + req.rtm.rtm_dst_len = dst_len; + req.rtm.rtm_table = tb->tb_id; + req.rtm.rtm_protocol = RTPROT_KERNEL; + req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); + req.rtm.rtm_type = type; + + rta.rta_dst = &dst; + rta.rta_prefsrc = &ifa->ifa_local; + rta.rta_oif = &ifa->ifa_dev->dev->ifindex; + + if (cmd == RTM_NEWROUTE) + tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + else + tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); +} + +static void fib_add_ifaddr(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct device *dev = in_dev->dev; + struct in_ifaddr *prim = ifa; + u32 mask = ifa->ifa_mask; + u32 addr = ifa->ifa_local; + u32 prefix = ifa->ifa_address&mask; + + if (ifa->ifa_flags&IFA_F_SECONDARY) + prim = inet_ifa_byprefix(in_dev, prefix, mask); + + fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); + + if (!(dev->flags&IFF_UP)) + return; + + /* Add broadcast address, if it is explicitly assigned. 
*/ + if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF) + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); + + if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY)) { + fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : + RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); + + /* Add network specific broadcasts, when it takes a sense */ + if (ifa->ifa_prefixlen < 31) { + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); + } + } +} + +static void fib_del_ifaddr(struct in_ifaddr *ifa) +{ + struct in_device *in_dev = ifa->ifa_dev; + struct device *dev = in_dev->dev; + struct in_ifaddr *ifa1; + struct in_ifaddr *prim = ifa; + u32 brd = ifa->ifa_address|~ifa->ifa_mask; + u32 any = ifa->ifa_address&ifa->ifa_mask; +#define LOCAL_OK 1 +#define BRD_OK 2 +#define BRD0_OK 4 +#define BRD1_OK 8 + unsigned ok = 0; + + if (!(ifa->ifa_flags&IFA_F_SECONDARY)) + fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : + RTN_UNICAST, any, ifa->ifa_prefixlen, prim); + else + prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); + + /* Deletion is more complicated than add. + We should take care of not to delete too much :-) + + Scan address list to be sure that addresses are really gone. 
+ */ + + for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { + if (ifa->ifa_local == ifa1->ifa_local) + ok |= LOCAL_OK; + if (ifa->ifa_broadcast == ifa1->ifa_broadcast) + ok |= BRD_OK; + if (brd == ifa1->ifa_broadcast) + ok |= BRD1_OK; + if (any == ifa1->ifa_broadcast) + ok |= BRD0_OK; + } + + if (!(ok&BRD_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); + if (!(ok&BRD1_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); + if (!(ok&BRD0_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); + if (!(ok&LOCAL_OK)) { + fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); + + /* Check, that this local address finally disappeared. */ + if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) { + /* And the last, but not the least thing. + We must flush stray FIB entries. + + First of all, we scan fib_info list searching + for stray nexthop entries, then ignite fib_flush. + */ + if (fib_sync_down(ifa->ifa_local, NULL)) + fib_flush(); + } + } +#undef LOCAL_OK +#undef BRD_OK +#undef BRD0_OK +#undef BRD1_OK +} + +static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; + + switch (event) { + case NETDEV_UP: + fib_add_ifaddr(ifa); + rt_cache_flush(2*HZ); + break; + case NETDEV_DOWN: + fib_del_ifaddr(ifa); + rt_cache_flush(1*HZ); + break; + } + return NOTIFY_DONE; +} + +static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct device *dev = ptr; + struct in_device *in_dev = dev->ip_ptr; + + if (!in_dev) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: + for_ifa(in_dev) { + fib_add_ifaddr(ifa); + } endfor_ifa(in_dev); +#ifdef CONFIG_IP_ROUTE_MULTIPATH + fib_sync_up(dev); +#endif + rt_cache_flush(2*HZ); + break; + case NETDEV_DOWN: + if (fib_sync_down(0, dev)) + fib_flush(); + rt_cache_flush(0); + break; + case NETDEV_UNREGISTER: + if (in_dev->ifa_list) + printk("About to crash!\n"); 
+ rt_cache_flush(0); + break; + } + return NOTIFY_DONE; +} + +struct notifier_block fib_inetaddr_notifier = { + fib_inetaddr_event, + NULL, + 0 +}; + +struct notifier_block fib_netdev_notifier = { + fib_netdev_event, + NULL, + 0 +}; + +__initfunc(void ip_fib_init(void)) +{ +#ifdef CONFIG_PROC_FS + proc_net_register(&(struct proc_dir_entry) { + PROC_NET_ROUTE, 5, "route", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_net_inode_operations, + fib_get_procinfo + }); +#endif /* CONFIG_PROC_FS */ + +#ifndef CONFIG_IP_MULTIPLE_TABLES + local_table = fib_hash_init(RT_TABLE_LOCAL); + main_table = fib_hash_init(RT_TABLE_MAIN); +#else + fib_rules_init(); +#endif + + register_netdevice_notifier(&fib_netdev_notifier); + register_inetaddr_notifier(&fib_inetaddr_notifier); +} + diff -u --recursive --new-file v2.1.67/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c --- v2.1.67/linux/net/ipv4/fib_hash.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/fib_hash.c Sun Nov 30 14:00:39 1997 @@ -0,0 +1,754 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 FIB: lookup engine and maintenance routines. + * + * Version: $Id: fib_hash.c,v 1.1 1997/11/09 19:53:13 kuznet Exp $ + * + * Authors: Alexey Kuznetsov, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define FTprint(a...) 
+/* +printk(KERN_DEBUG a) + */ + +/* + These bizarre types are just to force strict type checking. + When I reversed order of bytes and changed to natural mask lengths, + I forgot to make fixes in several places. Now I am lazy to return + it back. + */ + +typedef struct { + u32 datum; +} fn_key_t; + +typedef struct { + u32 datum; +} fn_hash_idx_t; + +struct fib_node +{ + struct fib_node *fn_next; + struct fib_info *fn_info; +#define FIB_INFO(f) ((f)->fn_info) + fn_key_t fn_key; + u8 fn_tos; + u8 fn_type; + u8 fn_scope; + u8 fn_state; +}; + +#define FN_S_ZOMBIE 1 +#define FN_S_ACCESSED 2 + +static int fib_hash_zombies; + +struct fn_zone +{ + struct fn_zone *fz_next; /* Next not empty zone */ + struct fib_node **fz_hash; /* Hash table pointer */ + int fz_nent; /* Number of entries */ + + int fz_divisor; /* Hash divisor */ + u32 fz_hashmask; /* (1<fz_hashmask) + + int fz_order; /* Zone order */ + u32 fz_mask; +#define FZ_MASK(fz) ((fz)->fz_mask) +}; + +/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask + can be cheaper than memory lookup, so that FZ_* macros are used. 
+ */ + +struct fn_hash +{ + struct fn_zone *fn_zones[33]; + struct fn_zone *fn_zone_list; +}; + +static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz) +{ + u32 h = ntohl(key.datum)>>(32 - fz->fz_order); + h ^= (h>>20); + h ^= (h>>10); + h ^= (h>>5); + h &= FZ_HASHMASK(fz); + return *(fn_hash_idx_t*)&h; +} + +#define fz_key_0(key) ((key).datum = 0) +#define fz_prefix(key,fz) ((key).datum) + +static __inline__ fn_key_t fz_key(u32 dst, struct fn_zone *fz) +{ + fn_key_t k; + k.datum = dst & FZ_MASK(fz); + return k; +} + +static __inline__ struct fib_node ** fz_chain_p(fn_key_t key, struct fn_zone *fz) +{ + return &fz->fz_hash[fn_hash(key, fz).datum]; +} + +static __inline__ struct fib_node * fz_chain(fn_key_t key, struct fn_zone *fz) +{ + return fz->fz_hash[fn_hash(key, fz).datum]; +} + +extern __inline__ int fn_key_eq(fn_key_t a, fn_key_t b) +{ + return a.datum == b.datum; +} + +#define FZ_MAX_DIVISOR 1024 + +#ifdef CONFIG_IP_ROUTE_LARGE_TABLES + +static __inline__ void fn_rebuild_zone(struct fn_zone *fz, + struct fib_node **old_ht, + int old_divisor) +{ + int i; + struct fib_node *f, **fp, *next; + + for (i=0; ifn_next; + f->fn_next = NULL; + for (fp = fz_chain_p(f->fn_key, fz); *fp; fp = &(*fp)->fn_next) + /* NONE */; + *fp = f; + } + } +} + +static void fn_rehash_zone(struct fn_zone *fz) +{ + struct fib_node **ht, **old_ht; + int old_divisor, new_divisor; + u32 new_hashmask; + + old_divisor = fz->fz_divisor; + + switch (old_divisor) { + case 16: + new_divisor = 256; + new_hashmask = 0xFF; + break; + case 256: + new_divisor = 1024; + new_hashmask = 0x3FF; + break; + default: + printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); + return; + } +#if RT_CACHE_DEBUG >= 2 + printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor); +#endif + + ht = kmalloc(new_divisor*sizeof(struct fib_node*), GFP_KERNEL); + + if (ht) { + memset(ht, 0, new_divisor*sizeof(struct fib_node*)); + start_bh_atomic(); + old_ht = 
fz->fz_hash; + fz->fz_hash = ht; + fz->fz_hashmask = new_hashmask; + fz->fz_divisor = new_divisor; + fn_rebuild_zone(fz, old_ht, old_divisor); + end_bh_atomic(); + kfree(old_ht); +FTprint("REHASHED ZONE: order %d mask %08x hash %d/%08x\n", fz->fz_order, fz->fz_mask, fz->fz_divisor, fz->fz_hashmask); + } +} +#endif /* CONFIG_IP_ROUTE_LARGE_TABLES */ + +static void fn_free_node(struct fib_node * f) +{ + fib_release_info(FIB_INFO(f)); + kfree_s(f, sizeof(struct fib_node)); +} + + +static struct fn_zone * +fn_new_zone(struct fn_hash *table, int z) +{ + int i; + struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL); + if (!fz) + return NULL; + + memset(fz, 0, sizeof(struct fn_zone)); + if (z) { + fz->fz_divisor = 16; + fz->fz_hashmask = 0xF; + } else { + fz->fz_divisor = 1; + fz->fz_hashmask = 0; + } + fz->fz_hash = kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL); + if (!fz->fz_hash) { + kfree(fz); + return NULL; + } + memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*)); + fz->fz_order = z; + fz->fz_mask = inet_make_mask(z); + + /* Find the first not empty zone with more specific mask */ + for (i=z+1; i<=32; i++) + if (table->fn_zones[i]) + break; + start_bh_atomic(); + if (i>32) { + /* No more specific masks, we are the first. 
*/ + fz->fz_next = table->fn_zone_list; + table->fn_zone_list = fz; + } else { + fz->fz_next = table->fn_zones[i]->fz_next; + table->fn_zones[i]->fz_next = fz; + } + table->fn_zones[z] = fz; + end_bh_atomic(); +FTprint("NEW ZONE: order %d mask %08x hash %d/%08x\n", fz->fz_order, fz->fz_mask, fz->fz_divisor, fz->fz_hashmask); + return fz; +} + +static int +fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result *res) +{ + int err; + struct fn_zone *fz; + struct fn_hash *t = (struct fn_hash*)tb->tb_data; + + for (fz = t->fn_zone_list; fz; fz = fz->fz_next) { + struct fib_node *f; + fn_key_t k = fz_key(key->dst, fz); + int matched = 0; + + for (f = fz_chain(k, fz); f; f = f->fn_next) { + if (!fn_key_eq(k, f->fn_key) +#ifdef CONFIG_IP_ROUTE_TOS + || (f->fn_tos && f->fn_tos != key->tos) +#endif + ) { + if (matched) + return 1; + continue; + } + matched = 1; + f->fn_state |= FN_S_ACCESSED; + + if (f->fn_state&FN_S_ZOMBIE) + continue; + if (f->fn_scope < key->scope) + continue; + + err = fib_semantic_match(f->fn_type, FIB_INFO(f), key, res); + if (err == 0) { + res->type = f->fn_type; + res->scope = f->fn_scope; + res->prefixlen = fz->fz_order; + res->prefix = &fz_prefix(f->fn_key, fz); + return 0; + } + if (err < 0) + return err; + } + } + return 1; +} + +#define FIB_SCAN(f, fp) \ +for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next) + +#define FIB_SCAN_KEY(f, fp, key) \ +for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) + +#define FIB_CONTINUE(f, fp) \ +{ \ + fp = &f->fn_next; \ + continue; \ +} + +#ifdef CONFIG_RTNETLINK +static void rtmsg_fib(int, struct fib_node*, int, int, + struct nlmsghdr *n, + struct netlink_skb_parms *); +#else +#define rtmsg_fib(a, b, c, d, e, f) +#endif + + +static int +fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, + struct nlmsghdr *n, struct netlink_skb_parms *req) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fib_node *new_f, *f, 
**fp; + struct fn_zone *fz; + struct fib_info *fi; + + int z = r->rtm_dst_len; + int type = r->rtm_type; +#ifdef CONFIG_IP_ROUTE_TOS + u8 tos = r->rtm_tos; +#endif + fn_key_t key; + unsigned state = 0; + int err; + +FTprint("tb(%d)_insert: %d %08x/%d %d %08x\n", tb->tb_id, r->rtm_type, rta->rta_dst ? +*(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1, +rta->rta_prefsrc ? *(u32*)rta->rta_prefsrc : 0); + if (z > 32) + return -EINVAL; + fz = table->fn_zones[z]; + if (!fz && !(fz = fn_new_zone(table, z))) + return -ENOBUFS; + + fz_key_0(key); + if (rta->rta_dst) { + u32 dst; + memcpy(&dst, rta->rta_dst, 4); + if (dst & ~FZ_MASK(fz)) + return -EINVAL; + key = fz_key(dst, fz); + } + + if ((fi = fib_create_info(r, rta, n, &err)) == NULL) { +FTprint("fib_create_info err=%d\n", err); + return err; + } + +#ifdef CONFIG_IP_ROUTE_LARGE_TABLES + if (fz->fz_nent > (fz->fz_divisor<<2) && + fz->fz_divisor < FZ_MAX_DIVISOR && + (z==32 || (1< fz->fz_divisor)) + fn_rehash_zone(fz); +#endif + + fp = fz_chain_p(key, fz); + + /* + * Scan list to find the first route with the same destination + */ + FIB_SCAN(f, fp) { + if (fn_key_eq(f->fn_key,key)) + break; + } + +#ifdef CONFIG_IP_ROUTE_TOS + /* + * Find route with the same destination and tos. 
+ */ + FIB_SCAN_KEY(f, fp, key) { + if (f->fn_tos <= tos) + break; + } +#endif + + if (f && fn_key_eq(f->fn_key, key) +#ifdef CONFIG_IP_ROUTE_TOS + && f->fn_tos == tos +#endif + ) { + state = f->fn_state; + if (n->nlmsg_flags&NLM_F_EXCL && !(state&FN_S_ZOMBIE)) + return -EEXIST; + if (n->nlmsg_flags&NLM_F_REPLACE) { + struct fib_info *old_fi = FIB_INFO(f); + if (old_fi != fi) { + rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req); + start_bh_atomic(); + FIB_INFO(f) = fi; + f->fn_type = r->rtm_type; + f->fn_scope = r->rtm_scope; + end_bh_atomic(); + rtmsg_fib(RTM_NEWROUTE, f, z, tb->tb_id, n, req); + } + state = f->fn_state; + f->fn_state = 0; + fib_release_info(old_fi); + if (state&FN_S_ACCESSED) + rt_cache_flush(RT_FLUSH_DELAY); + return 0; + } + for ( ; (f = *fp) != NULL && fn_key_eq(f->fn_key, key) +#ifdef CONFIG_IP_ROUTE_TOS + && f->fn_tos == tos +#endif + ; fp = &f->fn_next) { + state |= f->fn_state; + if (f->fn_type == type && f->fn_scope == r->rtm_scope + && FIB_INFO(f) == fi) { + fib_release_info(fi); + if (f->fn_state&FN_S_ZOMBIE) { + f->fn_state = 0; + rtmsg_fib(RTM_NEWROUTE, f, z, tb->tb_id, n, req); + if (state&FN_S_ACCESSED) + rt_cache_flush(RT_FLUSH_DELAY); + return 0; + } + return -EEXIST; + } + } + } else { + if (!(n->nlmsg_flags&NLM_F_CREATE)) + return -ENOENT; + } + + new_f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL); + if (new_f == NULL) { + fib_release_info(fi); + return -ENOBUFS; + } + + memset(new_f, 0, sizeof(struct fib_node)); + + new_f->fn_key = key; +#ifdef CONFIG_IP_ROUTE_TOS + new_f->fn_tos = tos; +#endif + new_f->fn_type = type; + new_f->fn_scope = r->rtm_scope; + FIB_INFO(new_f) = fi; + + /* + * Insert new entry to the list. 
+ */ + + start_bh_atomic(); + new_f->fn_next = f; + *fp = new_f; + end_bh_atomic(); + fz->fz_nent++; + + rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->tb_id, n, req); + rt_cache_flush(RT_FLUSH_DELAY); + return 0; +} + + +static int +fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, + struct nlmsghdr *n, struct netlink_skb_parms *req) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fib_node **fp, *f; + int z = r->rtm_dst_len; + struct fn_zone *fz; + fn_key_t key; +#ifdef CONFIG_IP_ROUTE_TOS + u8 tos = r->rtm_tos; +#endif + +FTprint("tb(%d)_delete: %d %08x/%d %d\n", tb->tb_id, r->rtm_type, rta->rta_dst ? + *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1); + if (z > 32) + return -EINVAL; + if ((fz = table->fn_zones[z]) == NULL) + return -ESRCH; + + fz_key_0(key); + if (rta->rta_dst) { + u32 dst; + memcpy(&dst, rta->rta_dst, 4); + if (dst & ~FZ_MASK(fz)) + return -EINVAL; + key = fz_key(dst, fz); + } + + fp = fz_chain_p(key, fz); + + FIB_SCAN(f, fp) { + if (fn_key_eq(f->fn_key, key)) + break; + } +#ifdef CONFIG_IP_ROUTE_TOS + FIB_SCAN_KEY(f, fp, key) { + if (f->fn_tos == tos) + break; + } +#endif + + while ((f = *fp) != NULL && fn_key_eq(f->fn_key, key) +#ifdef CONFIG_IP_ROUTE_TOS + && f->fn_tos == tos +#endif + ) { + struct fib_info * fi = FIB_INFO(f); + + if ((f->fn_state&FN_S_ZOMBIE) || + (r->rtm_type && f->fn_type != r->rtm_type) || + (r->rtm_scope && f->fn_scope != r->rtm_scope) || + (r->rtm_protocol && fi->fib_protocol != r->rtm_protocol) || + fib_nh_match(r, n, rta, fi)) + FIB_CONTINUE(f, fp); + break; + } + if (!f) + return -ESRCH; +#if 0 + *fp = f->fn_next; + rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req); + fn_free_node(f); + fz->fz_nent--; + rt_cache_flush(0); +#else + f->fn_state |= FN_S_ZOMBIE; + rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req); + if (f->fn_state&FN_S_ACCESSED) { + f->fn_state &= ~FN_S_ACCESSED; + rt_cache_flush(RT_FLUSH_DELAY); + } + if (++fib_hash_zombies > 128) + fib_flush(); +#endif 
+ return 0; +} + +extern __inline__ int +fn_flush_list(struct fib_node ** fp, int z, struct fn_hash *table) +{ + int found = 0; + struct fib_node *f; + + while ((f = *fp) != NULL) { + struct fib_info *fi = FIB_INFO(f); + + if (fi && ((f->fn_state&FN_S_ZOMBIE) || (fi->fib_flags&RTNH_F_DEAD))) { + *fp = f->fn_next; + fn_free_node(f); + found++; + continue; + } + fp = &f->fn_next; + } + return found; +} + +static int fn_hash_flush(struct fib_table *tb) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fn_zone *fz; + int found = 0; + + fib_hash_zombies = 0; + for (fz = table->fn_zone_list; fz; fz = fz->fz_next) { + int i; + int tmp = 0; + for (i=fz->fz_divisor-1; i>=0; i--) + tmp += fn_flush_list(&fz->fz_hash[i], fz->fz_order, table); + fz->fz_nent -= tmp; + found += tmp; + } + return found; +} + + +#ifdef CONFIG_PROC_FS + +static int fn_hash_get_info(struct fib_table *tb, char *buffer, int first, int count) +{ + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + struct fn_zone *fz; + int pos = 0; + int n = 0; + + for (fz=table->fn_zone_list; fz; fz = fz->fz_next) { + int i; + struct fib_node *f; + int maxslot = fz->fz_divisor; + struct fib_node **fp = fz->fz_hash; + + if (fz->fz_nent == 0) + continue; + + if (pos + fz->fz_nent <= first) { + pos += fz->fz_nent; + continue; + } + + for (i=0; i < maxslot; i++, fp++) { + for (f = *fp; f; f = f->fn_next) { + if (++pos <= first) + continue; + fib_node_get_info(f->fn_type, + f->fn_state&FN_S_ZOMBIE, + FIB_INFO(f), + fz_prefix(f->fn_key, fz), + FZ_MASK(fz), buffer); + buffer += 128; + if (++n >= count) + return n; + } + } + } + return n; +} +#endif + + +#ifdef CONFIG_RTNETLINK + +extern __inline__ int +fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, + struct fib_table *tb, + struct fn_zone *fz, + struct fib_node *f) +{ + int i, s_i; + + s_i = cb->args[3]; + for (i=0; f; i++, f=f->fn_next) { + if (i < s_i) continue; + if (f->fn_state&FN_S_ZOMBIE) continue; + if (fib_dump_info(skb, 
NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + RTM_NEWROUTE, + tb->tb_id, (f->fn_state&FN_S_ZOMBIE) ? 0 : f->fn_type, f->fn_scope, + &f->fn_key, fz->fz_order, f->fn_tos, + f->fn_info) < 0) { + cb->args[3] = i; + return -1; + } + } + cb->args[3] = i; + return skb->len; +} + +extern __inline__ int +fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, + struct fib_table *tb, + struct fn_zone *fz) +{ + int h, s_h; + + s_h = cb->args[2]; + for (h=0; h < fz->fz_divisor; h++) { + if (h < s_h) continue; + if (h > s_h) + memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(int)); + if (fz->fz_hash == NULL || fz->fz_hash[h] == NULL) + continue; + if (fn_hash_dump_bucket(skb, cb, tb, fz, fz->fz_hash[h]) < 0) { + cb->args[2] = h; + return -1; + } + } + cb->args[2] = h; + return skb->len; +} + +static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) +{ + int m, s_m; + struct fn_zone *fz; + struct fn_hash *table = (struct fn_hash*)tb->tb_data; + + s_m = cb->args[1]; + for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { + if (m < s_m) continue; + if (m > s_m) + memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(int)); + if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { + cb->args[1] = m; + return -1; + } + } + cb->args[1] = m; + return skb->len; +} + +static void rtmsg_fib(int event, struct fib_node* f, int z, int tb_id, + struct nlmsghdr *n, struct netlink_skb_parms *req) +{ + struct sk_buff *skb; + pid_t pid = req ? 
req->pid : 0; + int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return; + + if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, + f->fn_type, f->fn_scope, &f->fn_key, z, f->fn_tos, + FIB_INFO(f)) < 0) { + kfree_skb(skb, 0); + return; + } + NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; + if (n->nlmsg_flags&NLM_F_ECHO) + atomic_inc(&skb->users); + netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); + if (n->nlmsg_flags&NLM_F_ECHO) + netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); +} + +#endif /* CONFIG_RTNETLINK */ + +#ifdef CONFIG_IP_MULTIPLE_TABLES +struct fib_table * fib_hash_init(int id) +#else +__initfunc(struct fib_table * fib_hash_init(int id)) +#endif +{ + struct fib_table *tb; + tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL); + if (tb == NULL) + return NULL; + tb->tb_id = id; + tb->tb_lookup = fn_hash_lookup; + tb->tb_insert = fn_hash_insert; + tb->tb_delete = fn_hash_delete; + tb->tb_flush = fn_hash_flush; +#ifdef CONFIG_RTNETLINK + tb->tb_dump = fn_hash_dump; +#endif +#ifdef CONFIG_PROC_FS + tb->tb_get_info = fn_hash_get_info; +#endif + memset(tb->tb_data, 0, sizeof(struct fn_hash)); + return tb; +} diff -u --recursive --new-file v2.1.67/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c --- v2.1.67/linux/net/ipv4/fib_rules.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/fib_rules.c Sun Nov 30 14:00:39 1997 @@ -0,0 +1,363 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 Forwarding Information Base: policy rules. 
+ * + * Version: $Id: fib_rules.c,v 1.2 1997/10/10 22:40:49 davem Exp $ + * + * Authors: Alexey Kuznetsov, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define FRprintk(a...) + +struct fib_rule +{ + struct fib_rule *r_next; + unsigned r_preference; + unsigned char r_table; + unsigned char r_action; + unsigned char r_dst_len; + unsigned char r_src_len; + u32 r_src; + u32 r_srcmask; + u32 r_dst; + u32 r_dstmask; + u32 r_srcmap; + u8 r_flags; + u8 r_tos; + int r_ifindex; + char r_ifname[IFNAMSIZ]; +}; + +static struct fib_rule default_rule = { NULL, 0x7FFF, RT_TABLE_DEFAULT, RTN_UNICAST, }; +static struct fib_rule main_rule = { &default_rule, 0x7FFE, RT_TABLE_MAIN, RTN_UNICAST, }; +static struct fib_rule local_rule = { &main_rule, 0, RT_TABLE_LOCAL, RTN_UNICAST, }; + +static struct fib_rule *fib_rules = &local_rule; + +int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct kern_rta *rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct fib_rule *r, **rp; + + for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) { + if ((!rta->rta_src || memcmp(rta->rta_src, &r->r_src, 4) == 0) && + rtm->rtm_src_len == r->r_src_len && + rtm->rtm_dst_len == r->r_dst_len && + (!rta->rta_dst || memcmp(rta->rta_dst, &r->r_dst, 4) == 0) && + rtm->rtm_tos == r->r_tos && + rtm->rtm_type == r->r_action && + (!rta->rta_priority || *rta->rta_priority == r->r_preference) && + (!rta->rta_ifname || strcmp(rta->rta_ifname, r->r_ifname) == 0) && + (!rtm->rtm_table || (r && rtm->rtm_table 
== r->r_table))) { + *rp = r->r_next; + if (r != &default_rule && r != &main_rule && r != &local_rule) + kfree(r); + return 0; + } + } + return -ESRCH; +} + +/* Allocate new unique table id */ + +static struct fib_table *fib_empty_table(void) +{ + int id; + + for (id = 1; id <= RT_TABLE_MAX; id++) + if (fib_tables[id] == NULL) + return __fib_new_table(id); + return NULL; +} + + +int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct kern_rta *rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct fib_rule *r, *new_r, **rp; + unsigned char table_id; + + if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || + (rtm->rtm_tos & ~IPTOS_TOS_MASK)) + return -EINVAL; + + table_id = rtm->rtm_table; + if (table_id == RT_TABLE_UNSPEC) { + struct fib_table *table; + if (rtm->rtm_type == RTN_UNICAST || rtm->rtm_type == RTN_NAT) { + if ((table = fib_empty_table()) == NULL) + return -ENOBUFS; + table_id = table->tb_id; + } + } + + new_r = kmalloc(sizeof(*new_r), GFP_KERNEL); + if (!new_r) + return -ENOMEM; + memset(new_r, 0, sizeof(*new_r)); + if (rta->rta_src) + memcpy(&new_r->r_src, rta->rta_src, 4); + if (rta->rta_dst) + memcpy(&new_r->r_dst, rta->rta_dst, 4); + if (rta->rta_gw) + memcpy(&new_r->r_srcmap, rta->rta_gw, 4); + new_r->r_src_len = rtm->rtm_src_len; + new_r->r_dst_len = rtm->rtm_dst_len; + new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len); + new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len); + new_r->r_tos = rtm->rtm_tos; + new_r->r_action = rtm->rtm_type; + new_r->r_flags = rtm->rtm_flags; + if (rta->rta_priority) + new_r->r_preference = *rta->rta_priority; + new_r->r_table = table_id; + if (rta->rta_ifname) { + struct device *dev; + memcpy(new_r->r_ifname, rta->rta_ifname, IFNAMSIZ); + new_r->r_ifindex = -1; + dev = dev_get(rta->rta_ifname); + if (dev) + new_r->r_ifindex = dev->ifindex; + } + + rp = &fib_rules; + if (!new_r->r_preference) { + r = fib_rules; + if (r && (r = r->r_next) != NULL) { + rp = &fib_rules->r_next; + if 
(r->r_preference) + new_r->r_preference = r->r_preference - 1; + } + } + + while ( (r = *rp) != NULL ) { + if (r->r_preference > new_r->r_preference) + break; + rp = &r->r_next; + } + + new_r->r_next = r; + *rp = new_r; + return 0; +} + +u32 fib_rules_map_destination(u32 daddr, struct fib_result *res) +{ + u32 mask = inet_make_mask(res->prefixlen); + return (daddr&~mask)|res->fi->fib_nh->nh_gw; +} + +u32 fib_rules_policy(u32 saddr, struct fib_result *res, unsigned *flags) +{ + struct fib_rule *r = res->r; + + if (r->r_action == RTN_NAT) { + int addrtype = inet_addr_type(r->r_srcmap); + + if (addrtype == RTN_NAT) { + /* Packet is from translated source; remember it */ + saddr = (saddr&~r->r_srcmask)|r->r_srcmap; + *flags |= RTCF_SNAT; + } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) { + /* Packet is from masqueraded source; remember it */ + saddr = r->r_srcmap; + *flags |= RTCF_MASQ; + } + } + return saddr; +} + +static void fib_rules_detach(struct device *dev) +{ + struct fib_rule *r; + + for (r=fib_rules; r; r=r->r_next) { + if (r->r_ifindex == dev->ifindex) + r->r_ifindex = -1; + } +} + +static void fib_rules_attach(struct device *dev) +{ + struct fib_rule *r; + + for (r=fib_rules; r; r=r->r_next) { + if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) + r->r_ifindex = dev->ifindex; + } +} + +int fib_lookup(const struct rt_key *key, struct fib_result *res) +{ + int err; + struct fib_rule *r, *policy; + struct fib_table *tb; + + u32 daddr = key->dst; + u32 saddr = key->src; + +FRprintk("Lookup: %08x <- %08x ", key->dst, key->src); + for (r = fib_rules; r; r=r->r_next) { + if (((saddr^r->r_src) & r->r_srcmask) || + ((daddr^r->r_dst) & r->r_dstmask) || +#ifdef CONFIG_IP_TOS_ROUTING + (r->r_tos && r->r_tos != key->tos) || +#endif + (r->r_ifindex && r->r_ifindex != key->iif)) + continue; + +FRprintk("tb %d r %d ", r->r_table, r->r_action); + switch (r->r_action) { + case RTN_UNICAST: + policy = NULL; + break; + case RTN_NAT: + policy = r; + break; + 
case RTN_UNREACHABLE: + return -ENETUNREACH; + default: + case RTN_BLACKHOLE: + return -EINVAL; + case RTN_PROHIBIT: + return -EACCES; + } + + if ((tb = fib_get_table(r->r_table)) == NULL) + continue; + err = tb->tb_lookup(tb, key, res); + if (err == 0) { +FRprintk("ok\n"); + res->r = policy; + return 0; + } + if (err < 0) + return err; +FRprintk("RCONT "); + } +FRprintk("FAILURE\n"); + return -ENETUNREACH; +} + +static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct device *dev = ptr; + + if (event == NETDEV_UNREGISTER) + fib_rules_detach(dev); + else if (event == NETDEV_REGISTER) + fib_rules_attach(dev); + return NOTIFY_DONE; +} + + +struct notifier_block fib_rules_notifier = { + fib_rules_event, + NULL, + 0 +}; + +#ifdef CONFIG_RTNETLINK + +extern __inline__ int inet_fill_rule(struct sk_buff *skb, + struct fib_rule *r, + struct netlink_callback *cb) +{ + struct rtmsg *rtm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm)); + rtm = NLMSG_DATA(nlh); + rtm->rtm_family = AF_INET; + rtm->rtm_dst_len = r->r_dst_len; + rtm->rtm_src_len = r->r_src_len; + rtm->rtm_tos = r->r_tos; + rtm->rtm_table = r->r_table; + rtm->rtm_protocol = 0; + rtm->rtm_scope = 0; + rtm->rtm_nhs = 0; + rtm->rtm_type = r->r_action; + rtm->rtm_optlen = 0; + rtm->rtm_flags = r->r_flags; + + if (r->r_dst_len) + RTA_PUT(skb, RTA_DST, 4, &r->r_dst); + if (r->r_src_len) + RTA_PUT(skb, RTA_SRC, 4, &r->r_src); + if (r->r_ifname[0]) + RTA_PUT(skb, RTA_IFNAME, IFNAMSIZ, &r->r_ifname); + if (r->r_preference) + RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); + if (r->r_srcmap) + RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_put(skb, b - skb->tail); + return -1; +} + +int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx; + int s_idx = 
cb->args[0]; + struct fib_rule *r; + + for (r=fib_rules, idx=0; r; r = r->r_next, idx++) { + if (idx < s_idx) + continue; + if (inet_fill_rule(skb, r, cb) < 0) + break; + } + cb->args[0] = idx; + + return skb->len; +} + +#endif /* CONFIG_RTNETLINK */ + +__initfunc(void fib_rules_init(void)) +{ + register_netdevice_notifier(&fib_rules_notifier); +} diff -u --recursive --new-file v2.1.67/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c --- v2.1.67/linux/net/ipv4/fib_semantics.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/fib_semantics.c Sun Nov 30 14:00:39 1997 @@ -0,0 +1,908 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 Forwarding Information Base: semantics. + * + * Version: $Id: fib_semantics.c,v 1.5 1997/10/10 22:40:50 davem Exp $ + * + * Authors: Alexey Kuznetsov, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define FSprintk(a...) 
+ +static struct fib_info *fib_info_list; + +#define for_fib_info() { struct fib_info *fi; \ + for (fi = fib_info_list; fi; fi = fi->fib_next) + +#define endfor_fib_info() } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ +for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) + +#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ +for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) + +#else /* CONFIG_IP_ROUTE_MULTIPATH */ + +/* Hope, that gcc will optimize it to get rid of dummy loop */ + +#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ +for (nhsel=0; nhsel < 1; nhsel++) + +#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ +for (nhsel=0; nhsel < 1; nhsel++) + +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ + +#define endfor_nexthops(fi) } + + +static struct +{ + int error; + u8 scope; +} fib_props[RTA_MAX+1] = { + { 0, RT_SCOPE_NOWHERE}, /* RTN_UNSPEC */ + { 0, RT_SCOPE_UNIVERSE}, /* RTN_UNICAST */ + { 0, RT_SCOPE_HOST}, /* RTN_LOCAL */ + { 0, RT_SCOPE_LINK}, /* RTN_BROADCAST */ + { 0, RT_SCOPE_LINK}, /* RTN_ANYCAST */ + { 0, RT_SCOPE_UNIVERSE}, /* RTN_MULTICAST */ + { -EINVAL, RT_SCOPE_UNIVERSE}, /* RTN_BLACKHOLE */ + { -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */ + { -EACCES, RT_SCOPE_UNIVERSE}, /* RTN_PROHIBIT */ + { 1, RT_SCOPE_UNIVERSE}, /* RTN_THROW */ +#ifdef CONFIG_IP_ROUTE_NAT + { 0, RT_SCOPE_HOST}, /* RTN_NAT */ +#else + { -EINVAL, RT_SCOPE_NOWHERE}, /* RTN_NAT */ +#endif + { -EINVAL, RT_SCOPE_NOWHERE} /* RTN_XRESOLVE */ +}; + +/* Release a nexthop info record */ + +void fib_release_info(struct fib_info *fi) +{ + if (fi && !--fi->fib_refcnt) { + if (fi->fib_next) + fi->fib_next->fib_prev = fi->fib_prev; + if (fi->fib_prev) + fi->fib_prev->fib_next = fi->fib_next; + if (fi == fib_info_list) + fib_info_list = fi->fib_next; + kfree(fi); + } +} + +extern 
__inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) +{ + const struct fib_nh *onh = ofi->fib_nh; + + for_nexthops(fi) { + if (nh->nh_oif != onh->nh_oif || + nh->nh_gw != onh->nh_gw || +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->nh_weight != onh->nh_weight || +#endif + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) + return -1; + onh++; + } endfor_nexthops(fi); + return 0; +} + +extern __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi) +{ + for_fib_info() { + if (fi->fib_nhs != nfi->fib_nhs) + continue; + if (nfi->fib_protocol == fi->fib_protocol && + nfi->fib_prefsrc == fi->fib_prefsrc && + nfi->fib_mtu == fi->fib_mtu && + nfi->fib_rtt == fi->fib_rtt && + nfi->fib_window == fi->fib_window && + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && + (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) + return fi; + } endfor_fib_info(); + return NULL; +} + +/* Check, that the gateway is already configured. + Used only by redirect accept routine. + */ + +int ip_fib_check_default(u32 gw, struct device *dev) +{ + for_fib_info() { + if (fi->fib_flags & RTNH_F_DEAD) + continue; + for_nexthops(fi) { + if (nh->nh_dev == dev && nh->nh_gw == gw && + !(nh->nh_flags&RTNH_F_DEAD)) + return 0; + } endfor_nexthops(fi); + } endfor_fib_info(); + return -1; +} + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) +{ + while (RTA_OK(attr,attrlen)) { + if (attr->rta_type == type) + return *(u32*)RTA_DATA(attr); + attr = RTA_NEXT(attr, attrlen); + } + return 0; +} + +static int +fib_get_nhs(struct fib_info *fi, const struct nlmsghdr *nlh, const struct rtmsg *r) +{ + struct rtnexthop *nhp = RTM_RTNH(r); + int nhlen = RTM_NHLEN(nlh, r); + +printk("get nhs %d/%d\n", r->rtm_nhs, nhlen); + change_nexthops(fi) { + int attrlen = nhlen - sizeof(struct rtnexthop); + if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + return -EINVAL; + nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; + nh->nh_oif = 
nhp->rtnh_ifindex; + nh->nh_weight = nhp->rtnh_hops + 1; + if (attrlen) + nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); +printk("Got nh: via %08x dev %d w %d fl %02x\n", nh->nh_gw, nh->nh_oif, + nh->nh_weight, nh->nh_flags); + nhp = RTNH_NEXT(nhp); + } endfor_nexthops(fi); + return 0; +} + +#endif + +int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, + struct fib_info *fi) +{ +#ifdef CONFIG_IP_ROUTE_MULTIPATH + struct rtnexthop *nhp; + int nhlen; +#endif + + if (rta->rta_oif || rta->rta_gw) { + if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && + (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) + return 0; + return 1; + } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (r->rtm_nhs == 0) + return 0; + + nhp = RTM_RTNH(r); + nhlen = RTM_NHLEN(nlh, r); + + for_nexthops(fi) { + int attrlen = nhlen - sizeof(struct rtnexthop); + u32 gw; + + if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + return -EINVAL; + if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) + return 1; + if (attrlen) { + gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + if (gw && gw != nh->nh_gw) + return 1; + } + nhp = RTNH_NEXT(nhp); + } endfor_nexthops(fi); +#endif + return 0; +} + + +/* + Picture + ------- + + Semantics of nexthop is very messy by historical reasons. + We have to take into account, that: + a) gateway can be actually local interface address, + so that gatewayed route is direct. + b) gateway must be on-link address, possibly + described not by an ifaddr, but also by a direct route. + c) If both gateway and interface are specified, they should not + contradict. + d) If we use tunnel routes, gateway could be not on-link. + + Attempt to reconcile all of these (alas, self-contradictory) conditions + results in pretty ugly and hairy code with obscure logic. + + I chose to generalize it instead, so that the size + of code does not increase practically, but it becomes + much more general.
+ Every prefix is assigned a "scope" value: "host" is local address, + "link" is direct route, + [ ... "site" ... "interior" ... ] + and "universe" is true gateway route with global meaning. + + Every prefix refers to a set of "nexthop"s (gw, oif), + where gw must have narrower scope. This recursion stops + when gw has LOCAL scope or if "nexthop" is declared ONLINK, + which means that gw is forced to be on link. + + Code is still hairy, but now it is apparently logically + consistent and very flexible. F.e. as by-product it allows + to co-exists in peace independent exterior and interior + routing processes. + + Normally it looks as following. + + {universe prefix} -> (gw, oif) [scope link] + | + |-> {link prefix} -> (gw, oif) [scope local] + | + |-> {local prefix} (terminal node) + */ + +static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) +{ + int err; + + if (nh->nh_gw) { + struct rt_key key; + struct fib_result res; + +#ifdef CONFIG_IP_ROUTE_PERVASIVE + if (nh->nh_flags&RTNH_F_PERVASIVE) + return 0; +#endif + if (nh->nh_flags&RTNH_F_ONLINK) { + struct device *dev; + + if (r->rtm_scope >= RT_SCOPE_LINK) + return -EINVAL; + if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) + return -EINVAL; + if ((dev = dev_get_by_index(nh->nh_oif)) == NULL) + return -ENODEV; + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + nh->nh_dev = dev; + nh->nh_scope = RT_SCOPE_LINK; + return 0; + } + memset(&key, 0, sizeof(key)); + key.dst = nh->nh_gw; + key.oif = nh->nh_oif; + key.scope = r->rtm_scope + 1; + + /* It is not necessary, but requires a bit of thinking */ + if (key.scope < RT_SCOPE_LINK) + key.scope = RT_SCOPE_LINK; + + if ((err = fib_lookup(&key, &res)) != 0) + return err; + nh->nh_scope = res.scope; + nh->nh_oif = FIB_RES_OIF(res); + nh->nh_dev = FIB_RES_DEV(res); + } else { + struct in_device *in_dev; + + if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) + return -EINVAL; + + in_dev = inetdev_by_index(nh->nh_oif); + if (in_dev == NULL) + 
return -ENODEV; + if (!(in_dev->dev->flags&IFF_UP)) + return -ENETDOWN; + nh->nh_dev = in_dev->dev; + nh->nh_scope = RT_SCOPE_HOST; + } + return 0; +} + +struct fib_info * +fib_create_info(const struct rtmsg *r, struct kern_rta *rta, + const struct nlmsghdr *nlh, int *errp) +{ + int err; + struct fib_info *fi = NULL; + struct fib_info *ofi; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int nhs = r->rtm_nhs ? : 1; +#else + const int nhs = 1; +#endif + + /* Fast check to catch the most weird cases */ + if (fib_props[r->rtm_type].scope > r->rtm_scope) { + printk("Einval 1\n"); + goto err_inval; + } + + fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); + err = -ENOBUFS; + if (fi == NULL) + goto failure; + memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh)); + + fi->fib_protocol = r->rtm_protocol; + fi->fib_nhs = nhs; + fi->fib_flags = r->rtm_flags; + if (rta->rta_mtu) + fi->fib_mtu = *rta->rta_mtu; + if (rta->rta_rtt) + fi->fib_rtt = *rta->rta_rtt; + if (rta->rta_window) + fi->fib_window = *rta->rta_window; + if (rta->rta_prefsrc) + memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); + + if (r->rtm_nhs) { +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if ((err = fib_get_nhs(fi, nlh, r)) != 0) + goto failure; + if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + goto err_inval; + if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) + goto err_inval; +#else + goto err_inval; +#endif + } else { + struct fib_nh *nh = fi->fib_nh; + if (rta->rta_oif) + nh->nh_oif = *rta->rta_oif; + if (rta->rta_gw) + memcpy(&nh->nh_gw, rta->rta_gw, 4); + nh->nh_flags = r->rtm_flags; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->nh_weight = 1; +#endif + } + +#ifdef CONFIG_IP_ROUTE_NAT + if (r->rtm_type == RTN_NAT) { + if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) + goto err_inval; + memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4); + goto link_it; + } +#endif + + if (fib_props[r->rtm_type].error) { + if (rta->rta_gw || rta->rta_oif || r->rtm_nhs) + goto err_inval; + goto link_it; + } + + if 
(r->rtm_scope > RT_SCOPE_HOST) + goto err_inval; + + if (r->rtm_scope == RT_SCOPE_HOST) { + struct fib_nh *nh = fi->fib_nh; + + /* Local address is added. */ + if (nhs != 1 || nh->nh_gw) + goto err_inval; + nh->nh_scope = RT_SCOPE_NOWHERE; + nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + err = -ENODEV; + if (nh->nh_dev == NULL) + goto failure; + } else { + change_nexthops(fi) { + if ((err = fib_check_nh(r, fi, nh)) != 0) { + if (err == -EINVAL) + printk("Einval 2\n"); + goto failure; + } + } endfor_nexthops(fi) + } + + if (fi->fib_prefsrc) { + if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || + memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) + if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) { + printk("Einval 3\n"); + goto err_inval; + } + } + +link_it: + if ((ofi = fib_find_info(fi)) != NULL) { + kfree(fi); + ofi->fib_refcnt++; + return ofi; + } + + fi->fib_refcnt++; + fi->fib_next = fib_info_list; + fi->fib_prev = NULL; + if (fib_info_list) + fib_info_list->fib_prev = fi; + fib_info_list = fi; + return fi; + +err_inval: + err = -EINVAL; + +failure: + *errp = err; + if (fi) + kfree(fi); + return NULL; +} + +int +fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res) +{ + int err = fib_props[type].error; + + if (err == 0) { + if (fi->fib_flags&RTNH_F_DEAD) + return 1; + + res->fi = fi; + + switch (type) { +#ifdef CONFIG_IP_ROUTE_NAT + case RTN_NAT: + FIB_RES_RESET(*res); + return 0; +#endif + case RTN_UNICAST: + case RTN_LOCAL: + case RTN_BROADCAST: + case RTN_ANYCAST: + case RTN_MULTICAST: + for_nexthops(fi) { + if (nh->nh_flags&RTNH_F_DEAD) + continue; + if (!key->oif || key->oif == nh->nh_oif) + break; + } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (nhsel < fi->fib_nhs) { + res->nh_sel = nhsel; + return 0; + } +#else + if (nhsel < 1) + return 0; +#endif + endfor_nexthops(fi); + return 1; + default: + printk(KERN_DEBUG "impossible 102\n"); + return -EINVAL; + } + } + return err; +} + +/* Find appropriate source 
address to this destination */ + +u32 __fib_res_prefsrc(struct fib_result *res) +{ + return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); +} + +#ifdef CONFIG_RTNETLINK + +int +fib_dump_info(struct sk_buff *skb, pid_t pid, u32 seq, int event, + u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, + struct fib_info *fi) +{ + struct rtmsg *rtm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + unsigned char *o; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm)); + rtm = NLMSG_DATA(nlh); + rtm->rtm_family = AF_INET; + rtm->rtm_dst_len = dst_len; + rtm->rtm_src_len = 0; + rtm->rtm_tos = tos; + rtm->rtm_table = tb_id; + rtm->rtm_type = type; + rtm->rtm_flags = fi->fib_flags; + rtm->rtm_scope = scope; + rtm->rtm_nhs = 0; + + o = skb->tail; + if (rtm->rtm_dst_len) + RTA_PUT(skb, RTA_DST, 4, dst); + rtm->rtm_protocol = fi->fib_protocol; + if (fi->fib_mtu) + RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &fi->fib_mtu); + if (fi->fib_window) + RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &fi->fib_window); + if (fi->fib_rtt) + RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &fi->fib_rtt); + if (fi->fib_prefsrc) + RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); + if (fi->fib_nhs == 1) { + if (fi->fib_nh->nh_gw) + RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); + if (fi->fib_nh->nh_oif) + RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); + } + rtm->rtm_optlen = skb->tail - o; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (fi->fib_nhs > 1) { + struct rtnexthop *nhp; + for_nexthops(fi) { + if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) + goto rtattr_failure; + nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp->rtnh_flags = nh->nh_flags & 0xFF; + nhp->rtnh_hops = nh->nh_weight-1; + nhp->rtnh_ifindex = nh->nh_oif; + if (nh->nh_gw) + RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); + nhp->rtnh_len = skb->tail - (unsigned char*)nhp; + rtm->rtm_nhs++; + } endfor_nexthops(fi); + } +#endif + nlh->nlmsg_len = skb->tail - b; + return 
skb->len; + +nlmsg_failure: +rtattr_failure: + skb_put(skb, b - skb->tail); + return -1; +} + +#endif /* CONFIG_RTNETLINK */ + +#ifndef CONFIG_IP_NOSIOCRT + +int +fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, + struct kern_rta *rta, struct rtentry *r) +{ + int plen; + u32 *ptr; + + memset(rtm, 0, sizeof(*rtm)); + memset(rta, 0, sizeof(*rta)); + + if (r->rt_dst.sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* Check mask for validity: + a) it must be contiguous. + b) destination must have all host bits clear. + c) if application forgot to set correct family (AF_INET), + reject request unless it is absolutely clear i.e. + both family and mask are zero. + */ + plen = 32; + ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; + if (!(r->rt_flags&RTF_HOST)) { + u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; + if (r->rt_genmask.sa_family != AF_INET) { + if (mask || r->rt_genmask.sa_family) + return -EAFNOSUPPORT; + } + if (bad_mask(mask, *ptr)) + return -EINVAL; + plen = inet_mask_len(mask); + } + + nl->nlmsg_flags = NLM_F_REQUEST; + nl->nlmsg_pid = 0; + nl->nlmsg_seq = 0; + nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); + if (cmd == SIOCDELRT) { + nl->nlmsg_type = RTM_DELROUTE; + nl->nlmsg_flags = 0; + } else { + nl->nlmsg_type = RTM_NEWROUTE; + nl->nlmsg_flags = NLM_F_CREATE; + rtm->rtm_protocol = RTPROT_BOOT; + if (plen != 0) + nl->nlmsg_flags |= NLM_F_REPLACE; + } + + rtm->rtm_dst_len = plen; + rta->rta_dst = ptr; + + if (r->rt_flags&RTF_REJECT) { + rtm->rtm_scope = RT_SCOPE_HOST; + rtm->rtm_type = RTN_UNREACHABLE; + return 0; + } + rtm->rtm_scope = RT_SCOPE_LINK; + rtm->rtm_type = RTN_UNICAST; + + if (r->rt_dev) { +#ifdef CONFIG_IP_ALIAS + char *colon; +#endif + struct device *dev; + char devname[IFNAMSIZ]; + + if (copy_from_user(devname, r->rt_dev, 15)) + return -EFAULT; + devname[IFNAMSIZ-1] = 0; +#ifdef CONFIG_IP_ALIAS + colon = strchr(devname, ':'); + if (colon) + *colon = 0; +#endif + dev = dev_get(devname); + if 
(!dev) + return -ENODEV; + rta->rta_oif = &dev->ifindex; +#ifdef CONFIG_IP_ALIAS + if (colon) { + struct in_ifaddr *ifa; + struct in_device *in_dev = dev->ip_ptr; + if (!in_dev) + return -ENODEV; + *colon = ':'; + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) + if (strcmp(ifa->ifa_label, devname) == 0) + break; + if (ifa == NULL) + return -ENODEV; + rta->rta_prefsrc = &ifa->ifa_local; + } +#endif + } + + ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; + if (r->rt_gateway.sa_family == AF_INET && *ptr) { + rta->rta_gw = ptr; + if (r->rt_flags&RTF_GATEWAY) + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + } + + if (cmd == SIOCDELRT) + return 0; + + if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) + return -EINVAL; + + /* Ugly conversion from rtentry types to unsigned */ + + if (r->rt_flags&RTF_IRTT) { + rta->rta_rtt = (unsigned*)&r->rt_pad3; + *rta->rta_rtt = r->rt_irtt; + } + if (r->rt_flags&RTF_WINDOW) { + rta->rta_window = (unsigned*)&r->rt_window; + if (sizeof(*rta->rta_window) != sizeof(r->rt_window)) + *rta->rta_window = r->rt_window; + } + if (r->rt_flags&RTF_MTU) { + rta->rta_mtu = (unsigned*)&r->rt_mtu; + if (sizeof(*rta->rta_mtu) != sizeof(r->rt_mtu)) + *rta->rta_mtu = r->rt_mtu; + } + return 0; +} + +#endif + +/* + Update FIB if: + - local address disappeared -> we must delete all the entries + referring to it. + - device went down -> we must shutdown all nexthops going via it. 
+ */ + +int fib_sync_down(u32 local, struct device *dev) +{ + int ret = 0; + + for_fib_info() { + if (local && fi->fib_prefsrc == local) { + fi->fib_flags |= RTNH_F_DEAD; + ret++; + } else if (dev && fi->fib_nhs) { + int dead = 0; + + change_nexthops(fi) { + if (nh->nh_flags&RTNH_F_DEAD) + dead++; + else if (nh->nh_dev == dev && + nh->nh_scope != RT_SCOPE_NOWHERE) { + nh->nh_flags |= RTNH_F_DEAD; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + fi->fib_power -= nh->nh_power; + nh->nh_power = 0; +#endif + dead++; + } + } endfor_nexthops(fi) + if (dead == fi->fib_nhs) { + fi->fib_flags |= RTNH_F_DEAD; + ret++; + } + } + } endfor_fib_info(); + return ret; +} + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* + Dead device goes up. We wake up dead nexthops. + It makes sense only on multipath routes. + */ + +int fib_sync_up(struct device *dev) +{ + int ret = 0; + + if (!(dev->flags&IFF_UP)) + return 0; + + for_fib_info() { + int alive = 0; + + change_nexthops(fi) { + if (!(nh->nh_flags&RTNH_F_DEAD)) { + alive++; + continue; + } + if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) + continue; + if (nh->nh_dev != dev || dev->ip_ptr == NULL) + continue; + alive++; + nh->nh_power = 0; + nh->nh_flags &= ~RTNH_F_DEAD; + } endfor_nexthops(fi) + + if (alive == fi->fib_nhs) { + fi->fib_flags &= ~RTNH_F_DEAD; + ret++; + } + } endfor_fib_info(); + return ret; +} + +/* + The algorithm is suboptimal, but it provides really + fair weighted route distribution. + */ + +void fib_select_multipath(const struct rt_key *key, struct fib_result *res) +{ + struct fib_info *fi = res->fi; + int w; + + if (fi->fib_power <= 0) { + int power = 0; + change_nexthops(fi) { + if (!(nh->nh_flags&RTNH_F_DEAD)) { + power += nh->nh_weight; + nh->nh_power = nh->nh_weight; + } + } endfor_nexthops(fi); + fi->fib_power = power; +#if 1 + if (power <= 0) { + printk(KERN_CRIT "impossible 777\n"); + return; + } +#endif + } + + + /* w should be random number [0..fi->fib_power-1], + it is pretty bad approximation.
+ */ + + w = jiffies % fi->fib_power; + + change_nexthops(fi) { + if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { + if ((w -= nh->nh_power) <= 0) { + nh->nh_power--; + fi->fib_power--; + res->nh_sel = nhsel; + return; + } + } + } endfor_nexthops(fi); + +#if 1 + printk(KERN_CRIT "impossible 888\n"); +#endif + return; +} +#endif + + +#ifdef CONFIG_PROC_FS + +static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi) +{ + static unsigned type2flags[RTN_MAX+1] = { + 0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0 + }; + unsigned flags = type2flags[type]; + + if (fi && fi->fib_nh->nh_gw) + flags |= RTF_GATEWAY; + if (mask == 0xFFFFFFFF) + flags |= RTF_HOST; + if (!dead) + flags |= RTF_UP; + return flags; +} + +void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer) +{ + int len; + unsigned flags = fib_flag_trans(type, dead, mask, fi); + + if (fi) { + len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + fi->fib_dev ? fi->fib_dev->name : "*", prefix, + fi->fib_nh->nh_gw, flags, 0, 0, 0, + mask, fi->fib_mtu, fi->fib_window, fi->fib_rtt); + } else { + len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", + prefix, 0, + flags, 0, 0, 0, + mask, 0, 0, 0); + } + memset(buffer+len, 0, 127-len); + buffer[127] = '\n'; +} + +#endif diff -u --recursive --new-file v2.1.67/linux/net/ipv4/icmp.c linux/net/ipv4/icmp.c --- v2.1.67/linux/net/ipv4/icmp.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/icmp.c Sun Nov 30 14:00:39 1997 @@ -3,6 +3,8 @@ * * Alan Cox, * + * Version: $Id: icmp.c,v 1.35 1997/10/19 18:17:13 freitag Exp $ + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -41,6 +43,10 @@ * Andi Kleen : Check all packet lengths properly * and moved all kfree_skb() up to * icmp_rcv. 
+ * Andi Kleen : Move the rate limit bookkeeping + * into the dest entry and use a tocken + * bucket filter (thanks to ANK). Make + * the rates sysctl configurable. * * RFC1122 (Host Requirements -- Comm. Layer) Status: * (boy, are there a lot of rules for ICMP) @@ -77,7 +83,7 @@ * [Solaris 2.X seems to assert EPROTO when this occurs] -- AC * 3.2.2.6 (Echo Request/Reply) * MUST reply to ECHO_REQUEST, and give app to do ECHO stuff (OK, OK) - * MAY discard broadcast ECHO_REQUESTs. (We don't, but that's OK.) + * MAY discard broadcast ECHO_REQUESTs. (Configurable with a sysctl.) * MUST reply using same source address as the request was sent to. * We're OK for unicast ECHOs, and it doesn't say anything about * how to handle broadcast ones, since it's optional. @@ -293,39 +299,9 @@ { EHOSTUNREACH, 1 } /* ICMP_PREC_CUTOFF */ }; -/* - * A spare long used to speed up statistics updating - */ - -unsigned long dummy; - -/* - * ICMP transmit rate limit control structures. We use a relatively simple - * approach to the problem: For each type of ICMP message with rate limit - * we count the number of messages sent during some time quantum. If this - * count exceeds given maximal value, we ignore all messages not separated - * from the last message sent at least by specified time. 
- */ - -#define XRLIM_CACHE_SIZE 16 /* How many destination hosts do we cache */ - -struct icmp_xrl_cache /* One entry of the ICMP rate cache */ -{ - __u32 daddr; /* Destination address */ - unsigned long counter; /* Message counter */ - unsigned long next_reset; /* Time of next reset of the counter */ - unsigned long last_access; /* Time of last access to this entry (LRU) */ - unsigned int restricted; /* Set if we're in restricted mode */ - unsigned long next_packet; /* When we'll allow a next packet if restricted */ -}; - -struct icmp_xrlim -{ - unsigned long timeout; /* Time quantum for rate measuring */ - unsigned long limit; /* Maximal number of messages per time quantum allowed */ - unsigned long delay; /* How long we wait between packets when restricting */ - struct icmp_xrl_cache cache[XRLIM_CACHE_SIZE]; /* Rate cache */ -}; +/* Control parameters for ECHO relies. */ +int sysctl_icmp_echo_ignore_all = 0; +int sysctl_icmp_echo_ignore_broadcasts = 0; /* * ICMP control array. This specifies what to do with each ICMP. @@ -336,8 +312,8 @@ unsigned long *output; /* Address to increment on output */ unsigned long *input; /* Address to increment on input */ void (*handler)(struct icmphdr *icmph, struct sk_buff *skb, int len); - unsigned long error; /* This ICMP is classed as an error message */ - struct icmp_xrlim *xrlim; /* Transmit rate limit control structure or NULL for no limits */ + short error; /* This ICMP is classed as an error message */ + int *timeout; /* Rate limit */ }; static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; @@ -369,100 +345,47 @@ * Send an ICMP frame. */ - -/* - * Initialize the transmit rate limitation mechanism. 
- */ - -#ifndef CONFIG_NO_ICMP_LIMIT - -__initfunc(static void xrlim_init(void)) -{ - int type, entry; - struct icmp_xrlim *xr; - - for (type=0; type<=NR_ICMP_TYPES; type++) { - xr = icmp_pointers[type].xrlim; - if (xr) { - for (entry=0; entrycache[entry].daddr = INADDR_NONE; - } - } -} - /* * Check transmit rate limitation for given message. + * The rate information is held in the destination cache now. + * This function is generic and could be used for other purposes + * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. * * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate - * SHOULD allow setting of rate limits (we allow - * in the source) + * SHOULD allow setting of rate limits + * + * Shared between ICMPv4 and ICMPv6. */ - -static int xrlim_allow(int type, __u32 addr) +#define XRLIM_BURST_FACTOR 6 +int xrlim_allow(struct dst_entry *dst, int timeout) { - struct icmp_xrlim *r; - struct icmp_xrl_cache *c; unsigned long now; - if (type > NR_ICMP_TYPES) /* No time limit present */ - return 1; - r = icmp_pointers[type].xrlim; - if (!r) + now = jiffies; + dst->rate_tokens += now - dst->rate_last; + if (dst->rate_tokens > 6*timeout) + dst->rate_tokens = XRLIM_BURST_FACTOR*timeout; + if (dst->rate_tokens >= timeout) { + dst->rate_tokens -= timeout; return 1; + } + return 0; +} - for (c = r->cache; c < &r->cache[XRLIM_CACHE_SIZE]; c++) - /* Cache lookup */ - if (c->daddr == addr) - break; - - now = jiffies; /* Cache current time (saves accesses to volatile variable) */ +static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) +{ + struct dst_entry *dst = &rt->u.dst; - if (c == &r->cache[XRLIM_CACHE_SIZE]) { /* Cache miss */ - unsigned long oldest = now; /* Find the oldest entry to replace */ - struct icmp_xrl_cache *d; - c = r->cache; - for (d = r->cache; d < &r->cache[XRLIM_CACHE_SIZE]; d++) - if (!d->daddr) { /* Unused entry */ - c = d; - break; - } else if (d->last_access < oldest) { - oldest = d->last_access; - c = d; 
- } - c->last_access = now; /* Fill the entry with new data */ - c->daddr = addr; - c->counter = 1; - c->next_reset = now + r->timeout; - c->restricted = 0; + if (type > NR_ICMP_TYPES || !icmp_pointers[type].timeout) return 1; - } - c->last_access = now; - if (c->next_reset > now) { /* Let's increment the counter */ - c->counter++; - if (c->counter == r->limit) { /* Limit exceeded, start restrictions */ - c->restricted = 1; - c->next_packet = now + r->delay; - return 0; - } - if (c->restricted) { /* Any restrictions pending? */ - if (c->next_packet > now) - return 0; - c->next_packet = now + r->delay; - return 1; - } - } else { /* Reset the counter */ - if (c->counter < r->limit) /* Switch off all restrictions */ - c->restricted = 0; - c->next_reset = now + r->timeout; - c->counter = 0; - } + /* Don't limit PMTU discovery. */ + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + return 1; - return 1; /* Send the packet */ + return xrlim_allow(dst, *(icmp_pointers[type].timeout)); } -#endif /* CONFIG_NO_ICMP_LIMIT */ - /* * Maintain the counters used in the SNMP statistics for outgoing ICMP */ @@ -530,7 +453,7 @@ ipc.opt = &icmp_param->replyopts; if (ipc.opt->srr) daddr = icmp_param->replyopts.faddr; - if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), NULL)) + if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) return; ip_build_xmit(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+sizeof(struct icmphdr), @@ -578,7 +501,7 @@ */ if (!rt) return; - if (rt->rt_flags&(RTF_BROADCAST|RTF_MULTICAST)) + if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) return; @@ -610,34 +533,30 @@ } } - /* - * Check the rate limit - */ - -#ifndef CONFIG_NO_ICMP_LIMIT - if (!xrlim_allow(type, iph->saddr)) - return; -#endif /* * Construct source address and options. */ saddr = iph->daddr; - if (!(rt->rt_flags&RTF_LOCAL)) + if (!(rt->rt_flags&RTCF_LOCAL)) saddr = 0; tos = icmp_pointers[type].error ? 
((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; - if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), NULL)) + /* XXX: use a more aggressive expire for routes created by + * this call (not longer than the rate limit timeout). + * It could be also worthwhile to not put them into ipv4 + * fast routing cache at first. + */ + if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0)) return; - if (ip_options_echo(&icmp_param.replyopts, skb_in)) { - ip_rt_put(rt); - return; - } + if (ip_options_echo(&icmp_param.replyopts, skb_in)) + goto ende; + /* * Prepare data for ICMP header. @@ -655,10 +574,13 @@ ipc.opt = &icmp_param.replyopts; if (icmp_param.replyopts.srr) { ip_rt_put(rt); - if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), NULL)) + if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0)) return; } + if (!icmpv4_xrlim_allow(rt, type, code)) + goto ende; + /* RFC says return as much as we can without exceeding 576 bytes. */ room = rt->u.dst.pmtu; @@ -674,6 +596,7 @@ icmp_param.data_len+sizeof(struct icmphdr), &ipc, rt, MSG_DONTWAIT); +ende: ip_rt_put(rt); } @@ -753,7 +676,7 @@ * get the other vendor to fix their kit. */ - if(__ip_chk_addr(iph->daddr)==IS_BROADCAST) + if (inet_addr_type(iph->daddr) == RTN_BROADCAST) { if (net_ratelimit()) printk("%s sent an invalid ICMP error to a broadcast.\n", @@ -770,12 +693,12 @@ hash = iph->protocol & (MAX_INET_PROTOS - 1); if ((raw_sk = raw_v4_htable[hash]) != NULL) { - raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr); + raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); while (raw_sk) { raw_err(raw_sk, skb); raw_sk = raw_v4_lookup(raw_sk->next, iph->protocol, - iph->saddr, iph->daddr); + iph->saddr, iph->daddr, skb->dev->ifindex); } } @@ -797,7 +720,7 @@ /* appropriate protocol layer (MUST), as per 3.2.2. 
*/ if (iph->protocol == ipprot->protocol && ipprot->err_handler) - ipprot->err_handler(skb, dp); + ipprot->err_handler(skb, dp, len); ipprot = nextip; } @@ -850,18 +773,18 @@ * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring echo requests, MUST have default=NOT. * See also WRT handling of options once they are done and working. */ - + static void icmp_echo(struct icmphdr *icmph, struct sk_buff *skb, int len) { -#ifndef CONFIG_IP_IGNORE_ECHO_REQUESTS - struct icmp_bxm icmp_param; + if (!sysctl_icmp_echo_ignore_all) { + struct icmp_bxm icmp_param; - icmp_param.icmph=*icmph; - icmp_param.icmph.type=ICMP_ECHOREPLY; - icmp_param.data_ptr=(icmph+1); - icmp_param.data_len=len; - icmp_reply(&icmp_param, skb); -#endif + icmp_param.icmph=*icmph; + icmp_param.icmph.type=ICMP_ECHOREPLY; + icmp_param.data_ptr=(icmph+1); + icmp_param.data_len=len; + icmp_reply(&icmp_param, skb); + } } /* @@ -928,32 +851,16 @@ * Gratuitous mask announcements suffer from the same problem. * RFC1812 explains it, but still allows to use ADDRMASK, * that is pretty silly. --ANK + * + * All these rules are so bizarre, that I removed kernel addrmask + * support at all. It is wrong, it is obsolete, nobody uses it in + * any case. 
--ANK */ - + static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len) { - struct icmp_bxm icmp_param; - struct rtable *rt = (struct rtable*)skb->dst; - struct device *dev = skb->dev; - - if (!ipv4_config.addrmask_agent || - len < 4 || - ZERONET(rt->rt_src) || - rt->rt_src_dev != rt->u.dst.dev || - !(rt->rt_flags&RTCF_DIRECTSRC) || - (rt->rt_flags&RTF_GATEWAY) || - !(dev->ip_flags&IFF_IP_ADDR_OK) || - !(dev->ip_flags&IFF_IP_MASK_OK)) { - icmp_statistics.IcmpInErrors++; - return; - } - - icmp_param.icmph.type=ICMP_ADDRESSREPLY; - icmp_param.icmph.code=0; - icmp_param.icmph.un.echo = icmph->un.echo; - icmp_param.data_ptr=&dev->pa_mask; - icmp_param.data_len=4; - icmp_reply(&icmp_param, skb); + if (net_ratelimit()) + printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n"); } /* @@ -965,27 +872,29 @@ { struct rtable *rt = (struct rtable*)skb->dst; struct device *dev = skb->dev; + struct in_device *in_dev = dev->ip_ptr; + struct in_ifaddr *ifa; u32 mask; if (!ipv4_config.log_martians || + !IS_ROUTER || + !in_dev || !in_dev->ifa_list || len < 4 || - !(rt->rt_flags&RTCF_DIRECTSRC) || - (rt->rt_flags&RTF_GATEWAY) || - !(dev->ip_flags&IFF_IP_ADDR_OK) || - !(dev->ip_flags&IFF_IP_MASK_OK)) { - icmp_statistics.IcmpInErrors++; + !(rt->rt_flags&RTCF_DIRECTSRC)) return; - } mask = *(u32*)&icmph[1]; - if (mask != dev->pa_mask && net_ratelimit()) + for (ifa=in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + if (mask == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa)) + return; + } + if (net_ratelimit()) printk(KERN_INFO "Wrong address mask %08lX from %08lX/%s\n", ntohl(mask), ntohl(rt->rt_src), dev->name); } static void icmp_discard(struct icmphdr *icmph, struct sk_buff *skb, int len) { - return; } #ifdef CONFIG_IP_TRANSPARENT_PROXY @@ -1000,8 +909,8 @@ */ /* This should work with the new hashes now. 
-DaveM */ -extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport); -extern struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport); +extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); +extern struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); int icmp_chkaddr(struct sk_buff *skb) { @@ -1017,7 +926,7 @@ { struct tcphdr *th = (struct tcphdr *)(((unsigned char *)iph)+(iph->ihl<<2)); - sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source); + sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, skb->dev->ifindex); if (!sk) return 0; if (sk->saddr != iph->saddr) return 0; if (sk->daddr != iph->daddr) return 0; @@ -1031,9 +940,9 @@ { struct udphdr *uh = (struct udphdr *)(((unsigned char *)iph)+(iph->ihl<<2)); - sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source); + sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); if (!sk) return 0; - if (sk->saddr != iph->saddr && __ip_chk_addr(iph->saddr) != IS_MYADDR) + if (sk->saddr != iph->saddr && inet_addr_type(iph->saddr) != RTN_LOCAL) return 0; /* * This packet may have come from us. @@ -1067,46 +976,59 @@ if(len < sizeof(struct icmphdr) || ip_compute_csum((unsigned char *) icmph, len) || icmph->type > NR_ICMP_TYPES) - { - icmp_statistics.IcmpInErrors++; - kfree_skb(skb, FREE_READ); - return 0; - } + goto error; /* * Parse the ICMP message */ - if (rt->rt_flags&(RTF_BROADCAST|RTF_MULTICAST)) { + if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) { /* - * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be silently ignored (we don't as it is used - * by some network mapping tools). - * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently discarded if to broadcast/multicast. + * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be + * silently ignored (we let user decide with a sysctl). 
+ * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently + * discarded if to broadcast/multicast. */ + if (icmph->type == ICMP_ECHO && + sysctl_icmp_echo_ignore_broadcasts) { + goto error; + } if (icmph->type != ICMP_ECHO && icmph->type != ICMP_TIMESTAMP && icmph->type != ICMP_ADDRESS && icmph->type != ICMP_ADDRESSREPLY) { - icmp_statistics.IcmpInErrors++; - kfree_skb(skb, FREE_READ); - return(0); + goto error; } } len -= sizeof(struct icmphdr); (*icmp_pointers[icmph->type].input)++; (icmp_pointers[icmph->type].handler)(icmph, skb, len); + +drop: kfree_skb(skb, FREE_READ); return 0; +error: + icmp_statistics.IcmpInErrors++; + goto drop; } /* - * This table defined limits of ICMP sending rate for various ICMP messages. + * A spare long used to speed up statistics updating */ + +static unsigned long dummy; -static struct icmp_xrlim - xrl_unreach = { 4*HZ, 80, HZ/4 }, /* Host Unreachable */ - xrl_generic = { 3*HZ, 30, HZ/4 }; /* All other errors */ +/* + * Configurable rate limits. + * Send at most one packets per time. + * Someone should check if these default values are correct. + */ +int sysctl_icmp_sourcequench_time = 1*HZ; +int sysctl_icmp_destunreach_time = 1*HZ; +int sysctl_icmp_timeexceed_time = 1*HZ; +int sysctl_icmp_paramprob_time = 1*HZ; +int sysctl_icmp_echoreply_time = 0; /* don't limit it per default. */ /* * This table is the definition of how we handle ICMP. 
@@ -1114,38 +1036,38 @@ static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1] = { /* ECHO REPLY (0) */ - { &icmp_statistics.IcmpOutEchoReps, &icmp_statistics.IcmpInEchoReps, icmp_discard, 0, NULL }, - { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL }, - { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL }, + { &icmp_statistics.IcmpOutEchoReps, &icmp_statistics.IcmpInEchoReps, icmp_discard, 0, &sysctl_icmp_echoreply_time}, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, /* DEST UNREACH (3) */ - { &icmp_statistics.IcmpOutDestUnreachs, &icmp_statistics.IcmpInDestUnreachs, icmp_unreach, 1, &xrl_unreach }, + { &icmp_statistics.IcmpOutDestUnreachs, &icmp_statistics.IcmpInDestUnreachs, icmp_unreach, 1, &sysctl_icmp_destunreach_time }, /* SOURCE QUENCH (4) */ - { &icmp_statistics.IcmpOutSrcQuenchs, &icmp_statistics.IcmpInSrcQuenchs, icmp_unreach, 1, NULL }, + { &icmp_statistics.IcmpOutSrcQuenchs, &icmp_statistics.IcmpInSrcQuenchs, icmp_unreach, 1, &sysctl_icmp_sourcequench_time }, /* REDIRECT (5) */ - { &icmp_statistics.IcmpOutRedirects, &icmp_statistics.IcmpInRedirects, icmp_redirect, 1, NULL }, - { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL }, - { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL }, + { &icmp_statistics.IcmpOutRedirects, &icmp_statistics.IcmpInRedirects, icmp_redirect, 1, }, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, /* ECHO (8) */ - { &icmp_statistics.IcmpOutEchos, &icmp_statistics.IcmpInEchos, icmp_echo, 0, NULL }, - { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL }, - { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, NULL }, + { &icmp_statistics.IcmpOutEchos, &icmp_statistics.IcmpInEchos, icmp_echo, 0, }, + { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, + { &dummy, &icmp_statistics.IcmpInErrors, 
icmp_discard, 1, }, /* TIME EXCEEDED (11) */ - { &icmp_statistics.IcmpOutTimeExcds, &icmp_statistics.IcmpInTimeExcds, icmp_unreach, 1, &xrl_generic }, + { &icmp_statistics.IcmpOutTimeExcds, &icmp_statistics.IcmpInTimeExcds, icmp_unreach, 1, &sysctl_icmp_timeexceed_time }, /* PARAMETER PROBLEM (12) */ /* FIXME: RFC1122 3.2.2.5 - MUST pass PARAM_PROB messages to transport layer */ - { &icmp_statistics.IcmpOutParmProbs, &icmp_statistics.IcmpInParmProbs, icmp_discard, 1, &xrl_generic }, + { &icmp_statistics.IcmpOutParmProbs, &icmp_statistics.IcmpInParmProbs, icmp_discard, 1, &sysctl_icmp_paramprob_time }, /* TIMESTAMP (13) */ - { &icmp_statistics.IcmpOutTimestamps, &icmp_statistics.IcmpInTimestamps, icmp_timestamp, 0, NULL }, + { &icmp_statistics.IcmpOutTimestamps, &icmp_statistics.IcmpInTimestamps, icmp_timestamp, 0, }, /* TIMESTAMP REPLY (14) */ - { &icmp_statistics.IcmpOutTimestampReps, &icmp_statistics.IcmpInTimestampReps, icmp_discard, 0, NULL }, + { &icmp_statistics.IcmpOutTimestampReps, &icmp_statistics.IcmpInTimestampReps, icmp_discard, 0, }, /* INFO (15) */ - { &dummy, &dummy, icmp_discard, 0, NULL }, + { &dummy, &dummy, icmp_discard, 0, }, /* INFO REPLY (16) */ - { &dummy, &dummy, icmp_discard, 0, NULL }, + { &dummy, &dummy, icmp_discard, 0, }, /* ADDR MASK (17) */ - { &icmp_statistics.IcmpOutAddrMasks, &icmp_statistics.IcmpInAddrMasks, icmp_address, 0, NULL }, + { &icmp_statistics.IcmpOutAddrMasks, &icmp_statistics.IcmpInAddrMasks, icmp_address, 0, }, /* ADDR MASK REPLY (18) */ - { &icmp_statistics.IcmpOutAddrMaskReps, &icmp_statistics.IcmpInAddrMaskReps, icmp_address_reply, 0, NULL } + { &icmp_statistics.IcmpOutAddrMaskReps, &icmp_statistics.IcmpInAddrMaskReps, icmp_address_reply, 0, } }; __initfunc(void icmp_init(struct net_proto_family *ops)) @@ -1166,8 +1088,4 @@ icmp_socket->sk->allocation=GFP_ATOMIC; icmp_socket->sk->num = 256; /* Don't receive any data */ icmp_socket->sk->ip_ttl = MAXTTL; -#ifndef CONFIG_NO_ICMP_LIMIT - xrlim_init(); -#endif } - diff 
-u --recursive --new-file v2.1.67/linux/net/ipv4/igmp.c linux/net/ipv4/igmp.c --- v2.1.67/linux/net/ipv4/igmp.c Mon Jun 16 16:36:01 1997 +++ linux/net/ipv4/igmp.c Sun Nov 30 14:00:39 1997 @@ -8,6 +8,8 @@ * the older version didn't come out right using gcc 2.5.8, the newer one * seems to fall out with gcc 2.6.2. * + * Version: $Id: igmp.c,v 1.22 1997/10/29 20:27:24 kuznet Exp $ + * * Authors: * Alan Cox * @@ -65,9 +67,11 @@ * fix from pending 2.1.x patches. * Alan Cox: Forget to enable FDDI support earlier. * Alexey Kuznetsov: Fixed leaving groups on device down. + * Alexey Kuznetsov: Accordance to igmp-v2-06 draft. */ +#include #include #include #include @@ -79,141 +83,52 @@ #include #include #include +#include +#include +#include #include +#include #include #include #include -#include #include -#include #include +#ifdef CONFIG_IP_MROUTE +#include +#endif -int sysctl_igmp_max_host_report_delay = IGMP_MAX_HOST_REPORT_DELAY; -int sysctl_igmp_timer_scale = IGMP_TIMER_SCALE; -int sysctl_igmp_age_threshold = IGMP_AGE_THRESHOLD; -/* - * If time expired, change the router type to IGMP_NEW_ROUTER. - */ +#ifdef CONFIG_IP_MULTICAST -static void ip_router_timer_expire(unsigned long data) -{ - struct ip_router_info *i=(struct ip_router_info *)data; +/* Parameter names and values are taken from igmp-v2-06 draft */ - del_timer(&i->timer); - i->type=IGMP_NEW_ROUTER; /* Revert to new multicast router */ - i->time=0; -} +#define IGMP_V1_Router_Present_Timeout (400*HZ) +#define IGMP_Unsolicited_Report_Interval (10*HZ) +#define IGMP_Query_Response_Interval (10*HZ) +#define IGMP_Unsolicited_Report_Count 2 -/* - * Multicast router info manager - */ -struct ip_router_info *ip_router_info_head=(struct ip_router_info *)0; +#define IGMP_Initial_Report_Delay (1*HZ) -/* - * Get the multicast router info on that device +/* IGMP_Initial_Report_Delay is not from IGMP specs! 
+ * IGMP specs require to report membership immediately after + * joining a group, but we delay the first report by a + * small interval. It seems more natural and still does not + * contradict to specs provided this delay is small enough. */ -static struct ip_router_info *igmp_get_mrouter_info(struct device *dev) -{ - register struct ip_router_info *i; - - for(i=ip_router_info_head;i!=NULL;i=i->next) - { - if (i->dev == dev) - { - return i; - } - } - - /* - * Not found. Create a new entry. The default is IGMP V2 router - */ - - i=(struct ip_router_info *)kmalloc(sizeof(*i), GFP_ATOMIC); - if(i==NULL) - return NULL; - i->dev = dev; - i->type = IGMP_NEW_ROUTER; - i->time = sysctl_igmp_age_threshold; - i->next = ip_router_info_head; - ip_router_info_head = i; - - init_timer(&i->timer); - i->timer.data=(unsigned long)i; - i->timer.function=&ip_router_timer_expire; - - return i; -} - -/* - * Set the multicast router info on that device - */ - -static struct ip_router_info *igmp_set_mrouter_info(struct device *dev,int type,int time) -{ - register struct ip_router_info *i; - - for(i=ip_router_info_head;i!=NULL;i=i->next) - { - if (i->dev == dev) - { - if(i->type==IGMP_OLD_ROUTER) - { - del_timer(&i->timer); - } - - i->type = type; - i->time = time; - - if(i->type==IGMP_OLD_ROUTER) - { - i->timer.expires=jiffies+i->time*HZ; - add_timer(&i->timer); - } - return i; - } - } - - /* - * Not found. Create a new entry. 
- */ - i=(struct ip_router_info *)kmalloc(sizeof(*i), GFP_ATOMIC); - if(i==NULL) - return NULL; - i->dev = dev; - i->type = type; - i->time = time; - i->next = ip_router_info_head; - ip_router_info_head = i; - - init_timer(&i->timer); - i->timer.data=(unsigned long)i; - i->timer.function=&ip_router_timer_expire; - if(i->type==IGMP_OLD_ROUTER) - { - i->timer.expires=jiffies+i->time*HZ; - add_timer(&i->timer); - } - - return i; -} - +#define IGMP_V1_SEEN(in_dev) ((in_dev)->mr_v1_seen && jiffies - (in_dev)->mr_v1_seen < 0) /* * Timer management */ -static void igmp_stop_timer(struct ip_mc_list *im) +static __inline__ void igmp_stop_timer(struct ip_mc_list *im) { - if (im->tm_running) - { - del_timer(&im->timer); - im->tm_running=0; - } - else - printk(KERN_DEBUG "igmp_stop_timer() called with timer not running by %p\n",__builtin_return_address(0)); + if (im->tm_running) { + del_timer(&im->timer); + im->tm_running=0; + } } extern __inline__ unsigned int random(void) @@ -223,17 +138,13 @@ return seed^jiffies; } -/* - * Inlined as it's only called once. - */ - -static void igmp_start_timer(struct ip_mc_list *im,unsigned char max_resp_time) +static __inline__ void igmp_start_timer(struct ip_mc_list *im, int max_delay) { int tv; - if(im->tm_running) + if (im->tm_running) return; - tv=random()%(max_resp_time*HZ/sysctl_igmp_timer_scale); /* Pick a number any number 8) */ - im->timer.expires=jiffies+tv; + tv=random() % max_delay; + im->timer.expires=jiffies+tv+2; im->tm_running=1; add_timer(&im->timer); } @@ -244,20 +155,32 @@ #define IGMP_SIZE (sizeof(struct igmphdr)+sizeof(struct iphdr)+4) -static void igmp_send_report(struct device *dev, u32 group, int type) +static int igmp_send_report(struct device *dev, u32 group, int type) { struct sk_buff *skb; struct iphdr *iph; struct igmphdr *ih; struct rtable *rt; + u32 dst; - if (ip_route_output(&rt, group, 0, 0, dev)) - return; + /* According to IGMPv2 specs, LEAVE messages are + * sent to all-routers group. 
+ */ + dst = group; + if (type == IGMP_HOST_LEAVE_MESSAGE) + dst = IGMP_ALL_ROUTER; + + if (ip_route_output(&rt, dst, 0, 0, dev->ifindex)) + return -1; + if (rt->rt_src == 0) { + ip_rt_put(rt); + return -1; + } skb=alloc_skb(IGMP_SIZE+dev->hard_header_len+15, GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); - return; + return -1; } skb->dst = &rt->u.dst; @@ -272,7 +195,7 @@ iph->tos = 0; iph->frag_off = 0; iph->ttl = 1; - iph->daddr = group; + iph->daddr = dst; iph->saddr = rt->rt_src; iph->protocol = IPPROTO_IGMP; iph->tot_len = htons(IGMP_SIZE); @@ -290,115 +213,140 @@ ih->group=group; ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr)); - skb->dst->output(skb); + return skb->dst->output(skb); } static void igmp_timer_expire(unsigned long data) { struct ip_mc_list *im=(struct ip_mc_list *)data; - struct ip_router_info *r; + struct in_device *in_dev = im->interface; + int err; im->tm_running=0; - r=igmp_get_mrouter_info(im->interface); - if(r==NULL) - return; - if(r->type==IGMP_NEW_ROUTER) - igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_NEW_MEMBERSHIP_REPORT); + + if (IGMP_V1_SEEN(in_dev)) + err = igmp_send_report(in_dev->dev, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT); else - igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT); - im->reporter = 1; -} + err = igmp_send_report(in_dev->dev, im->multiaddr, IGMP_HOST_NEW_MEMBERSHIP_REPORT); -static void igmp_init_timer(struct ip_mc_list *im) -{ - im->tm_running=0; - init_timer(&im->timer); - im->timer.data=(unsigned long)im; - im->timer.function=&igmp_timer_expire; -} + /* Failed. Retry later. 
*/ + if (err) { + igmp_start_timer(im, IGMP_Unsolicited_Report_Interval); + return; + } + if (im->unsolicit_count) { + im->unsolicit_count--; + igmp_start_timer(im, IGMP_Unsolicited_Report_Interval); + } + im->reporter = 1; +} -static void igmp_heard_report(struct device *dev, u32 group, u32 source) +static void igmp_heard_report(struct in_device *in_dev, u32 group) { struct ip_mc_list *im; /* Timers are only set for non-local groups */ + if (LOCAL_MCAST(group)) return; - for (im=dev->ip_mc_list; im!=NULL; im=im->next) { + for (im=in_dev->mc_list; im!=NULL; im=im->next) { if (im->multiaddr == group) { - if (im->tm_running) - igmp_stop_timer(im); - if (source != dev->pa_addr) - im->reporter = 0; + igmp_stop_timer(im); + im->reporter = 0; + im->unsolicit_count = 0; return; } } } -static void igmp_heard_query(struct device *dev, unsigned char max_resp_time, +static void igmp_heard_query(struct in_device *in_dev, unsigned char max_resp_time, u32 group) { - struct ip_mc_list *im; - int mrouter_type; + struct ip_mc_list *im; + int max_delay; + + max_delay = max_resp_time*(HZ/IGMP_TIMER_SCALE); + if (max_resp_time == 0) { + /* Alas, old v1 router presents here. */ + + max_delay = IGMP_Query_Response_Interval; + in_dev->mr_v1_seen = jiffies + IGMP_V1_Router_Present_Timeout; + group = 0; + } + /* - * The max_resp_time is in units of 1/10 second. + * - Start the timers in all of our membership records + * that the query applies to for the interface on + * which the query arrived excl. those that belong + * to a "local" group (224.0.0.X) + * - For timers already running check if they need to + * be reset. + * - Use the igmp->igmp_code field as the maximum + * delay possible */ - if(max_resp_time>0) { - mrouter_type=IGMP_NEW_ROUTER; - - if (igmp_set_mrouter_info(dev,mrouter_type,0)==NULL) - return; - /* - * - Start the timers in all of our membership records - * that the query applies to for the interface on - * which the query arrived excl. 
those that belong - * to a "local" group (224.0.0.X) - * - For timers already running check if they need to - * be reset. - * - Use the igmp->igmp_code field as the maximum - * delay possible - */ - for(im=dev->ip_mc_list;im!=NULL;im=im->next) { - if (group && group != im->multiaddr) - continue; - if(im->tm_running) { - if(im->timer.expires>jiffies+max_resp_time*HZ/sysctl_igmp_timer_scale) { - igmp_stop_timer(im); - igmp_start_timer(im,max_resp_time); - } - } else if (!LOCAL_MCAST(im->multiaddr)) - igmp_start_timer(im,max_resp_time); - } - } else { - mrouter_type=IGMP_OLD_ROUTER; - max_resp_time=sysctl_igmp_max_host_report_delay*sysctl_igmp_timer_scale; + for (im=in_dev->mc_list; im!=NULL; im=im->next) { + if (group && group != im->multiaddr) + continue; + if (LOCAL_MCAST(im->multiaddr)) + continue; + im->unsolicit_count = 0; + if (im->tm_running && im->timer.expires-jiffies > max_delay) + igmp_stop_timer(im); + igmp_start_timer(im, max_delay); + } +} - if(igmp_set_mrouter_info(dev,mrouter_type,sysctl_igmp_age_threshold)==NULL) - return; +int igmp_rcv(struct sk_buff *skb, unsigned short len) +{ + /* This basically follows the spec line by line -- see RFC1112 */ + struct igmphdr *ih = skb->h.igmph; + struct in_device *in_dev = skb->dev->ip_ptr; - /* - * Start the timers in all of our membership records for - * the interface on which the query arrived, except those - * that are already running and those that belong to a - * "local" group (224.0.0.X). - */ - - for(im=dev->ip_mc_list;im!=NULL;im=im->next) { - if(!im->tm_running && !LOCAL_MCAST(im->multiaddr)) - igmp_start_timer(im,max_resp_time); - } + if (len < sizeof(struct igmphdr) || ip_compute_csum((void *)ih, len) + || in_dev==NULL) { + kfree_skb(skb, FREE_READ); + return 0; } + + switch (ih->type) { + case IGMP_HOST_MEMBERSHIP_QUERY: + igmp_heard_query(in_dev, ih->code, ih->group); + break; + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMP_HOST_NEW_MEMBERSHIP_REPORT: + /* Is it our report looped back? 
*/ + if (((struct rtable*)skb->dst)->key.iif == 0) + break; + igmp_heard_report(in_dev, ih->group); + break; + case IGMP_PIM: +#ifdef CONFIG_IP_PIMSM_V1 + return pim_rcv_v1(skb, len); +#endif + case IGMP_DVMRP: + case IGMP_TRACE: + case IGMP_HOST_LEAVE_MESSAGE: + case IGMP_MTRACE: + case IGMP_MTRACE_RESP: + break; + default: + NETDEBUG(printk(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type)); + } + kfree_skb(skb, FREE_READ); + return 0; } +#endif + /* * Map a multicast IP onto multicast MAC for type ethernet. */ -extern __inline__ void ip_mc_map(unsigned long addr, char *buf) +extern __inline__ void ip_mc_map(u32 addr, char *buf) { addr=ntohl(addr); buf[0]=0x01; @@ -415,15 +363,16 @@ * Add a filter to a device */ -void ip_mc_filter_add(struct device *dev, unsigned long addr) +static void ip_mc_filter_add(struct in_device *in_dev, u32 addr) { char buf[6]; - ip_rt_multicast_event(dev); - if(!(dev->flags & IFF_MULTICAST)) + struct device *dev = in_dev->dev; + + if (!(dev->flags & IFF_MULTICAST)) return; - if(dev->type!=ARPHRD_ETHER && dev->type!=ARPHRD_FDDI) + if (dev->type!=ARPHRD_ETHER && dev->type!=ARPHRD_FDDI) return; /* Only do ethernet or FDDI for now */ - ip_mc_map(addr,buf); + ip_mc_map(addr, buf); dev_mc_add(dev,buf,ETH_ALEN,0); } @@ -431,70 +380,49 @@ * Remove a filter from a device */ -void ip_mc_filter_del(struct device *dev, unsigned long addr) +static void ip_mc_filter_del(struct in_device *in_dev, u32 addr) { char buf[6]; - ip_rt_multicast_event(dev); - if(dev->type!=ARPHRD_ETHER && dev->type!=ARPHRD_FDDI) + struct device *dev = in_dev->dev; + + if (dev->type!=ARPHRD_ETHER && dev->type!=ARPHRD_FDDI) return; /* Only do ethernet or FDDI for now */ ip_mc_map(addr,buf); dev_mc_delete(dev,buf,ETH_ALEN,0); } -extern __inline__ void igmp_group_dropped(struct ip_mc_list *im) +static void igmp_group_dropped(struct ip_mc_list *im) { - del_timer(&im->timer); - if (im->reporter) - igmp_send_report(im->interface, im->multiaddr, 
IGMP_HOST_LEAVE_MESSAGE); ip_mc_filter_del(im->interface, im->multiaddr); -} -extern __inline__ void igmp_group_added(struct ip_mc_list *im) -{ - struct ip_router_info *r; - igmp_init_timer(im); - ip_mc_filter_add(im->interface, im->multiaddr); - r=igmp_get_mrouter_info(im->interface); - if(r==NULL) +#ifdef CONFIG_IP_MULTICAST + if (LOCAL_MCAST(im->multiaddr)) return; - if(r->type==IGMP_NEW_ROUTER) - igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_NEW_MEMBERSHIP_REPORT); - else - igmp_send_report(im->interface, im->multiaddr, IGMP_HOST_MEMBERSHIP_REPORT); + + start_bh_atomic(); + igmp_stop_timer(im); + end_bh_atomic(); + + if (im->reporter && !IGMP_V1_SEEN(im->interface)) + igmp_send_report(im->interface->dev, im->multiaddr, IGMP_HOST_LEAVE_MESSAGE); +#endif } -int igmp_rcv(struct sk_buff *skb, unsigned short len) +static void igmp_group_added(struct ip_mc_list *im) { - /* This basically follows the spec line by line -- see RFC1112 */ - struct igmphdr *ih = skb->h.igmph; + ip_mc_filter_add(im->interface, im->multiaddr); - if (len < sizeof(struct igmphdr) || ip_compute_csum((void *)ih, len)) { - kfree_skb(skb, FREE_READ); - return 0; - } - - switch (ih->type) { - case IGMP_HOST_MEMBERSHIP_QUERY: - igmp_heard_query(skb->dev, ih->code, ih->group); - break; - case IGMP_HOST_MEMBERSHIP_REPORT: - case IGMP_HOST_NEW_MEMBERSHIP_REPORT: - igmp_heard_report(skb->dev, ih->group, skb->nh.iph->saddr); - break; - case IGMP_DVMRP: - case IGMP_PIM: - case IGMP_TRACE: - case IGMP_HOST_LEAVE_MESSAGE: - case IGMP_MTRACE: - case IGMP_MTRACE_RESP: - break; - default: - NETDEBUG(printk(KERN_DEBUG "Unknown IGMP type=%d\n", ih->type)); - } - kfree_skb(skb, FREE_READ); - return 0; +#ifdef CONFIG_IP_MULTICAST + if (LOCAL_MCAST(im->multiaddr)) + return; + + start_bh_atomic(); + igmp_start_timer(im, IGMP_Initial_Report_Delay); + end_bh_atomic(); +#endif } + /* * Multicast list managers */ @@ -504,143 +432,210 @@ * A socket has joined a multicast group on device dev. 
*/ -static void ip_mc_inc_group(struct device *dev, unsigned long addr) +void ip_mc_inc_group(struct in_device *in_dev, u32 addr) { - struct ip_mc_list *i; - for(i=dev->ip_mc_list;i!=NULL;i=i->next) - { - if(i->multiaddr==addr) - { + struct ip_mc_list *i, *im; + + im = (struct ip_mc_list *)kmalloc(sizeof(*im), GFP_KERNEL); + + for (i=in_dev->mc_list; i; i=i->next) { + if (i->multiaddr == addr) { i->users++; + if (im) + kfree(im); return; } } - i=(struct ip_mc_list *)kmalloc(sizeof(*i), GFP_KERNEL); - if(!i) + if (!im) return; - i->users=1; - i->interface=dev; - i->multiaddr=addr; - i->next=dev->ip_mc_list; - igmp_group_added(i); - dev->ip_mc_list=i; + im->users=1; + im->interface=in_dev; + im->multiaddr=addr; +#ifdef CONFIG_IP_MULTICAST + im->tm_running=0; + init_timer(&im->timer); + im->timer.data=(unsigned long)im; + im->timer.function=&igmp_timer_expire; + im->unsolicit_count = IGMP_Unsolicited_Report_Count; + im->reporter = 0; +#endif + im->next=in_dev->mc_list; + in_dev->mc_list=im; + if (in_dev->dev->flags & IFF_UP) { + igmp_group_added(im); + ip_rt_multicast_event(in_dev); + } + return; } /* * A socket has left a multicast group on device dev */ -static void ip_mc_dec_group(struct device *dev, unsigned long addr) +int ip_mc_dec_group(struct in_device *in_dev, u32 addr) { - struct ip_mc_list **i; - for(i=&(dev->ip_mc_list);(*i)!=NULL;i=&(*i)->next) - { - if((*i)->multiaddr==addr) - { - if(--((*i)->users) == 0) - { - struct ip_mc_list *tmp= *i; - igmp_group_dropped(tmp); - *i=(*i)->next; - kfree_s(tmp,sizeof(*tmp)); + struct ip_mc_list *i, **ip; + + for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { + if (i->multiaddr==addr) { + if (--i->users == 0) { + *ip = i->next; + if (in_dev->dev->flags & IFF_UP) { + igmp_group_dropped(i); + ip_rt_multicast_event(in_dev); + } + kfree_s(i, sizeof(*i)); } - return; + return 0; } } + return -ESRCH; } -/* - * Device going down: Clean up. 
- */ +/* Device going down */ -void ip_mc_drop_device(struct device *dev) +void ip_mc_down(struct in_device *in_dev) { struct ip_mc_list *i; - struct ip_mc_list *j; - start_bh_atomic(); - for(i=dev->ip_mc_list;i!=NULL;i=j) - { - j=i->next; - if(i->tm_running) - del_timer(&i->timer); - kfree_s(i,sizeof(*i)); - } - dev->ip_mc_list=NULL; - end_bh_atomic(); + + for (i=in_dev->mc_list; i; i=i->next) + igmp_group_dropped(i); +} + +/* Device going up */ + +void ip_mc_up(struct in_device *in_dev) +{ + struct ip_mc_list *i; + + for (i=in_dev->mc_list; i; i=i->next) + igmp_group_added(i); } /* - * Device going up. Make sure it is in all hosts + * Device is about to be destroyed: clean up. */ -void ip_mc_allhost(struct device *dev) +void ip_mc_destroy_dev(struct in_device *in_dev) { struct ip_mc_list *i; - for(i=dev->ip_mc_list;i!=NULL;i=i->next) - if(i->multiaddr==IGMP_ALL_HOSTS) - return; - i=(struct ip_mc_list *)kmalloc(sizeof(*i), GFP_KERNEL); - if(!i) - return; - i->users=1; - i->interface=dev; - i->multiaddr=IGMP_ALL_HOSTS; - i->tm_running=0; - i->next=dev->ip_mc_list; - dev->ip_mc_list=i; - ip_mc_filter_add(i->interface, i->multiaddr); + + while ((i = in_dev->mc_list) != NULL) { + in_dev->mc_list = i->next; + kfree_s(i, sizeof(*i)); + } +} + +/* Initialize multicasting on an IP interface */ + +void ip_mc_init_dev(struct in_device *in_dev) +{ + in_dev->mc_list = NULL; + in_dev->mr_v1_seen = 0; + ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); +} + +static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) +{ + struct rtable *rt; + struct device *dev = NULL; + + if (imr->imr_address.s_addr) { + dev = ip_dev_find(imr->imr_address.s_addr); + if (!dev) + return NULL; + } + + if (!dev && !ip_route_output(&rt, imr->imr_multiaddr.s_addr, 0, 0, 0)) { + dev = rt->u.dst.dev; + ip_rt_put(rt); + } + if (dev) { + imr->imr_ifindex = dev->ifindex; + return dev->ip_ptr; + } + return NULL; } /* * Join a socket to a group */ -int ip_mc_join_group(struct sock *sk , struct device *dev, 
unsigned long addr) +int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) { - int unused= -1; - int i; - if(!MULTICAST(addr)) + int err; + u32 addr = imr->imr_multiaddr.s_addr; + struct ip_mc_socklist *iml, *i; + struct in_device *in_dev; + int count = 0; + + if (!MULTICAST(addr)) return -EINVAL; - if(sk->ip_mc_list==NULL) - { - if((sk->ip_mc_list=(struct ip_mc_socklist *)kmalloc(sizeof(*sk->ip_mc_list), GFP_KERNEL))==NULL) - return -ENOMEM; - memset(sk->ip_mc_list,'\0',sizeof(*sk->ip_mc_list)); - } - for(i=0;iip_mc_list->multiaddr[i]==addr && sk->ip_mc_list->multidev[i]==dev) - return -EADDRINUSE; - if(sk->ip_mc_list->multidev[i]==NULL) - unused=i; - } - if(unused==-1) - return -ENOBUFS; - sk->ip_mc_list->multiaddr[unused]=addr; - sk->ip_mc_list->multidev[unused]=dev; - ip_mc_inc_group(dev,addr); - return 0; + rtnl_shlock(); + + if (!imr->imr_ifindex) + in_dev = ip_mc_find_dev(imr); + else + in_dev = inetdev_by_index(imr->imr_ifindex); + + if (!in_dev) { + iml = NULL; + err = -ENODEV; + goto done; + } + + iml = (struct ip_mc_socklist *)kmalloc(sizeof(*iml), GFP_KERNEL); + + err = -EADDRINUSE; + for (i=sk->ip_mc_list; i; i=i->next) { + if (memcmp(&i->multi, imr, sizeof(*imr)) == 0) { + /* New style additions are reference counted */ + if (imr->imr_address.s_addr == 0) { + i->count++; + err = 0; + } + goto done; + } + count++; + } + err = -ENOBUFS; + if (iml == NULL || count >= IP_MAX_MEMBERSHIPS) + goto done; + memcpy(&iml->multi, imr, sizeof(*imr)); + iml->next = sk->ip_mc_list; + iml->count = 1; + sk->ip_mc_list = iml; + ip_mc_inc_group(in_dev, addr); + iml = NULL; + err = 0; +done: + rtnl_shunlock(); + if (iml) + kfree(iml); + return err; } /* * Ask a socket to leave a group. 
*/ -int ip_mc_leave_group(struct sock *sk, struct device *dev, unsigned long addr) +int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { - int i; - if(!MULTICAST(addr)) - return -EINVAL; - if(sk->ip_mc_list==NULL) - return -EADDRNOTAVAIL; + struct ip_mc_socklist *iml, **imlp; - for(i=0;iip_mc_list->multiaddr[i]==addr && sk->ip_mc_list->multidev[i]==dev) - { - sk->ip_mc_list->multidev[i]=NULL; - ip_mc_dec_group(dev,addr); + for (imlp=&sk->ip_mc_list; (iml=*imlp)!=NULL; imlp=&iml->next) { + if (iml->multi.imr_multiaddr.s_addr==imr->imr_multiaddr.s_addr && + iml->multi.imr_address.s_addr==imr->imr_address.s_addr && + (!imr->imr_ifindex || iml->multi.imr_ifindex==imr->imr_ifindex)) { + struct in_device *in_dev; + if (--iml->count) + return 0; + *imlp = iml->next; + in_dev = inetdev_by_index(iml->multi.imr_ifindex); + if (in_dev) + ip_mc_dec_group(in_dev, imr->imr_multiaddr.s_addr); + kfree_s(iml, sizeof(*iml)); return 0; } } @@ -653,69 +648,63 @@ void ip_mc_drop_socket(struct sock *sk) { - int i; + struct ip_mc_socklist *iml; - if(sk->ip_mc_list==NULL) - return; - - for(i=0;iip_mc_list->multidev[i]) - { - ip_mc_dec_group(sk->ip_mc_list->multidev[i], sk->ip_mc_list->multiaddr[i]); - sk->ip_mc_list->multidev[i]=NULL; - } + while ((iml=sk->ip_mc_list) != NULL) { + struct in_device *in_dev; + sk->ip_mc_list = iml->next; + if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) + ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); + kfree_s(iml, sizeof(*iml)); } - kfree_s(sk->ip_mc_list,sizeof(*sk->ip_mc_list)); - sk->ip_mc_list=NULL; } -/* - * Write an multicast group list table for the IGMP daemon to - * read. 
- */ +#ifdef CONFIG_IP_MULTICAST int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length, int dummy) { off_t pos=0, begin=0; struct ip_mc_list *im; - unsigned long flags; int len=0; struct device *dev; - len=sprintf(buffer,"Device : Count\tGroup Users Timer\tReporter\n"); - save_flags(flags); - cli(); + len=sprintf(buffer,"Idx\tDevice : Count Querier\tGroup Users Timer\tReporter\n"); for(dev = dev_base; dev; dev = dev->next) { - if(dev->flags&IFF_UP) - { - len+=sprintf(buffer+len,"%-10s: %5d\n", - dev->name, dev->mc_count); - for(im = dev->ip_mc_list; im; im = im->next) - { - len+=sprintf(buffer+len, - "\t\t\t%08lX %5d %d:%08lX\t%d\n", - im->multiaddr, im->users, - im->tm_running, im->timer.expires-jiffies, im->reporter); - pos=begin+len; - if(posoffset+length) - break; - } - } + struct in_device *in_dev = dev->ip_ptr; + char *querier = "NONE"; + + if (in_dev == NULL) + continue; + + querier = IGMP_V1_SEEN(in_dev) ? "V1" : "V2"; + + len+=sprintf(buffer+len,"%d\t%-10s: %5d %7s\n", + dev->ifindex, dev->name, dev->mc_count, querier); + + for (im = in_dev->mc_list; im; im = im->next) { + len+=sprintf(buffer+len, + "\t\t\t\t%08lX %5d %d:%08lX\t\t%d\n", + im->multiaddr, im->users, + im->tm_running, im->timer.expires-jiffies, im->reporter); + + pos=begin+len; + if(posoffset+length) + break; + } } - restore_flags(flags); *start=buffer+(offset-begin); len-=(offset-begin); if(len>length) - len=length; + len=length; return len; } +#endif diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_alias.c linux/net/ipv4/ip_alias.c --- v2.1.67/linux/net/ipv4/ip_alias.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/ip_alias.c Wed Dec 31 16:00:00 1969 @@ -1,170 +0,0 @@ -/* - * IP_ALIAS (AF_INET) aliasing module. - * - * - * Version: @(#)ip_alias.c 0.43 12/20/95 - * - * Author: Juan Jose Ciarlante, - * - * Fixes: - * JJC : ip_alias_dev_select method. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef ALIAS_USER_LAND_DEBUG -#include "net_alias.h" -#include "ip_alias.h" -#include "user_stubs.h" -#endif - -#include -#include - -/* - * AF_INET alias init - */ - -static int ip_alias_init_1(struct net_alias_type *this, struct net_alias *alias, struct sockaddr *sa) -{ -#ifdef ALIAS_USER_LAND_DEBUG - printk("alias_init(%s) called.\n", alias->name); -#endif - MOD_INC_USE_COUNT; - return 0; -} - -/* - * AF_INET alias done - */ - -static int ip_alias_done_1(struct net_alias_type *this, struct net_alias *alias) -{ -#ifdef ALIAS_USER_LAND_DEBUG - printk("alias_done(%s) called.\n", alias->name); -#endif - MOD_DEC_USE_COUNT; - return 0; -} - -/* - * Print alias address info - */ - -int ip_alias_print_1(struct net_alias_type *this, struct net_alias *alias, char *buf, int len) -{ - char *p; - - p = (char *) &alias->dev.pa_addr; - return sprintf(buf, "%d.%d.%d.%d", - (p[0] & 255), (p[1] & 255), (p[2] & 255), (p[3] & 255)); -} - -struct device *ip_alias_dev_select(struct net_alias_type *this, struct device *main_dev, struct sockaddr *sa) -{ - __u32 addr; -#if 0 - struct rtable *rt; -#endif - struct device *dev=NULL; - - /* - * Defensive... - */ - - if (main_dev == NULL) - return NULL; - - /* - * Get u32 address. - */ - - addr = (sa)? (*(struct sockaddr_in *)sa).sin_addr.s_addr : 0; - if (addr == 0) - return NULL; - - /* - * Find 'closest' device to address given. any other suggestions? ... 
- * net_alias module will check if returned device is main_dev's alias - */ - -#if 0 - rt = ip_rt_route(addr, 0); - if(rt) - { - dev=rt->rt_dev; - ip_rt_put(rt); - } -#endif - return dev; -} - -/* - * net_alias AF_INET type defn. - */ - -struct net_alias_type ip_alias_type = -{ - AF_INET, /* type */ - 0, /* n_attach */ - "ip", /* name */ - NULL, /* get_addr32() */ - NULL, /* dev_addr_chk() */ - ip_alias_dev_select, /* dev_select() */ - ip_alias_init_1, /* alias_init_1() */ - ip_alias_done_1, /* alias_done_1() */ - ip_alias_print_1, /* alias_print_1() */ - NULL /* next */ -}; - -/* - * ip_alias module initialization - */ - -__initfunc(int ip_alias_init(void)) -{ - return register_net_alias_type(&ip_alias_type, AF_INET); -} - -/* - * ip_alias module done - */ - -int ip_alias_done(void) -{ - return unregister_net_alias_type(&ip_alias_type); -} - -#ifdef MODULE - -int init_module(void) -{ - if (ip_alias_init() != 0) - return -EIO; - return 0; -} - -void cleanup_module(void) -{ - if (ip_alias_done() != 0) - printk(KERN_INFO "ip_alias: can't remove module"); -} - -#endif /* MODULE */ diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_forward.c linux/net/ipv4/ip_forward.c --- v2.1.67/linux/net/ipv4/ip_forward.c Fri Apr 4 08:52:28 1997 +++ linux/net/ipv4/ip_forward.c Sun Nov 30 14:00:39 1997 @@ -5,6 +5,8 @@ * * The IP forwarding functionality. * + * Version: $Id: ip_forward.c,v 1.32 1997/10/24 17:16:06 kuznet Exp $ + * * Authors: see ip.c * * Fixes: @@ -76,10 +78,13 @@ int fw_res = 0; #endif - if (skb->pkt_type != PACKET_HOST) { - kfree_skb(skb,FREE_WRITE); - return 0; + if (IPCB(skb)->opt.router_alert) { + if (ip_call_ra_chain(skb)) + return 0; } + + if (skb->pkt_type != PACKET_HOST) + goto drop; /* * According to the RFC, we must first decrease the TTL field. 
If @@ -90,27 +95,25 @@ iph = skb->nh.iph; rt = (struct rtable*)skb->dst; +#ifdef CONFIG_CPU_IS_SLOW + if (net_cpu_congestion > 1 && !(iph->tos&IPTOS_RELIABILITY) && + IPTOS_PREC(iph->tos) < IPTOS_PREC_INTERNETCONTROL) { + if (((xtime.tv_usec&0xF)< 0x1C) + goto drop; + } +#endif + + #ifdef CONFIG_TRANSPARENT_PROXY if (ip_chk_sock(skb)) - return ip_local_deliver(skb); + goto local_pkt; #endif - if (ip_decrease_ttl(iph) <= 0) { - /* Tell the sender its packet died... */ - icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); - kfree_skb(skb, FREE_WRITE); - return -1; - } - - if (opt->is_strictroute && (rt->rt_flags&RTF_GATEWAY)) { - /* - * Strict routing permits no gatewaying - */ - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0); - kfree_skb(skb, FREE_WRITE); - return -1; - } + if (ip_decrease_ttl(iph) <= 0) + goto too_many_hops; + if (opt->is_strictroute && (rt->rt_flags&RTF_GATEWAY)) + goto sr_failed; /* * Having picked a route we can now send the frame out @@ -139,19 +142,23 @@ */ if (dev2->flags & IFF_UP) { - if (skb->len > mtu && (ntohs(iph->frag_off) & IP_DF)) { - ip_statistics.IpFragFails++; - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - kfree_skb(skb, FREE_WRITE); - return -1; - } + if (skb->len > mtu && (ntohs(iph->frag_off) & IP_DF)) + goto frag_needed; - if (rt->rt_flags&RTCF_NAT) { +#ifdef CONFIG_IP_ROUTE_NAT + if (rt->rt_flags & RTCF_NAT) { + if (skb_headroom(skb) < dev2->hard_header_len || skb_cloned(skb)) { + struct sk_buff *skb2; + skb2 = skb_realloc_headroom(skb, (dev2->hard_header_len + 15)&~15); + kfree_skb(skb, FREE_WRITE); + skb = skb2; + } if (ip_do_nat(skb)) { kfree_skb(skb, FREE_WRITE); return -1; } } +#endif #ifdef CONFIG_IP_MASQUERADE if(!(IPCB(skb)->flags&IPSKB_MASQUERADED)) { @@ -168,7 +175,7 @@ * and skip the firewall checks */ if (iph->protocol == IPPROTO_ICMP) { - if ((fw_res = ip_fw_masq_icmp(&skb, dev2)) < 0) { + if ((fw_res = ip_fw_masq_icmp(&skb)) < 0) { kfree_skb(skb, FREE_READ); return -1; } @@ -179,7 
+186,8 @@ } if (rt->rt_flags&RTCF_MASQ) goto skip_call_fw_firewall; -#endif +#endif /* CONFIG_IP_MASQUERADE */ + #ifdef CONFIG_FIREWALL fw_res=call_fw_firewall(PF_INET, dev2, iph, NULL, &skb); switch (fw_res) { @@ -205,7 +213,16 @@ */ if (!(IPCB(skb)->flags&IPSKB_MASQUERADED) && (fw_res==FW_MASQUERADE || rt->rt_flags&RTCF_MASQ)) { - if (ip_fw_masquerade(&skb, dev2) < 0) { + u32 maddr; + +#ifdef CONFIG_IP_ROUTE_NAT + maddr = (rt->rt_flags&RTCF_MASQ) ? rt->rt_src_map : 0; + + if (maddr == 0) +#endif + maddr = inet_select_addr(dev2, rt->rt_gateway, RT_SCOPE_UNIVERSE); + + if (ip_fw_masquerade(&skb, maddr) < 0) { kfree_skb(skb, FREE_READ); return -1; } @@ -238,10 +255,36 @@ ip_statistics.IpForwDatagrams++; - if (opt->optlen) - ip_forward_options(skb); - + if (opt->optlen == 0) { + ip_send(skb); + return 0; + } + ip_forward_options(skb); ip_send(skb); } return 0; + +#ifdef CONFIG_TRANSPARENT_PROXY +local_pkt: +#endif + return ip_local_deliver(skb); + +frag_needed: + ip_statistics.IpFragFails++; + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + goto drop; + +sr_failed: + /* + * Strict routing permits no gatewaying + */ + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0); + goto drop; + +too_many_hops: + /* Tell the sender its packet died... */ + icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); +drop: + kfree_skb(skb,FREE_WRITE); + return -1; } diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_fragment.c linux/net/ipv4/ip_fragment.c --- v2.1.67/linux/net/ipv4/ip_fragment.c Wed Nov 12 13:34:28 1997 +++ linux/net/ipv4/ip_fragment.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * The IP fragmentation functionality. * - * Version: $Id: ip_fragment.c,v 1.26 1997/09/04 22:35:00 davem Exp $ + * Version: $Id: ip_fragment.c,v 1.29 1997/11/22 12:31:05 freitag Exp $ * * Authors: Fred N. 
van Kempen * Alan Cox @@ -130,7 +130,7 @@ /* Find the correct entry in the "incomplete datagrams" queue for * this IP datagram, and return the queue entry address if found. */ -static inline struct ipq *ip_find(struct iphdr *iph) +static inline struct ipq *ip_find(struct iphdr *iph, struct dst_entry *dst) { __u16 id = iph->id; __u32 saddr = iph->saddr; @@ -314,7 +314,8 @@ len = qp->ihlen + qp->len; if(len>65535) { - printk(KERN_INFO "Oversized IP packet from %d.%d.%d.%d.\n", NIPQUAD(qp->iph->saddr)); + if (net_ratelimit()) + printk(KERN_INFO "Oversized IP packet from %d.%d.%d.%d.\n", NIPQUAD(qp->iph->saddr)); ip_statistics.IpReasmFails++; ip_free(qp); return NULL; @@ -322,7 +323,7 @@ if ((skb = dev_alloc_skb(len)) == NULL) { ip_statistics.IpReasmFails++; - NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp)); + NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing queue %p\n", qp)); ip_free(qp); return NULL; } @@ -390,7 +391,7 @@ ip_evictor(); /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */ - qp = ip_find(iph); + qp = ip_find(iph, skb->dst); /* Is this a non-fragmented datagram? */ offset = ntohs(iph->frag_off); @@ -435,7 +436,8 @@ /* Attempt to construct an oversize packet. */ if(ntohs(iph->tot_len)+(int)offset>65535) { - printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", NIPQUAD(iph->saddr)); + if (net_ratelimit()) + printk(KERN_INFO "Oversized packet received from %d.%d.%d.%d\n", NIPQUAD(iph->saddr)); frag_kfree_skb(skb, FREE_READ); ip_statistics.IpReasmFails++; return NULL; diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_fw.c linux/net/ipv4/ip_fw.c --- v2.1.67/linux/net/ipv4/ip_fw.c Thu Sep 11 09:02:24 1997 +++ linux/net/ipv4/ip_fw.c Sun Nov 30 14:00:39 1997 @@ -6,6 +6,8 @@ * license in recognition of the original copyright. * -- Alan Cox. * + * $Id: ip_fw.c,v 1.29 1997/10/10 22:41:01 davem Exp $ + * * Ported from BSD to Linux, * Alan Cox 22/Nov/1994. 
* Zeroing /proc and other additions @@ -104,7 +106,7 @@ #include #include #include -#include +#include #include #include #include @@ -165,6 +167,10 @@ #endif +#ifdef CONFIG_IP_FIREWALL_NETLINK +struct sock *ipfwsk; +#endif + /* * Returns 1 if the port is matched by the vector, 0 otherwise */ @@ -376,15 +382,6 @@ continue; /* - * Look for a VIA address match - */ - if(f->fw_via.s_addr && rif) - { - if(rif->pa_addr!=f->fw_via.s_addr) - continue; /* Mismatch */ - } - - /* * Look for a VIA device match */ if(f->fw_viadev) @@ -651,6 +648,11 @@ if ((ftmp->fw_vianame)[0]) { if (!(ftmp->fw_viadev = dev_get(ftmp->fw_vianame))) ftmp->fw_viadev = (struct device *) -1; + } else if (ftmp->fw_via.s_addr) { + if (!(ftmp->fw_viadev = ip_dev_find(ftmp->fw_via.s_addr))) + ftmp->fw_viadev = (struct device *) -1; + else + memcpy(ftmp->fw_vianame, ftmp->fw_viadev->name, IFNAMSIZ); } else ftmp->fw_viadev = NULL; @@ -695,6 +697,11 @@ if ((ftmp->fw_vianame)[0]) { if (!(ftmp->fw_viadev = dev_get(ftmp->fw_vianame))) ftmp->fw_viadev = (struct device *) -1; + } else if (ftmp->fw_via.s_addr) { + if (!(ftmp->fw_viadev = ip_dev_find(ftmp->fw_via.s_addr))) + ftmp->fw_viadev = (struct device *) -1; + else + memcpy(ftmp->fw_vianame, ftmp->fw_viadev->name, IFNAMSIZ); } else ftmp->fw_viadev = NULL; @@ -957,12 +964,6 @@ printk("ip_fw_ctl: invalid device \"%s\"\n", ipfwp->fwp_vianame); #endif return(EINVAL); - } else if ( viadev->pa_addr != ipfwp->fwp_via.s_addr ) { -#ifdef DEBUG_IP_FIREWALL - printk("ip_fw_ctl: device \"%s\" has another IP address\n", - ipfwp->fwp_vianame); -#endif - return(EINVAL); } else if ( ip->ihl != sizeof(struct iphdr) / sizeof(int)) { #ifdef DEBUG_IP_FIREWALL printk("ip_fw_ctl: ip->ihl=%d, want %d\n",ip->ihl, @@ -1066,6 +1067,7 @@ } #endif /* CONFIG_IP_FIREWALL */ +#ifdef CONFIG_PROC_FS #if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) static int ip_chain_procinfo(int stage, char *buffer, char **start, @@ -1120,9 +1122,9 @@ 
ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr), (i->fw_vianame)[0] ? i->fw_vianame : "-", ntohl(i->fw_via.s_addr),i->fw_flg); - /* 9 is enough for a 32 bit box but the counters are 64bit on + /* 10 is enough for a 32 bit box but the counters are 64bit on the Alpha and Ultrapenguin */ - len+=sprintf(buffer+len,"%u %u %-19lu %-19lu", + len+=sprintf(buffer+len,"%u %u %-20lu %-20lu", i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt); for (p = 0; p < IP_FW_MAX_PORTS; p++) len+=sprintf(buffer+len, " %u", i->fw_pts[p]); @@ -1192,6 +1194,7 @@ reset); } #endif +#endif #ifdef CONFIG_IP_FIREWALL @@ -1323,8 +1326,7 @@ /* Register for device up/down reports */ register_netdevice_notifier(&ipfw_dev_notifier); #endif - #ifdef CONFIG_IP_FIREWALL_NETLINK - netlink_attach(NETLINK_FIREWALL, netlink_donothing); /* XXX */ -#endif /* CONFIG_IP_FIREWALL_NETLINK */ + ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL); +#endif } diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_gre.c linux/net/ipv4/ip_gre.c --- v2.1.67/linux/net/ipv4/ip_gre.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/ip_gre.c Sun Nov 30 14:00:39 1997 @@ -0,0 +1,1191 @@ +/* + * Linux NET3: GRE over IP protocol decoder. + * + * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_IPV6 +#include +#include +#include +#endif + +/* + Problems & solutions + -------------------- + + 1. The most important issue is detecting local dead loops. 
+ They would cause complete host lockup in transmit, which + would be "resolved" by stack overflow or, if queueing is enabled, + with infinite looping in net_bh. + + We cannot track such dead loops during route installation, + it is infeasible task. The most general solutions would be + to keep skb->encapsulation counter (sort of local ttl), + and silently drop packet when it expires. It is the best + solution, but it supposes maintaing new variable in ALL + skb, even if no tunneling is used. + + Current solution: t->recursion lock breaks dead loops. It looks + like dev->tbusy flag, but I preferred new variable, because + the semantics is different. One day, when hard_start_xmit + will be multithreaded we will have to use skb->encapsulation. + + + + 2. Networking dead loops would not kill routers, but would really + kill network. IP hop limit plays role of "t->recursion" in this case, + if we copy it from packet being encapsulated to upper header. + It is very good solution, but it introduces two problems: + + - Routing protocols, using packets with ttl=1 (OSPF, RIP2), + do not work over tunnels. + - traceroute does not work. I planned to relay ICMP from tunnel, + so that this problem would be solved and traceroute output + would even more informative. This idea appeared to be wrong: + only Linux complies to rfc1812 now (yes, guys, Linux is the only + true router now :-)), all routers (at least, in neighbourhood of mine) + return only 8 bytes of payload. It is the end. + + Hence, if we want that OSPF worked or traceroute said something reasonable, + we should search for another solution. + + One of them is to parse packet trying to detect inner encapsulation + made by our node. It is difficult or even impossible, especially, + taking into account fragmentation. TO be short, tt is not solution at all. + + Current solution: The solution was UNEXPECTEDLY SIMPLE. + We force DF flag on tunnels with preconfigured hop limit, + that is ALL. 
:-) Well, it does not remove the problem completely, + but exponential growth of network traffic is changed to linear + (branches, that exceed pmtu are pruned) and tunnel mtu + fastly degrades to value <68, where looping stops. + Yes, it is not good if there exists a router in the loop, + which does not force DF, even when encapsulating packets have DF set. + But it is not our problem! Nobody could accuse us, we made + all that we could make. Even if it is your gated who injected + fatal route to network, even if it were you who configured + fatal static route: you are innocent. :-) + + + + 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain + practically identical code. It would be good to glue them + together, but it is not very evident, how to make them modular. + sit is integral part of IPv6, ipip and gre are naturally modular. + We could extract common parts (hash table, ioctl etc) + to a separate module (ip_tunnel.c). + + Alexey Kuznetsov. + */ + +static int ipgre_tunnel_init(struct device *dev); + +/* Fallback tunnel: no source, no destination, no key, no options */ + +static int ipgre_fb_tunnel_init(struct device *dev); + +static struct device ipgre_fb_tunnel_dev = { + NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipgre_fb_tunnel_init, +}; + +static struct ip_tunnel ipgre_fb_tunnel = { + NULL, &ipgre_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", } +}; + +/* Tunnel hash table */ + +/* + 4 hash tables: + + 3: (remote,local) + 2: (remote,*) + 1: (*,local) + 0: (*,*) + + We require exact key match i.e. if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. 
+ */ + +#define HASH_SIZE 16 +#define HASH(addr) ((addr^(addr>>4))&0xF) + +static struct ip_tunnel *tunnels[4][HASH_SIZE]; + +#define tunnels_r_l (tunnels[3]) +#define tunnels_r (tunnels[2]) +#define tunnels_l (tunnels[1]) +#define tunnels_wc (tunnels[0]) + +/* Given src, dst and key, find approriate for input tunnel. */ + +static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) +{ + unsigned h0 = HASH(remote); + unsigned h1 = HASH(key); + struct ip_tunnel *t; + + for (t = tunnels_r_l[h0^h1]; t; t = t->next) { + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + } + for (t = tunnels_r[h0^h1]; t; t = t->next) { + if (remote == t->parms.iph.daddr) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + } + for (t = tunnels_l[h1]; t; t = t->next) { + if (local == t->parms.iph.saddr || + (local == t->parms.iph.daddr && MULTICAST(local))) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + } + for (t = tunnels_wc[h1]; t; t = t->next) { + if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) + return t; + } + if (ipgre_fb_tunnel_dev.flags&IFF_UP) + return &ipgre_fb_tunnel; + return NULL; +} + +static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create) +{ + u32 remote = parms->iph.daddr; + u32 local = parms->iph.saddr; + u32 key = parms->i_key; + struct ip_tunnel *t, **tp, *nt; + struct device *dev; + unsigned h = HASH(key); + int prio = 0; + + if (local) + prio |= 1; + if (remote && !MULTICAST(remote)) { + prio |= 2; + h ^= HASH(remote); + } + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { + if (key == t->parms.i_key) + return t; + } + } + if (!create) + return NULL; + + MOD_INC_USE_COUNT; + dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL); + if (dev == NULL) { + MOD_DEC_USE_COUNT; + return NULL; + } + 
memset(dev, 0, sizeof(*dev) + sizeof(*t)); + dev->priv = (void*)(dev+1); + nt = (struct ip_tunnel*)dev->priv; + nt->dev = dev; + dev->name = nt->parms.name; + dev->init = ipgre_tunnel_init; + memcpy(&nt->parms, parms, sizeof(*parms)); + if (dev->name[0] == 0) { + int i; + for (i=1; i<100; i++) { + sprintf(dev->name, "gre%d", i); + if (dev_get(dev->name) == NULL) + break; + } + if (i==100) + goto failed; + memcpy(parms->name, dev->name, IFNAMSIZ); + } + if (register_netdevice(dev) < 0) + goto failed; + + start_bh_atomic(); + nt->next = t; + *tp = nt; + end_bh_atomic(); + /* Do not decrement MOD_USE_COUNT here. */ + return nt; + +failed: + kfree(dev); + MOD_DEC_USE_COUNT; + return NULL; +} + +static void ipgre_tunnel_destroy(struct device *dev) +{ + struct ip_tunnel *t, **tp; + struct ip_tunnel *t0 = (struct ip_tunnel*)dev->priv; + u32 remote = t0->parms.iph.daddr; + u32 local = t0->parms.iph.saddr; + unsigned h = HASH(t0->parms.i_key); + int prio = 0; + + if (local) + prio |= 1; + if (remote && !MULTICAST(remote)) { + prio |= 2; + h ^= HASH(remote); + } + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { + if (t == t0) { + *tp = t->next; + if (dev != &ipgre_fb_tunnel_dev) { + kfree(dev); + MOD_DEC_USE_COUNT; + } + break; + } + } +} + + +void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len) +{ +#ifndef I_WISH_WORLD_WERE_PERFECT + +/* It is not :-( All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. + + Moreover, Cisco "wise men" put GRE key to the third word + in GRE header. It makes impossible maintaining even soft state for keyed + GRE tunnels with enabled checksum. Tell them "thank you". + + Well, I wonder, rfc1812 was written by Cisco employee, + what the hell these idiots break standrads established + by themself??? 
+ */ + + struct iphdr *iph = (struct iphdr*)dp; + u16 *p = (u16*)(dp+(iph->ihl<<2)); + int grehlen = (iph->ihl<<2) + 4; + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + struct ip_tunnel *t; + u16 flags; + + flags = p[0]; + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { + if (flags&(GRE_VERSION|GRE_ROUTING)) + return; + if (flags&GRE_KEY) { + grehlen += 4; + if (flags&GRE_CSUM) + grehlen += 4; + } + } + + /* If only 8 bytes returned, keyed message will be dropped here */ + if (len < grehlen) + return; + + switch (type) { + default: + case ICMP_PARAMETERPROB: + return; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* Soft state for pmtu is maintained by IP core. */ + return; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe they are just ether pollution. --ANK + */ + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + } + + t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? 
*(((u32*)p) + (grehlen>>2) - 1) : 0); + if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr)) + return; + + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) + return; + + if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; + return; +#else + struct iphdr *iph = (struct iphdr*)dp; + struct iphdr *eiph; + u16 *p = (u16*)(dp+(iph->ihl<<2)); + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + int rel_type = 0; + int rel_code = 0; + int rel_info = 0; + u16 flags; + int grehlen = (iph->ihl<<2) + 4; + struct sk_buff *skb2; + struct rtable *rt; + + if (p[1] != __constant_htons(ETH_P_IP)) + return; + + flags = p[0]; + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { + if (flags&(GRE_VERSION|GRE_ROUTING)) + return; + if (flags&GRE_CSUM) + grehlen += 4; + if (flags&GRE_KEY) + grehlen += 4; + if (flags&GRE_SEQ) + grehlen += 4; + } + if (len < grehlen + sizeof(struct iphdr)) + return; + eiph = (struct iphdr*)(dp + grehlen); + + switch (type) { + default: + return; + case ICMP_PARAMETERPROB: + if (skb->h.icmph->un.gateway < (iph->ihl<<2)) + return; + + /* So... This guy found something strange INSIDE encapsulated + packet. Well, he is fool, but what can we do ? + */ + rel_type = ICMP_PARAMETERPROB; + rel_info = skb->h.icmph->un.gateway - grehlen; + break; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* And it is the only really necessary thing :-) */ + rel_info = ntohs(skb->h.icmph->un.frag.mtu); + if (rel_info < grehlen+68) + return; + rel_info -= grehlen; + /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ + if (rel_info > ntohs(eiph->tot_len)) + return; + break; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe, it is just ether pollution. 
--ANK + */ + rel_type = ICMP_DEST_UNREACH; + rel_code = ICMP_HOST_UNREACH; + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + } + + /* Prepare fake skb to feed it to icmp_send */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 == NULL) + return; + dst_release(skb2->dst); + skb2->dst = NULL; + skb_pull(skb2, skb->data - (u8*)eiph); + skb2->nh.raw = skb2->data; + + /* Try to guess incoming interface */ + if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) { + kfree_skb(skb2, FREE_WRITE); + return; + } + skb2->dev = rt->u.dst.dev; + + /* route "incoming" packet */ + if (rt->rt_flags&RTCF_LOCAL) { + ip_rt_put(rt); + rt = NULL; + if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) || + rt->u.dst.dev->type != ARPHRD_IPGRE) { + ip_rt_put(rt); + kfree_skb(skb2, FREE_WRITE); + return; + } + } else { + ip_rt_put(rt); + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || + skb2->dst->dev->type != ARPHRD_IPGRE) { + kfree_skb(skb2, FREE_WRITE); + return; + } + } + + /* change mtu on this route */ + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + if (rel_info > skb2->dst->pmtu) { + kfree_skb(skb2, FREE_WRITE); + return; + } + skb2->dst->pmtu = rel_info; + rel_info = htonl(rel_info); + } else if (type == ICMP_TIME_EXCEEDED) { + struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; + if (t->parms.iph.ttl) { + rel_type = ICMP_DEST_UNREACH; + rel_code = ICMP_HOST_UNREACH; + } + } + + icmp_send(skb2, rel_type, rel_code, rel_info); + kfree_skb(skb2, FREE_WRITE); +#endif +} + +int ipgre_rcv(struct sk_buff *skb, unsigned short len) +{ + struct iphdr *iph = skb->nh.iph; + u8 *h = skb->h.raw; + u16 flags = *(u16*)h; + u16 csum = 0; + u32 key = 0; + u32 seqno = 0; + struct ip_tunnel *tunnel; + int offset = 4; + + if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { + /* - Version must be 0. + - We do not support routing headers. 
+ */ + if (flags&(GRE_VERSION|GRE_ROUTING)) + goto drop; + + if (flags&GRE_CSUM) { + csum = ip_compute_csum(h, len); + offset += 4; + } + if (flags&GRE_KEY) { + key = *(u32*)(h + offset); + offset += 4; + } + if (flags&GRE_SEQ) { + seqno = ntohl(*(u32*)(h + offset)); + offset += 4; + } + } + + if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) { + skb->mac.raw = skb->nh.raw; + skb->nh.raw = skb_pull(skb, h + offset - skb->data); + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->ip_summed = 0; + skb->protocol = *(u16*)(h + 2); + skb->pkt_type = PACKET_HOST; +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (MULTICAST(iph->daddr)) { + /* Looped back packet, drop it! */ + if (((struct rtable*)skb->dst)->key.iif == 0) + goto drop; + tunnel->stat.multicast++; + skb->pkt_type = PACKET_BROADCAST; + } +#endif + + if (((flags&GRE_CSUM) && csum) || + (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { + tunnel->stat.rx_crc_errors++; + tunnel->stat.rx_errors++; + goto drop; + } + if (tunnel->parms.i_flags&GRE_SEQ) { + if (!(flags&GRE_SEQ) || + (tunnel->i_seqno && seqno - tunnel->i_seqno < 0)) { + tunnel->stat.rx_fifo_errors++; + tunnel->stat.rx_errors++; + goto drop; + } + tunnel->i_seqno = seqno + 1; + } + tunnel->stat.rx_packets++; + tunnel->stat.rx_bytes += skb->len; + skb->dev = tunnel->dev; + dst_release(skb->dst); + skb->dst = NULL; + netif_rx(skb); + return(0); + } + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); + +drop: + kfree_skb(skb, FREE_READ); + return(0); +} + +static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct net_device_stats *stats = &tunnel->stat; + struct iphdr *old_iph = skb->nh.iph; + struct iphdr *tiph; + u8 tos; + u16 df; + struct rtable *rt; /* Route to the other host */ + struct device *tdev; /* Device to other host */ + struct iphdr *iph; /* Our new IP header */ + int max_headroom; /* The extra header space needed */ + 
int gre_hlen; + u32 dst; + int mtu; + + if (tunnel->recursion++) { + tunnel->stat.collisions++; + goto tx_error; + } + + if (dev->hard_header) { + gre_hlen = 0; + tiph = (struct iphdr*)skb->data; + } else { + gre_hlen = tunnel->hlen; + tiph = &tunnel->parms.iph; + } + + if ((dst = tiph->daddr) == 0) { + /* NBMA tunnel */ + + if (skb->dst == NULL) { + tunnel->stat.tx_fifo_errors++; + goto tx_error; + } + + if (skb->protocol == __constant_htons(ETH_P_IP)) { + rt = (struct rtable*)skb->dst; + if ((dst = rt->rt_gateway) == 0) + goto tx_error_icmp; + } +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { + struct in6_addr *addr6; + int addr_type; + struct nd_neigh *neigh = (struct nd_neigh *) skb->dst->neighbour; + + if (neigh == NULL) + goto tx_error; + + addr6 = &neigh->ndn_addr; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) { + addr6 = &skb->nh.ipv6h->daddr; + addr_type = ipv6_addr_type(addr6); + } + + if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + goto tx_error_icmp; + + dst = addr6->s6_addr32[3]; + } +#endif + else + goto tx_error; + } + + tos = tiph->tos; + if (tos&1) { + if (skb->protocol == __constant_htons(ETH_P_IP)) + tos = old_iph->tos; + tos &= ~1; + } + + if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { + tunnel->stat.tx_carrier_errors++; + goto tx_error; + } + tdev = rt->u.dst.dev; + + if (tdev == dev) { + ip_rt_put(rt); + tunnel->stat.collisions++; + goto tx_error; + } + + df = tiph->frag_off; + mtu = rt->u.dst.pmtu - tunnel->hlen; + + if (skb->protocol == __constant_htons(ETH_P_IP)) { + if (skb->dst && mtu < skb->dst->pmtu && mtu >= 68) + skb->dst->pmtu = mtu; + + df |= (old_iph->frag_off&__constant_htons(IP_DF)); + + if ((old_iph->frag_off&__constant_htons(IP_DF)) && + mtu < ntohs(old_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } + } +#ifdef CONFIG_IPV6 + else if (skb->protocol == 
__constant_htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info*)skb->dst; + + if (rt6 && mtu < rt6->u.dst.pmtu && mtu >= 576) { + if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + skb->dst->pmtu = mtu; + } + } + + if (mtu >= 576 && mtu < skb->len - tunnel->hlen + gre_hlen) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + ip_rt_put(rt); + goto tx_error; + } + } +#endif + + if (tunnel->err_count > 0) { + if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { + tunnel->err_count--; + + if (skb->protocol == __constant_htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev); +#endif + } else + tunnel->err_count = 0; + } + + skb->h.raw = skb->nh.raw; + + max_headroom = ((tdev->hard_header_len+15)&~15)+ gre_hlen; + + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + ip_rt_put(rt); + stats->tx_dropped++; + dev_kfree_skb(skb, FREE_WRITE); + tunnel->recursion--; + return 0; + } + dev_kfree_skb(skb, FREE_WRITE); + skb = new_skb; + } + + skb->nh.raw = skb_push(skb, gre_hlen); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* + * Push down and install the IPIP header. 
+ */ + + iph = skb->nh.iph; + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = IPPROTO_GRE; + iph->tos = tos; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + + if ((iph->ttl = tiph->ttl) == 0) { + if (skb->protocol == __constant_htons(ETH_P_IP)) + iph->ttl = old_iph->ttl; +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) + iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; +#endif + else + iph->ttl = ip_statistics.IpDefaultTTL; + } + + ((u16*)(iph+1))[0] = tunnel->parms.o_flags; + ((u16*)(iph+1))[1] = skb->protocol; + + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { + u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4); + + if (tunnel->parms.o_flags&GRE_SEQ) { + ++tunnel->o_seqno; + *ptr = htonl(tunnel->o_seqno); + ptr--; + } + if (tunnel->parms.o_flags&GRE_KEY) { + *ptr = tunnel->parms.o_key; + ptr--; + } + if (tunnel->parms.o_flags&GRE_CSUM) { + *ptr = 0; + *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); + } + } + + iph->tot_len = htons(skb->len); + iph->id = htons(ip_id_count++); + ip_send_check(iph); + + stats->tx_bytes += skb->len; + stats->tx_packets++; + ip_send(skb); + tunnel->recursion--; + return 0; + +tx_error_icmp: + if (skb->protocol == __constant_htons(ETH_P_IP)) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); +#ifdef CONFIG_IPV6 + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev); +#endif + +tx_error: + stats->tx_errors++; + dev_kfree_skb(skb, FREE_WRITE); + tunnel->recursion--; + return 0; +} + +static int +ipgre_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip_tunnel_parm p; + struct ip_tunnel *t; + + MOD_INC_USE_COUNT; + + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == &ipgre_fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } 
+ t = ipgre_tunnel_locate(&p, 0); + } + if (t == NULL) + t = (struct ip_tunnel*)dev->priv; + memcpy(&p, &t->parms, sizeof(p)); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + goto done; + if (p.iph.ttl) + p.iph.frag_off |= __constant_htons(IP_DF); + + if (!(p.i_flags&GRE_KEY)) + p.i_key = 0; + if (!(p.o_flags&GRE_KEY)) + p.o_key = 0; + + t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL); + + if (t) { + err = 0; + if (cmd == SIOCCHGTUNNEL) { + t->parms.iph.ttl = p.iph.ttl; + t->parms.iph.tos = p.iph.tos; + t->parms.iph.frag_off = p.iph.frag_off; + } + if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + if (dev == &ipgre_fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) + goto done; + err = -EPERM; + if (t == &ipgre_fb_tunnel) + goto done; + } + err = unregister_netdevice(dev); + break; + + default: + err = -EINVAL; + } + +done: + MOD_DEC_USE_COUNT; + return err; +} + +static struct net_device_stats *ipgre_tunnel_get_stats(struct device *dev) +{ + return &(((struct ip_tunnel*)dev->priv)->stat); +} + +static int ipgre_tunnel_change_mtu(struct device *dev, int new_mtu) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +#ifdef CONFIG_NET_IPGRE_BROADCAST +/* Nice toy. 
Unfortunately, useless in real life :-) + It allows to construct virtual multiprotocol broadcast "LAN" + over the Internet, provided multicast routing is tuned. + + + I have no idea was this bicycle invented before me, + so that I had to set ARPHRD_IPGRE to a random value. + I have an impression, that Cisco could make something similar, + but this feature is apparently missing in IOS<=11.2(8). + + I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks + with broadcast 224.66.66.66. If you have access to mbone, play with me :-) + + ping -t 255 224.66.66.66 + + If nobody answers, mbone does not work. + + ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 + ip addr add 10.66.66.<somewhat>/24 dev Universe + ifconfig Universe up + ifconfig Universe add fe80::/10 + ifconfig Universe add fec0:6666:6666::/96 + ftp 10.66.66.66 + ... + ftp fec0:6666:6666::193.233.7.65 + ... + + */ + +static int ipgre_header(struct sk_buff *skb, struct device *dev, unsigned short type, + void *daddr, void *saddr, unsigned len) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); + u16 *p = (u16*)(iph+1); + + memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); + p[0] = t->parms.o_flags; + p[1] = htons(type); + + /* + * Set the source hardware address. 
+ */ + + if (saddr) + memcpy(&iph->saddr, saddr, 4); + + if (daddr) { + memcpy(&iph->daddr, daddr, 4); + return t->hlen; + } + if (iph->daddr && !MULTICAST(iph->daddr)) + return t->hlen; + + return -t->hlen; +} + +static int ipgre_rebuild_header(struct sk_buff *skb) +{ + struct device *dev = skb->dev; + struct iphdr *iph = (struct iphdr *)skb->data; + u16 *p = (u16*)(iph + 1); + struct neighbour *neigh = NULL; + + if (skb->dst) + neigh = skb->dst->neighbour; + + if (neigh) + return neigh->ops->resolve((void*)&iph->daddr, skb); + + if (p[1] == __constant_htons(ETH_P_IP)) + return arp_find((void*)&iph->daddr, skb); + + if (net_ratelimit()) + printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n", + dev->name, (int)p[1]); + return 0; +} + +static int ipgre_open(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + + MOD_INC_USE_COUNT; + if (MULTICAST(t->parms.iph.daddr)) { + struct rtable *rt; + if (ip_route_output(&rt, t->parms.iph.daddr, + t->parms.iph.saddr, RT_TOS(t->parms.iph.tos), + t->parms.link)) { + MOD_DEC_USE_COUNT; + return -EADDRNOTAVAIL; + } + dev = rt->u.dst.dev; + ip_rt_put(rt); + if (dev->ip_ptr == NULL) { + MOD_DEC_USE_COUNT; + return -EADDRNOTAVAIL; + } + t->mlink = dev->ifindex; + ip_mc_inc_group(dev->ip_ptr, t->parms.iph.daddr); + } + return 0; +} + +static int ipgre_close(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + if (MULTICAST(t->parms.iph.daddr) && t->mlink) { + dev = dev_get_by_index(t->mlink); + if (dev && dev->ip_ptr) + ip_mc_dec_group(dev->ip_ptr, t->parms.iph.daddr); + } + MOD_DEC_USE_COUNT; + return 0; +} + +#endif + +static void ipgre_tunnel_init_gen(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + + dev->destructor = ipgre_tunnel_destroy; + dev->hard_start_xmit = ipgre_tunnel_xmit; + dev->get_stats = ipgre_tunnel_get_stats; + dev->do_ioctl = ipgre_tunnel_ioctl; + dev->change_mtu = ipgre_tunnel_change_mtu; + + dev_init_buffers(dev); + + 
dev->type = ARPHRD_IPGRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; + dev->mtu = 1500 - sizeof(struct iphdr) - 4; + dev->flags = IFF_NOARP; + dev->iflink = 0; + dev->addr_len = 4; + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + memcpy(dev->broadcast, &t->parms.iph.daddr, 4); +} + +static int ipgre_tunnel_init(struct device *dev) +{ + struct device *tdev = NULL; + struct ip_tunnel *tunnel; + struct iphdr *iph; + int hlen = LL_MAX_HEADER; + int mtu = 1500; + int addend = sizeof(struct iphdr) + 4; + + tunnel = (struct ip_tunnel*)dev->priv; + iph = &tunnel->parms.iph; + + ipgre_tunnel_init_gen(dev); + + /* Guess output device to choose reasonable mtu and hard_header_len */ + + if (iph->daddr) { + struct rtable *rt; + if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) { + tdev = rt->u.dst.dev; + ip_rt_put(rt); + } + + dev->flags |= IFF_POINTOPOINT; + +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (MULTICAST(iph->daddr)) { + if (!iph->saddr) + return -EINVAL; + dev->flags = IFF_BROADCAST; + dev->hard_header = ipgre_header; + dev->rebuild_header = ipgre_rebuild_header; + dev->open = ipgre_open; + dev->stop = ipgre_close; + } +#endif + } + + if (!tdev && tunnel->parms.link) + tdev = dev_get_by_index(tunnel->parms.link); + + if (tdev) { + hlen = tdev->hard_header_len; + mtu = tdev->mtu; + } + dev->iflink = tunnel->parms.link; + + /* Precalculate GRE options length */ + if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { + if (tunnel->parms.o_flags&GRE_CSUM) + addend += 4; + if (tunnel->parms.o_flags&GRE_KEY) + addend += 4; + if (tunnel->parms.o_flags&GRE_SEQ) + addend += 4; + } + dev->hard_header_len = hlen + addend; + dev->mtu = mtu - addend; + tunnel->hlen = addend; + return 0; +} + +#ifdef MODULE +static int ipgre_fb_tunnel_open(struct device *dev) +{ + MOD_INC_USE_COUNT; + return 0; +} + +static int ipgre_fb_tunnel_close(struct device *dev) +{ + MOD_DEC_USE_COUNT; + return 0; +} +#endif + +__initfunc(int 
ipgre_fb_tunnel_init(struct device *dev)) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct iphdr *iph; + + ipgre_tunnel_init_gen(dev); +#ifdef MODULE + dev->open = ipgre_fb_tunnel_open; + dev->stop = ipgre_fb_tunnel_close; +#endif + + iph = &ipgre_fb_tunnel.parms.iph; + iph->version = 4; + iph->protocol = IPPROTO_GRE; + iph->ihl = 5; + tunnel->hlen = sizeof(struct iphdr) + 4; + + tunnels_wc[0] = &ipgre_fb_tunnel; + return 0; +} + + +static struct inet_protocol ipgre_protocol = { + ipgre_rcv, /* GRE handler */ + ipgre_err, /* TUNNEL error control */ + 0, /* next */ + IPPROTO_GRE, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "GRE" /* name */ +}; + + +/* + * And now the modules code and kernel interface. + */ + +#ifdef MODULE +int init_module(void) +#else +__initfunc(int ipgre_init(void)) +#endif +{ + printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); + + ipgre_fb_tunnel_dev.priv = (void*)&ipgre_fb_tunnel; + ipgre_fb_tunnel_dev.name = ipgre_fb_tunnel.parms.name; +#ifdef MODULE + register_netdev(&ipgre_fb_tunnel_dev); +#else + register_netdevice(&ipgre_fb_tunnel_dev); +#endif + + inet_add_protocol(&ipgre_protocol); + return 0; +} + +#ifdef MODULE + +void cleanup_module(void) +{ + if ( inet_del_protocol(&ipgre_protocol) < 0 ) + printk(KERN_INFO "ipgre close: can't remove protocol\n"); + + unregister_netdev(&ipgre_fb_tunnel_dev); +} + +#endif diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_input.c linux/net/ipv4/ip_input.c --- v2.1.67/linux/net/ipv4/ip_input.c Thu Mar 27 14:40:15 1997 +++ linux/net/ipv4/ip_input.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * The Internet Protocol (IP) module. * - * Version: @(#)ip.c 1.0.16b 9/1/93 + * Version: $Id: ip_input.c,v 1.24 1997/10/24 17:15:58 kuznet Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -153,8 +153,7 @@ #endif #include #include -#include -#include +#include #include /* @@ -184,13 +183,55 @@ #define CONFIG_IP_ALWAYS_DEFRAG 1 #endif +/* + * 0 - deliver + * 1 - block + */ +static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +{ + int type; + + type = skb->h.icmph->type; + if (type < 32) + return test_bit(type, &sk->tp_pinfo.tp_raw4.filter); + + /* Do not block unknown ICMP types */ + return 0; +} + +int ip_call_ra_chain(struct sk_buff *skb) +{ + struct ip_ra_chain *ra; + u8 protocol = skb->nh.iph->protocol; + struct sock *last = NULL; + + for (ra = ip_ra_chain; ra; ra = ra->next) { + struct sock *sk = ra->sk; + if (sk && sk->num == protocol) { + if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { + skb = ip_defrag(skb); + if (skb == NULL) + return 1; + } + if (last) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) + raw_rcv(last, skb2); + } + last = sk; + } + } + + if (last) { + raw_rcv(last, skb); + return 1; + } + return 0; +} int ip_local_deliver(struct sk_buff *skb) { struct iphdr *iph = skb->nh.iph; -#ifdef CONFIG_IP_MASQUERADE - struct device *dev = skb->dev; -#endif struct inet_protocol *ipprot; struct sock *raw_sk=NULL; unsigned char hash; @@ -214,7 +255,7 @@ * Do we need to de-masquerade this packet? 
*/ { - int ret = ip_fw_demasquerade(&skb, dev); + int ret = ip_fw_demasquerade(&skb); if (ret < 0) { kfree_skb(skb, FREE_WRITE); return 0; @@ -256,22 +297,23 @@ if((raw_sk = raw_v4_htable[hash]) != NULL) { struct sock *sknext = NULL; struct sk_buff *skb1; - raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr); + raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); if(raw_sk) { /* Any raw sockets */ do { /* Find the next */ sknext = raw_v4_lookup(raw_sk->next, iph->protocol, - iph->saddr, iph->daddr); - if(sknext) + iph->saddr, iph->daddr, skb->dev->ifindex); + if (iph->protocol != IPPROTO_ICMP || !icmp_filter(raw_sk, skb)) { + if (sknext == NULL) + break; skb1 = skb_clone(skb, GFP_ATOMIC); - else - break; /* One pending raw socket left */ - if(skb1) - { - if(ipsec_sk_policy(raw_sk,skb1)) - raw_rcv(raw_sk, skb1); - else - kfree_skb(skb1, FREE_WRITE); + if(skb1) + { + if(ipsec_sk_policy(raw_sk,skb1)) + raw_rcv(raw_sk, skb1); + else + kfree_skb(skb1, FREE_WRITE); + } } raw_sk = sknext; } while(raw_sk!=NULL); @@ -350,15 +392,6 @@ struct ip_options * opt = NULL; int err; -#ifdef CONFIG_NET_IPV6 - /* - * Intercept IPv6 frames. We dump ST-II and invalid types just below.. - */ - - if(iph->version == 6) - return ipv6_rcv(skb,dev,pt); -#endif - /* * When interface is in promisc. mode, drop all the crap * that it receives, do not truing to analyse it. @@ -398,13 +431,18 @@ * is IP we can trim to the true length of the frame. * Note this now means skb->len holds ntohs(iph->tot_len). 
*/ - - skb_trim(skb, ntohs(iph->tot_len)); + __skb_trim(skb, ntohs(iph->tot_len)); if (skb->dst == NULL) { err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev); if (err) goto drop; +#ifdef CONFIG_CPU_IS_SLOW + if (net_cpu_congestion > 10 && !(iph->tos&IPTOS_RELIABILITY) && + IPTOS_PREC(iph->tos) < IPTOS_PREC_INTERNETCONTROL) { + goto drop; + } +#endif } #ifdef CONFIG_IP_ALWAYS_DEFRAG @@ -425,12 +463,12 @@ opt = &(IPCB(skb)->opt); if (opt->srr) { if (!ipv4_config.source_route) { - if (ipv4_config.log_martians) + if (ipv4_config.log_martians && net_ratelimit()) printk(KERN_INFO "source route option %08lx -> %08lx\n", ntohl(iph->saddr), ntohl(iph->daddr)); goto drop; } - if (RT_LOCALADDR(((struct rtable*)skb->dst)->rt_flags) && + if (((struct rtable*)skb->dst)->rt_type == RTN_LOCAL && ip_options_rcv_srr(skb)) goto drop; } diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_masq.c linux/net/ipv4/ip_masq.c --- v2.1.67/linux/net/ipv4/ip_masq.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/ip_masq.c Sun Nov 30 14:00:39 1997 @@ -339,7 +339,7 @@ * given boundaries MASQ_BEGIN and MASQ_END. 
*/ -struct ip_masq * ip_masq_new(struct device *dev, int proto, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned mflags) +struct ip_masq * ip_masq_new(__u32 maddr, int proto, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned mflags) { struct ip_masq *ms, *mst; int ports_tried, *free_ports_p; @@ -377,7 +377,7 @@ ms->flags |= IP_MASQ_F_NO_DADDR; /* get masq address from rif */ - ms->maddr = dev->pa_addr; + ms->maddr = maddr; for (ports_tried = 0; ports_tried < *free_ports_p; ports_tried++){ save_flags(flags); @@ -449,7 +449,7 @@ uh->check=0xFFFF; } -int ip_fw_masquerade(struct sk_buff **skb_ptr, struct device *dev) +int ip_fw_masquerade(struct sk_buff **skb_ptr, __u32 maddr) { struct sk_buff *skb=*skb_ptr; struct iphdr *iph = skb->nh.iph; @@ -489,7 +489,7 @@ if (ms==NULL) { - ms = ip_masq_new(dev, iph->protocol, + ms = ip_masq_new(maddr, iph->protocol, iph->saddr, portptr[0], iph->daddr, portptr[1], 0); @@ -512,7 +512,7 @@ * Attempt ip_masq_app call. * will fix ip_masq and iph seq stuff */ - if (ip_masq_app_pkt_out(ms, skb_ptr, dev) != 0) + if (ip_masq_app_pkt_out(ms, skb_ptr, maddr) != 0) { /* * skb has possibly changed, update pointers. 
@@ -572,7 +572,7 @@ ip_send_check(iph); #ifdef DEBUG_CONFIG_IP_MASQUERADE - printk("O-routed from %lX:%X over %s\n",ntohl(ms->maddr),ntohs(ms->mport),dev->name); + printk("O-routed from %lX:%X via %lX\n",ntohl(ms->maddr),ntohs(ms->mport),ntohl(maddr)); #endif return 0; @@ -586,7 +586,7 @@ * Currently handles error types - unreachable, quench, ttl exceeded */ -int ip_fw_masq_icmp(struct sk_buff **skb_p, struct device *dev) +int ip_fw_masq_icmp(struct sk_buff **skb_p) { struct sk_buff *skb = *skb_p; struct iphdr *iph = skb->nh.iph; @@ -685,7 +685,7 @@ * Currently handles error types - unreachable, quench, ttl exceeded */ -int ip_fw_demasq_icmp(struct sk_buff **skb_p, struct device *dev) +int ip_fw_demasq_icmp(struct sk_buff **skb_p) { struct sk_buff *skb = *skb_p; struct iphdr *iph = skb->nh.iph; @@ -778,7 +778,7 @@ * this function. */ -int ip_fw_demasquerade(struct sk_buff **skb_p, struct device *dev) +int ip_fw_demasquerade(struct sk_buff **skb_p) { struct sk_buff *skb = *skb_p; struct iphdr *iph = skb->nh.iph; @@ -789,7 +789,7 @@ switch (iph->protocol) { case IPPROTO_ICMP: - return(ip_fw_demasq_icmp(skb_p, dev)); + return(ip_fw_demasq_icmp(skb_p)); case IPPROTO_TCP: case IPPROTO_UDP: /* Make sure packet is in the masq range */ @@ -869,7 +869,7 @@ * will fix ip_masq and iph ack_seq stuff */ - if (ip_masq_app_pkt_in(ms, skb_p, dev) != 0) + if (ip_masq_app_pkt_in(ms, skb_p) != 0) { /* * skb has changed, update pointers. @@ -937,6 +937,7 @@ return 0; } +#ifdef CONFIG_PROC_FS /* * /proc/net entry */ @@ -999,7 +1000,6 @@ return len; } -#ifdef CONFIG_PROC_FS static struct proc_dir_entry proc_net_ipmsqhst = { PROC_NET_IPMSQHST, 13, "ip_masquerade", S_IFREG | S_IRUGO, 1, 0, 0, diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_masq_app.c linux/net/ipv4/ip_masq_app.c --- v2.1.67/linux/net/ipv4/ip_masq_app.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/ip_masq_app.c Sun Nov 30 14:00:39 1997 @@ -306,7 +306,7 @@ * returns (new - old) skb->len diff. 
*/ -int ip_masq_app_pkt_out(struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev) +int ip_masq_app_pkt_out(struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) { struct ip_masq_app * mapp; struct iphdr *iph; @@ -351,7 +351,7 @@ if ( mapp->pkt_out == NULL ) return 0; - diff = mapp->pkt_out(mapp, ms, skb_p, dev); + diff = mapp->pkt_out(mapp, ms, skb_p, maddr); /* * Update ip_masq seq stuff if len has changed. @@ -369,7 +369,7 @@ * returns (new - old) skb->len diff. */ -int ip_masq_app_pkt_in(struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev) +int ip_masq_app_pkt_in(struct ip_masq *ms, struct sk_buff **skb_p) { struct ip_masq_app * mapp; struct iphdr *iph; @@ -414,7 +414,7 @@ if ( mapp->pkt_in == NULL ) return 0; - diff = mapp->pkt_in(mapp, ms, skb_p, dev); + diff = mapp->pkt_in(mapp, ms, skb_p); /* * Update ip_masq seq stuff if len has changed. diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_masq_ftp.c linux/net/ipv4/ip_masq_ftp.c --- v2.1.67/linux/net/ipv4/ip_masq_ftp.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/ip_masq_ftp.c Sun Nov 30 14:00:39 1997 @@ -50,7 +50,7 @@ } int -masq_ftp_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev) +masq_ftp_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) { struct sk_buff *skb; struct iphdr *iph; @@ -118,7 +118,7 @@ ip_masq_set_expire(n_ms,0); } else { - n_ms = ip_masq_new(dev, IPPROTO_TCP, + n_ms = ip_masq_new(maddr, IPPROTO_TCP, htonl(from), htons(port), iph->daddr, 0, IP_MASQ_F_NO_DPORT); diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_masq_irc.c linux/net/ipv4/ip_masq_irc.c --- v2.1.67/linux/net/ipv4/ip_masq_irc.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/ip_masq_irc.c Sun Nov 30 14:00:39 1997 @@ -51,7 +51,7 @@ } int -masq_irc_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev) +masq_irc_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff 
**skb_p, __u32 maddr) { struct sk_buff *skb; struct iphdr *iph; @@ -167,7 +167,7 @@ * connection is requested by another client. */ - n_ms = ip_masq_new(dev, IPPROTO_TCP, + n_ms = ip_masq_new(maddr, IPPROTO_TCP, htonl(s_addr),htons(s_port), 0, 0, IP_MASQ_F_NO_DPORT|IP_MASQ_F_NO_DADDR diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_masq_quake.c linux/net/ipv4/ip_masq_quake.c --- v2.1.67/linux/net/ipv4/ip_masq_quake.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/ip_masq_quake.c Sun Nov 30 14:00:39 1997 @@ -73,7 +73,7 @@ } int -masq_quake_in (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev) +masq_quake_in (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p) { struct sk_buff *skb; struct iphdr *iph; @@ -158,7 +158,7 @@ } int -masq_quake_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev) +masq_quake_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, __u32 maddr) { struct sk_buff *skb; struct iphdr *iph; @@ -234,7 +234,7 @@ memcpy(&udp_port, data, 2); - n_ms = ip_masq_new(dev, IPPROTO_UDP, + n_ms = ip_masq_new(maddr, IPPROTO_UDP, ms->saddr, htons(udp_port), ms->daddr, ms->dport, 0); diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_masq_raudio.c linux/net/ipv4/ip_masq_raudio.c --- v2.1.67/linux/net/ipv4/ip_masq_raudio.c Tue May 13 22:41:23 1997 +++ linux/net/ipv4/ip_masq_raudio.c Sun Nov 30 14:00:39 1997 @@ -2,7 +2,7 @@ * IP_MASQ_RAUDIO - Real Audio masquerading module * * - * Version: @(#)$Id: ip_masq_raudio.c,v 1.6 1997/04/29 09:38:26 mj Exp $ + * Version: @(#)$Id: ip_masq_raudio.c,v 1.7 1997/09/16 18:43:40 kuznet Exp $ * * Author: Nigel Metheringham * [strongly based on ftp module by Juan Jose Ciarlante & Wouter Gadeyne] @@ -88,7 +88,7 @@ } int -masq_raudio_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff **skb_p, struct device *dev) +masq_raudio_out (struct ip_masq_app *mapp, struct ip_masq *ms, struct sk_buff 
**skb_p, __u32 maddr) { struct sk_buff *skb; struct iphdr *iph; @@ -154,7 +154,7 @@ if (ntohs(msg_id) == 1) { /* This is a message detailing the UDP port to be used */ memcpy(&udp_port, p, 2); - n_ms = ip_masq_new(dev, IPPROTO_UDP, + n_ms = ip_masq_new(maddr, IPPROTO_UDP, ms->saddr, udp_port, ms->daddr, 0, IP_MASQ_F_NO_DPORT); diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_nat_dumb.c linux/net/ipv4/ip_nat_dumb.c --- v2.1.67/linux/net/ipv4/ip_nat_dumb.c Thu Dec 12 06:54:24 1996 +++ linux/net/ipv4/ip_nat_dumb.c Sun Nov 30 14:00:39 1997 @@ -5,6 +5,8 @@ * * Dumb Network Address Translation. * + * Version: $Id: ip_nat_dumb.c,v 1.2 1997/10/10 22:41:05 davem Exp $ + * * Authors: Alexey Kuznetsov, * * This program is free software; you can redistribute it and/or diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_options.c linux/net/ipv4/ip_options.c --- v2.1.67/linux/net/ipv4/ip_options.c Thu May 15 16:48:06 1997 +++ linux/net/ipv4/ip_options.c Sun Nov 30 14:00:39 1997 @@ -5,6 +5,8 @@ * * The options processing module for ip.c * + * Version: $Id: ip_options.c,v 1.12 1997/10/10 22:41:08 davem Exp $ + * * Authors: A.N.Kuznetsov * */ @@ -15,10 +17,10 @@ #include #include #include +#include #include #include #include -#include /* * Write options to IP header, record destination address to @@ -32,7 +34,7 @@ */ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, - u32 daddr, u32 saddr, int is_frag) + u32 daddr, struct rtable *rt, int is_frag) { unsigned char * iph = skb->nh.raw; @@ -46,9 +48,9 @@ if (!is_frag) { if (opt->rr_needaddr) - memcpy(iph+opt->rr+iph[opt->rr+2]-5, &saddr, 4); + ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, rt); if (opt->ts_needaddr) - memcpy(iph+opt->ts+iph[opt->ts+2]-9, &saddr, 4); + ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); if (opt->ts_needtime) { struct timeval tv; __u32 midtime; @@ -147,7 +149,7 @@ if (((struct timestamp*)(dptr+1))->flags == IPOPT_TS_PRESPEC) { __u32 addr; memcpy(&addr, sptr+soffset-9, 4); 
- if (__ip_chk_addr(addr) == 0) { + if (inet_addr_type(addr) == RTN_UNICAST) { dopt->ts_needtime = 0; dopt->ts_needaddr = 0; soffset -= 8; @@ -248,6 +250,7 @@ unsigned char * optptr; int optlen; unsigned char * pp_ptr = NULL; + struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL; if (!opt) { opt = &(IPCB(skb)->opt); @@ -328,7 +331,7 @@ goto error; } if (skb) { - memcpy(&optptr[optptr[2]-1], &skb->dev->pa_addr, 4); + memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); opt->is_changed = 1; } optptr[2] += 4; @@ -371,7 +374,7 @@ } opt->ts = optptr - iph; if (skb) { - memcpy(&optptr[ts->ptr-1], &skb->dev->pa_addr, 4); + memcpy(&optptr[ts->ptr-1], &rt->rt_spec_dst, 4); timeptr = (__u32*)&optptr[ts->ptr+3]; } opt->ts_needaddr = 1; @@ -387,7 +390,7 @@ { u32 addr; memcpy(&addr, &optptr[ts->ptr-1], 4); - if (__ip_chk_addr(addr) == 0) + if (inet_addr_type(addr) == RTN_UNICAST) break; if (skb) timeptr = (__u32*)&optptr[ts->ptr+3]; @@ -521,7 +524,7 @@ if (opt->rr_needaddr) { optptr = (unsigned char *)raw + opt->rr; - memcpy(&optptr[optptr[2]-5], &rt->u.dst.dev->pa_addr, 4); + ip_rt_get_source(&optptr[optptr[2]-5], rt); opt->is_changed = 1; } if (opt->srr_is_hit) { @@ -540,20 +543,20 @@ } if (srrptr + 3 <= srrspace) { opt->is_changed = 1; - memcpy(&optptr[srrptr-1], &rt->u.dst.dev->pa_addr, 4); + ip_rt_get_source(&optptr[srrptr-1], rt); skb->nh.iph->daddr = rt->rt_dst; optptr[2] = srrptr+4; } else printk(KERN_CRIT "ip_forward(): Argh! 
Destination lost!\n"); if (opt->ts_needaddr) { optptr = raw + opt->ts; - memcpy(&optptr[optptr[2]-9], &rt->u.dst.dev->pa_addr, 4); + ip_rt_get_source(&optptr[optptr[2]-9], rt); opt->is_changed = 1; } - if (opt->is_changed) { - opt->is_changed = 0; - ip_send_check(skb->nh.iph); - } + } + if (opt->is_changed) { + opt->is_changed = 0; + ip_send_check(skb->nh.iph); } } @@ -571,16 +574,16 @@ if (!opt->srr) return 0; - if (rt->rt_flags&(RTF_BROADCAST|RTF_MULTICAST|RTF_NAT) - || skb->pkt_type != PACKET_HOST) + if (skb->pkt_type != PACKET_HOST) return -EINVAL; - - if (!(rt->rt_flags & RTF_LOCAL)) { + if (rt->rt_type == RTN_UNICAST) { if (!opt->is_strictroute) return 0; icmp_send(skb, ICMP_PARAMETERPROB, 0, 16); return -EINVAL; } + if (rt->rt_type != RTN_LOCAL) + return -EINVAL; for (srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4) { if (srrptr + 3 > srrspace) { @@ -591,16 +594,15 @@ rt = (struct rtable*)skb->dst; skb->dst = NULL; - err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, - net_alias_main_dev(skb->dev)); + err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); rt2 = (struct rtable*)skb->dst; - if (err || rt2->rt_flags&(RTF_BROADCAST|RTF_MULTICAST|RTF_NAT)) { + if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { ip_rt_put(rt2); skb->dst = &rt->u.dst; return -EINVAL; } ip_rt_put(rt); - if (!(rt2->rt_flags&RTF_LOCAL)) + if (rt2->rt_type != RTN_LOCAL) break; /* Superfast 8) loopback forward */ memcpy(&iph->daddr, &optptr[srrptr-1], 4); diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_output.c linux/net/ipv4/ip_output.c --- v2.1.67/linux/net/ipv4/ip_output.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/ip_output.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * The Internet Protocol (IP) output module. * - * Version: @(#)ip.c 1.0.16b 9/1/93 + * Version: $Id: ip_output.c,v 1.40 1997/10/12 17:01:48 kuznet Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -67,7 +67,7 @@ #include #include #include -#include +#include #include static void __inline__ ip_ll_header_reserve(struct sk_buff *skb) @@ -92,7 +92,7 @@ daddr = opt->faddr; err = ip_route_output(&rt, daddr, saddr, RT_TOS(sk->ip_tos) | - (sk->localroute||0), NULL); + (sk->localroute||0), sk->bound_dev_if); if (err) { ip_statistics.IpOutNoRoutes++; @@ -130,7 +130,7 @@ iph->tos = sk->ip_tos; iph->frag_off = 0; if (sk->ip_pmtudisc == IP_PMTUDISC_WANT && - !(rt->rt_flags & RTF_NOPMTUDISC)) + !(rt->rt_flags & RTCF_NOPMTUDISC)) iph->frag_off |= htons(IP_DF); iph->ttl = sk->ip_ttl; iph->daddr = rt->rt_dst; @@ -143,8 +143,7 @@ { iph->ihl += opt->optlen>>2; skb->h.raw += opt->optlen; - ip_options_build(skb, opt, final_daddr, - rt->u.dst.dev->pa_addr, 0); + ip_options_build(skb, opt, final_daddr, rt, 0); } ip_rt_put(rt); @@ -170,9 +169,10 @@ rt = (struct rtable*)sk->dst_cache; if (!rt || rt->u.dst.obsolete) { + sk->dst_cache = NULL; ip_rt_put(rt); err = ip_route_output(&rt, daddr, sk->saddr, RT_TOS(sk->ip_tos) | - (sk->localroute||0), NULL); + (sk->localroute||0), sk->bound_dev_if); if (err) return err; sk->dst_cache = &rt->u.dst; @@ -210,7 +210,7 @@ iph->tos = sk->ip_tos; iph->frag_off = 0; if (sk->ip_pmtudisc == IP_PMTUDISC_WANT && - !(rt->rt_flags & RTF_NOPMTUDISC)) + !(rt->rt_flags & RTCF_NOPMTUDISC)) iph->frag_off |= htons(IP_DF); iph->ttl = sk->ip_ttl; iph->daddr = rt->rt_dst; @@ -223,7 +223,7 @@ return 0; iph->ihl += opt->optlen>>2; skb->h.raw += opt->optlen; - ip_options_build(skb, opt, final_daddr, rt->u.dst.dev->pa_addr, 0); + ip_options_build(skb, opt, final_daddr, rt, 0); return 0; } @@ -242,17 +242,35 @@ #ifdef CONFIG_IP_ACCT ip_fw_chk(skb->nh.iph, skb->dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_OUT); #endif - +#ifdef CONFIG_IP_ROUTE_NAT if (rt->rt_flags & RTCF_NAT) ip_do_nat(skb); +#endif /* * Multicasts are looped back for other local users */ - - if (rt->rt_flags&RTF_MULTICAST && !(dev->flags&IFF_LOOPBACK)) { - if (sk==NULL || sk->ip_mc_loop) - 
dev_loopback_xmit(skb); + + if (rt->rt_flags&RTCF_MULTICAST && (!sk || sk->ip_mc_loop)) { +#ifndef CONFIG_IP_MROUTE +#if 1 + /* It should never occur. Delete it eventually. --ANK */ + if (!(rt->rt_flags&RTCF_LOCAL) || (dev->flags&IFF_LOOPBACK)) + printk(KERN_DEBUG "ip_mc_output (mc): it should never occur\n"); + else +#endif +#else + /* Small optimization: do not loopback not local frames, + which returned after forwarding; they will be dropped + by ip_mr_input in any case. + Note, that local frames are looped back to be delivered + to local recipients. + + This check is duplicated in ip_mr_input at the moment. + */ + if ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED)) +#endif + dev_loopback_xmit(skb); /* Multicasts with ttl 0 must not go beyond the host */ @@ -262,9 +280,15 @@ } } - if ((rt->rt_flags&(RTF_LOCAL|RTF_BROADCAST)) == (RTF_LOCAL|RTF_BROADCAST) && - !(dev->flags&IFF_LOOPBACK)) + if (rt->rt_flags&RTCF_BROADCAST) { +#if 1 + /* It should never occur. Delete it eventually. 
--ANK */ + if (!(rt->rt_flags&RTCF_LOCAL) || (dev->flags&IFF_LOOPBACK)) + printk(KERN_DEBUG "ip_mc_output (brd): it should never occur!\n"); + else +#endif dev_loopback_xmit(skb); + } if (dev->flags & IFF_UP) { dev_queue_xmit(skb); @@ -291,8 +315,10 @@ ip_fw_chk(skb->nh.iph, skb->dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_OUT); #endif +#ifdef CONFIG_IP_ROUTE_NAT if (rt->rt_flags&RTCF_NAT) ip_do_nat(skb); +#endif if (dev->flags & IFF_UP) { dev_queue_xmit(skb); @@ -431,8 +457,7 @@ */ { struct rtable *nrt; - if (ip_route_output(&nrt, rt->key.dst, rt->key.src, - rt->key.tos, NULL)) { + if (ip_route_output(&nrt, rt->key.dst, rt->key.src, rt->key.tos, sk?sk->bound_dev_if:0)) { kfree_skb(skb, 0); return; } @@ -500,14 +525,13 @@ int hh_len = rt->u.dst.dev->hard_header_len; int nfrags=0; struct ip_options *opt = ipc->opt; - struct device *dev = rt->u.dst.dev; int df = htons(IP_DF); #ifdef CONFIG_NET_SECURITY int fw_res; #endif if (sk->ip_pmtudisc == IP_PMTUDISC_DONT || - rt->rt_flags&RTF_NOPMTUDISC) + rt->rt_flags&RTCF_NOPMTUDISC) df = 0; @@ -546,7 +570,7 @@ iph->id=htons(ip_id_count++); iph->frag_off = df; iph->ttl=sk->ip_mc_ttl; - if (!(rt->rt_flags&RTF_MULTICAST)) + if (rt->rt_type != RTN_MULTICAST) iph->ttl=sk->ip_ttl; iph->protocol=sk->protocol; iph->saddr=rt->rt_src; @@ -695,14 +719,14 @@ if (opt) { iph->ihl += opt->optlen>>2; ip_options_build(skb, opt, - ipc->addr, dev->pa_addr, offset); + ipc->addr, rt, offset); } iph->tos = sk->ip_tos; iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4); iph->id = id; iph->frag_off = htons(offset>>3); iph->frag_off |= mf|df; - if (rt->rt_flags&RTF_MULTICAST) + if (rt->rt_type == RTN_MULTICAST) iph->ttl = sk->ip_mc_ttl; else iph->ttl = sk->ip_ttl; @@ -966,7 +990,7 @@ if (ipc.opt->srr) daddr = replyopts.opt.faddr; - if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), NULL)) + if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) return NULL; iphlen = sizeof(struct iphdr) + 
replyopts.opt.optlen; @@ -1000,7 +1024,7 @@ iph->saddr = rt->rt_src; iph->protocol = skb->nh.iph->protocol; - ip_options_build(reply, &replyopts.opt, daddr, rt->u.dst.dev->pa_addr, 0); + ip_options_build(reply, &replyopts.opt, daddr, rt, 0); return reply; } @@ -1019,43 +1043,16 @@ }; -/* - * Device notifier - */ - -static int ip_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct device *dev=ptr; - - if (dev->family != AF_INET) - return NOTIFY_DONE; - - if(event==NETDEV_UP) - { - /* - * Join the initial group if multicast. - */ - ip_mc_allhost(dev); - } - if(event==NETDEV_DOWN) - ip_mc_drop_device(dev); - - return ip_rt_event(event, dev); -} - -struct notifier_block ip_netdev_notifier={ - ip_netdev_event, - NULL, - 0 -}; #ifdef CONFIG_PROC_FS +#ifdef CONFIG_IP_MULTICAST static struct proc_dir_entry proc_net_igmp = { PROC_NET_IGMP, 4, "igmp", S_IFREG | S_IRUGO, 1, 0, 0, 0, &proc_net_inode_operations, ip_mc_procinfo }; +#endif #endif /* @@ -1068,11 +1065,10 @@ ip_rt_init(); - /* So we flush routes and multicast lists when a device is downed */ - register_netdevice_notifier(&ip_netdev_notifier); - #ifdef CONFIG_PROC_FS +#ifdef CONFIG_IP_MULTICAST proc_net_register(&proc_net_igmp); +#endif #endif } diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ip_sockglue.c linux/net/ipv4/ip_sockglue.c --- v2.1.67/linux/net/ipv4/ip_sockglue.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/ip_sockglue.c Sun Nov 30 14:00:39 1997 @@ -5,6 +5,8 @@ * * The IP to API glue. * + * Version: $Id: ip_sockglue.c,v 1.28 1997/11/17 17:36:08 kuznet Exp $ + * * Authors: see ip.c * * Fixes: @@ -27,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -36,34 +39,47 @@ #include +#define IP_CMSG_PKTINFO 1 +#define IP_CMSG_TTL 2 +#define IP_CMSG_TOS 4 +#define IP_CMSG_RECVOPTS 8 +#define IP_CMSG_RETOPTS 16 + /* * SOL_IP control messages. 
*/ -static void ip_cmsg_recv_rxinfo(struct msghdr *msg, struct sk_buff *skb) +static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) { struct in_pktinfo info; struct rtable *rt = (struct rtable *)skb->dst; - info.ipi_ifindex = skb->dev->ifindex; info.ipi_addr.s_addr = skb->nh.iph->daddr; - info.ipi_spec_dst.s_addr = rt->rt_spec_dst; + if (rt) { + info.ipi_ifindex = rt->rt_iif; + info.ipi_spec_dst.s_addr = rt->rt_spec_dst; + } else { + info.ipi_ifindex = 0; + info.ipi_spec_dst.s_addr = 0; + } - put_cmsg(msg, SOL_IP, IP_RXINFO, sizeof(info), &info); + put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); } -static void ip_cmsg_recv_localaddr(struct msghdr *msg, struct sk_buff *skb, int local) +static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb) { - struct in_addr addr; + if (IPCB(skb)->opt.optlen == 0) + return; - addr.s_addr = skb->nh.iph->daddr; + put_cmsg(msg, SOL_IP, IP_TTL, 1, &skb->nh.iph->ttl); +} - if (local) { - struct rtable *rt = (struct rtable *)skb->dst; - addr.s_addr = rt->rt_spec_dst; - } - put_cmsg(msg, SOL_IP, local ? 
IP_LOCALADDR : IP_RECVDSTADDR, - sizeof(addr), &addr); +static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb) +{ + if (IPCB(skb)->opt.optlen == 0) + return; + + put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos); } static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) @@ -99,26 +115,30 @@ /* Ordered by supposed usage frequency */ if (flags & 1) - ip_cmsg_recv_rxinfo(msg, skb); + ip_cmsg_recv_pktinfo(msg, skb); if ((flags>>=1) == 0) return; + if (flags & 1) - ip_cmsg_recv_localaddr(msg, skb, 1); + ip_cmsg_recv_ttl(msg, skb); if ((flags>>=1) == 0) return; + if (flags & 1) - ip_cmsg_recv_opts(msg, skb); + ip_cmsg_recv_tos(msg, skb); if ((flags>>=1) == 0) return; + if (flags & 1) - ip_cmsg_recv_retopts(msg, skb); + ip_cmsg_recv_opts(msg, skb); if ((flags>>=1) == 0) return; + if (flags & 1) - ip_cmsg_recv_localaddr(msg, skb, 0); + ip_cmsg_recv_retopts(msg, skb); } -int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc, struct device **devp) +int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) { int err; struct cmsghdr *cmsg; @@ -127,27 +147,19 @@ if (cmsg->cmsg_level != SOL_IP) continue; switch (cmsg->cmsg_type) { - case IP_LOCALADDR: - if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_addr))) - return -EINVAL; - memcpy(&ipc->addr, CMSG_DATA(cmsg), sizeof(struct in_addr)); - break; case IP_RETOPTS: err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? 
err : 40, 0); if (err) return err; break; - case IP_TXINFO: + case IP_PKTINFO: { struct in_pktinfo *info; if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) return -EINVAL; info = (struct in_pktinfo *)CMSG_DATA(cmsg); - if (info->ipi_ifindex && !devp) - return -EINVAL; - if ((*devp = dev_get_by_index(info->ipi_ifindex)) == NULL) - return -ENODEV; + ipc->oif = info->ipi_ifindex; ipc->addr = info->ipi_spec_dst.s_addr; break; } @@ -158,6 +170,53 @@ return 0; } + +/* Special input handler for packets caught by router alert option. + They are selected only by protocol field, and then processed like + local ones; but only if someone wants them! Otherwise, router + not running rsvpd will kill RSVP. + + It is user level problem, what it will make with them. + I have no idea, how it will masquerade or NAT them (it is joke, joke :-)), + but receiver should be clever enough f.e. to forward mtrace requests, + sent to multicast group to reach destination designated router. + */ +struct ip_ra_chain *ip_ra_chain; + +int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)) +{ + struct ip_ra_chain *ra, *new_ra, **rap; + + if (sk->type != SOCK_RAW || sk->num == IPPROTO_RAW) + return -EINVAL; + + new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; + + for (rap = &ip_ra_chain; (ra=*rap) != NULL; rap = &ra->next) { + if (ra->sk == sk) { + if (on) { + if (new_ra) + kfree(new_ra); + return -EADDRINUSE; + } + *rap = ra->next; + if (ra->destructor) + ra->destructor(sk); + kfree(ra); + return 0; + } + } + if (new_ra == NULL) + return -ENOBUFS; + new_ra->sk = sk; + new_ra->destructor = destructor; + start_bh_atomic(); + new_ra->next = ra; + *rap = new_ra; + end_bh_atomic(); + return 0; +} + /* * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on * an IP socket. 
@@ -168,7 +227,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { int val=0,err; - unsigned char ucval = 0; #if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT) struct ip_fw tmp_fw; #endif @@ -177,9 +235,12 @@ if(get_user(val, (int *) optval)) return -EFAULT; } else if(optlen>=sizeof(char)) { + unsigned char ucval; if(get_user(ucval, (unsigned char *) optval)) return -EFAULT; + val = (int)ucval; } + /* If optlen==0, it is equivalent to val == 0 */ if(level!=SOL_IP) return -ENOPROTOOPT; @@ -213,50 +274,38 @@ kfree_s(old_opt, sizeof(struct ip_options) + old_opt->optlen); return 0; } - case IP_RXINFO: - if (optlen<4) - return -EINVAL; + case IP_PKTINFO: if (val) - sk->ip_cmsg_flags |= 1; + sk->ip_cmsg_flags |= IP_CMSG_PKTINFO; else - sk->ip_cmsg_flags &= ~1; + sk->ip_cmsg_flags &= ~IP_CMSG_PKTINFO; return 0; - case IP_LOCALADDR: - if (optlen<4) - return -EINVAL; + case IP_RECVTTL: if (val) - sk->ip_cmsg_flags |= 2; + sk->ip_cmsg_flags |= IP_CMSG_TTL; else - sk->ip_cmsg_flags &= ~2; + sk->ip_cmsg_flags &= ~IP_CMSG_TTL; return 0; - case IP_RECVOPTS: - if (optlen<4) - return -EINVAL; + case IP_RECVTOS: if (val) - sk->ip_cmsg_flags |= 4; + sk->ip_cmsg_flags |= IP_CMSG_TOS; else - sk->ip_cmsg_flags &= ~4; + sk->ip_cmsg_flags &= ~IP_CMSG_TOS; return 0; - case IP_RETOPTS: - if (optlen<4) - return -EINVAL; + case IP_RECVOPTS: if (val) - sk->ip_cmsg_flags |= 8; + sk->ip_cmsg_flags |= IP_CMSG_RECVOPTS; else - sk->ip_cmsg_flags &= ~8; + sk->ip_cmsg_flags &= ~IP_CMSG_RECVOPTS; return 0; - case IP_RECVDSTADDR: - if (optlen<4) - return -EINVAL; + case IP_RETOPTS: if (val) - sk->ip_cmsg_flags |= 0x10; + sk->ip_cmsg_flags |= IP_CMSG_RETOPTS; else - sk->ip_cmsg_flags &= ~0x10; + sk->ip_cmsg_flags &= ~IP_CMSG_RETOPTS; return 0; case IP_TOS: /* This sets both TOS and Precedence */ /* Reject setting of unused bits */ - if (optlen<4) - return -EINVAL; if (val & ~(IPTOS_TOS_MASK|IPTOS_PREC_MASK)) return -EINVAL; if (IPTOS_PREC(val) >= 
IPTOS_PREC_CRITIC_ECP && !suser()) @@ -274,29 +323,25 @@ sk->priority = rt_tos2priority(val); return 0; case IP_TTL: - if (optlen<4) + if (optlen<1) return -EINVAL; + if(val==-1) + val = ip_statistics.IpDefaultTTL; if(val<1||val>255) return -EINVAL; sk->ip_ttl=val; return 0; case IP_HDRINCL: - if (optlen<4) - return -EINVAL; if(sk->type!=SOCK_RAW) return -ENOPROTOOPT; sk->ip_hdrincl=val?1:0; return 0; case IP_PMTUDISC: - if (optlen<4) - return -EINVAL; if (val<0 || val>2) return -EINVAL; sk->ip_pmtudisc = val; return 0; case IP_RECVERR: - if (optlen<4) - return -EINVAL; if (sk->type==SOCK_STREAM) return -ENOPROTOOPT; lock_sock(sk); @@ -312,211 +357,81 @@ case IP_MULTICAST_TTL: if (optlen<1) return -EINVAL; - sk->ip_mc_ttl=(int)ucval; + if (val==-1) + val = 1; + if (val < 0 || val > 255) + return -EINVAL; + sk->ip_mc_ttl=val; return 0; case IP_MULTICAST_LOOP: if (optlen<1) return -EINVAL; - if(ucval!=0 && ucval!=1) - return -EINVAL; - sk->ip_mc_loop=(int)ucval; + sk->ip_mc_loop = val ? 1 : 0; return 0; case IP_MULTICAST_IF: { - struct in_addr addr; + struct ip_mreqn mreq; struct device *dev = NULL; /* * Check the arguments are allowable */ - if(optlenip_mc_index = 0; - return 0; - } - - /* - * Find the device - */ - - dev=ip_dev_find(addr.s_addr, NULL); - - /* - * Did we find one - */ - - if(dev) - { - sk->ip_mc_index = dev->ifindex; - return 0; + if (optlen >= sizeof(struct ip_mreqn)) { + if (copy_from_user(&mreq,optval,sizeof(mreq))) + return -EFAULT; + } else { + memset(&mreq, 0, sizeof(mreq)); + if (optlen >= sizeof(struct in_addr) && + copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr))) + return -EFAULT; } - return -EADDRNOTAVAIL; - } - - - case IP_ADD_MEMBERSHIP: - { - -/* - * FIXME: Add/Del membership should have a semaphore protecting them from re-entry - */ - struct ip_mreq mreq; - struct rtable *rt; - struct device *dev=NULL; - - /* - * Check the arguments. 
- */ - - if(optlenu.dst.dev; - ip_rt_put(rt); - } else - dev = ip_dev_find(mreq.imr_interface.s_addr, NULL); - - /* - * No device, no cookies. - */ - - if(!dev) - return -ENODEV; - - /* - * Join group. - */ - - return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr); - } - - case IP_DROP_MEMBERSHIP: - { - struct ip_mreq mreq; - struct rtable *rt; - struct device *dev=NULL; - - /* - * Check the arguments - */ - - if(optlenu.dst.dev; - ip_rt_put(rt); - } else - dev = ip_dev_find(mreq.imr_interface.s_addr, NULL); - - /* - * Did we find a suitable device. - */ - - if(!dev) - return -ENODEV; - - /* - * Leave group - */ - - return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr); - } - - case IP_MULTICAST_IFN: - { - struct ip_mreqn mreq; - struct device *dev = NULL; - - if(optlenip_mc_index = 0; sk->ip_mc_addr = 0; return 0; } - dev = ip_dev_find(mreq.imr_address.s_addr, NULL); + dev = ip_dev_find(mreq.imr_address.s_addr); } else dev = dev_get_by_index(mreq.imr_ifindex); if (!dev) - return -ENODEV; + return -EADDRNOTAVAIL; + + if (sk->bound_dev_if && dev->ifindex != sk->bound_dev_if) + return -EINVAL; sk->ip_mc_index = mreq.imr_ifindex; sk->ip_mc_addr = mreq.imr_address.s_addr; return 0; } - case IP_ADD_MEMBERSHIPN: - { - struct ip_mreqn mreq; - struct device *dev = NULL; - if(optlen= sizeof(struct ip_mreqn)) { + if(copy_from_user(&mreq,optval,sizeof(mreq))) + return -EFAULT; + } else { + memset(&mreq, 0, sizeof(mreq)); + if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq))) + return -EFAULT; + } - dev=dev_get_by_index(mreq.imr_ifindex); - if(!dev) - return -ENODEV; - - return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr); + if (optname == IP_ADD_MEMBERSHIP) + return ip_mc_join_group(sk,&mreq); + else + return ip_mc_leave_group(sk,&mreq); } + case IP_ROUTER_ALERT: + return ip_ra_control(sk, val ? 
1 : 0, NULL); + #ifdef CONFIG_IP_FIREWALL case IP_FW_INSERT_IN: case IP_FW_INSERT_OUT: @@ -616,21 +531,21 @@ return -EFAULT; return 0; } - case IP_RXINFO: - val = (sk->ip_cmsg_flags & 1) != 0; - return 0; - case IP_LOCALADDR: - val = (sk->ip_cmsg_flags & 2) != 0; - return 0; + case IP_PKTINFO: + val = (sk->ip_cmsg_flags & IP_CMSG_PKTINFO) != 0; + break; + case IP_RECVTTL: + val = (sk->ip_cmsg_flags & IP_CMSG_TTL) != 0; + break; + case IP_RECVTOS: + val = (sk->ip_cmsg_flags & IP_CMSG_TOS) != 0; + break; case IP_RECVOPTS: - val = (sk->ip_cmsg_flags & 4) != 0; - return 0; + val = (sk->ip_cmsg_flags & IP_CMSG_RECVOPTS) != 0; + break; case IP_RETOPTS: - val = (sk->ip_cmsg_flags & 8) != 0; - return 0; - case IP_RECVDSTADDR: - val = (sk->ip_cmsg_flags & 0x10) != 0; - return 0; + val = (sk->ip_cmsg_flags & IP_CMSG_RETOPTS) != 0; + break; case IP_TOS: val=sk->ip_tos; break; @@ -642,17 +557,18 @@ break; case IP_PMTUDISC: val=sk->ip_pmtudisc; - return 0; + break; case IP_RECVERR: val=sk->ip_recverr; - return 0; + break; case IP_MULTICAST_TTL: val=sk->ip_mc_ttl; break; case IP_MULTICAST_LOOP: val=sk->ip_mc_loop; break; - case IP_MULTICAST_IFN: +#if 0 + case IP_MULTICAST_IF: { struct ip_mreqn mreq; len = min(len,sizeof(struct ip_mreqn)); @@ -665,9 +581,13 @@ return -EFAULT; return 0; } +#endif case IP_MULTICAST_IF: { struct device *dev = dev_get_by_index(sk->ip_mc_index); + + printk(KERN_INFO "application %s uses old get IP_MULTICAST_IF. 
Please, report!\n", current->comm); + if (dev == NULL) { len = 0; @@ -689,11 +609,19 @@ return(-ENOPROTOOPT); } - len=min(sizeof(int),len); - - if(put_user(len, optlen)) - return -EFAULT; - if(copy_to_user(optval,&val,len)) - return -EFAULT; + if (len < sizeof(int) && len > 0 && val>=0 && val<255) { + unsigned char ucval = (unsigned char)val; + len = 1; + if(put_user(len, optlen)) + return -EFAULT; + if(copy_to_user(optval,&ucval,1)) + return -EFAULT; + } else { + len=min(sizeof(int),len); + if(put_user(len, optlen)) + return -EFAULT; + if(copy_to_user(optval,&val,len)) + return -EFAULT; + } return 0; } diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ipconfig.c linux/net/ipv4/ipconfig.c --- v2.1.67/linux/net/ipv4/ipconfig.c Wed Dec 31 16:00:00 1969 +++ linux/net/ipv4/ipconfig.c Sun Nov 30 14:00:39 1997 @@ -0,0 +1,1160 @@ +/* + * $Id: ipconfig.c,v 1.5 1997/10/27 16:08:02 mj Exp $ + * + * Automatic Configuration of IP -- use BOOTP or RARP or user-supplied + * information to configure own IP address and routes. + * + * Copyright (C) 1996, 1997 Martin Mares + * + * Derived from network configuration code in fs/nfs/nfsroot.c, + * originally Copyright (C) 1995, 1996 Gero Kuhlmann and me. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* Define this to allow debugging output */ +#undef IPCONFIG_DEBUG + +#ifdef IPCONFIG_DEBUG +#define DBG(x) printk x +#else +#define DBG(x) do { } while(0) +#endif + +/* Define the timeout for waiting for a RARP/BOOTP reply */ +#define CONF_BASE_TIMEOUT (HZ*5) /* Initial timeout: 5 seconds */ +#define CONF_RETRIES 10 /* 10 retries */ +#define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */ +#define CONF_TIMEOUT_MULT *5/4 /* Rate of timeout growth */ +#define CONF_TIMEOUT_MAX (HZ*30) /* Maximum allowed timeout */ + +/* IP configuration */ +static char user_dev_name[IFNAMSIZ] __initdata = { 0, };/* Name of user-selected boot device */ +u32 ic_myaddr __initdata = INADDR_NONE; /* My IP address */ +u32 ic_servaddr __initdata = INADDR_NONE; /* Server IP address */ +u32 ic_gateway __initdata = INADDR_NONE; /* Gateway IP address */ +u32 ic_netmask __initdata = INADDR_NONE; /* Netmask for local subnet */ +int ic_bootp_flag __initdata = 1; /* Use BOOTP */ +int ic_rarp_flag __initdata = 1; /* Use RARP */ +int ic_enable __initdata = 1; /* Automatic IP configuration enabled */ +int ic_host_name_set __initdata = 0; /* Host name configured manually */ +int ic_set_manually __initdata = 0; /* IPconfig parameters set manually */ + +u32 root_server_addr __initdata = INADDR_NONE; /* Address of boot server */ +u8 root_server_path[256] __initdata = { 0, }; /* Path to mount as root */ + +#if defined(CONFIG_IP_PNP_BOOTP) || defined(CONFIG_IP_PNP_RARP) + +#define CONFIG_IP_PNP_DYNAMIC + +static int ic_got_reply __initdata = 0; + +#define IC_GOT_BOOTP 1 +#define IC_GOT_RARP 2 + +#endif + +/* + * Network devices + */ + +struct ic_device { + struct ic_device *next; + struct device *dev; + unsigned short flags; 
+}; + +static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */ +static struct device *ic_dev __initdata = NULL; /* Selected device */ +static int bootp_dev_count __initdata = 0; /* BOOTP capable devices */ +static int rarp_dev_count __initdata = 0; /* RARP capable devices */ + +__initfunc(int ic_open_devs(void)) +{ + struct ic_device *d, **last; + struct device *dev; + unsigned short oflags; + + last = &ic_first_dev; + for (dev = dev_base; dev; dev = dev->next) + if (dev->type < ARPHRD_SLIP && + !(dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) && + strncmp(dev->name, "dummy", 5) && + (!user_dev_name[0] || !strcmp(dev->name, user_dev_name))) { + oflags = dev->flags; + if (dev_change_flags(dev, oflags | IFF_UP) < 0) { + printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name); + continue; + } + if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) + return -1; + d->dev = dev; + *last = d; + last = &d->next; + d->flags = oflags; + bootp_dev_count++; + if (!(dev->flags & IFF_NOARP)) + rarp_dev_count++; + DBG(("IP-Config: Opened %s\n", dev->name)); + } + *last = NULL; + + if (!bootp_dev_count) { + if (user_dev_name[0]) + printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); + else + printk(KERN_ERR "IP-Config: No network devices available.\n"); + return -1; + } + return 0; +} + +__initfunc(void ic_close_devs(void)) +{ + struct ic_device *d, *next; + struct device *dev; + + next = ic_first_dev; + while ((d = next)) { + next = d->next; + dev = d->dev; + if (dev != ic_dev) { + DBG(("IP-Config: Downing %s\n", dev->name)); + dev_change_flags(dev, d->flags); + } + kfree_s(d, sizeof(struct ic_device)); + } +} + +/* + * Interface to various network functions. 
+ */ + +static inline void +set_sockaddr(struct sockaddr_in *sin, u32 addr, u16 port) +{ + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = addr; + sin->sin_port = port; +} + +__initfunc(static int ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)) +{ + int res; + + mm_segment_t oldfs = get_fs(); + set_fs(get_ds()); + res = devinet_ioctl(cmd, arg); + set_fs(oldfs); + return res; +} + +__initfunc(static int ic_route_ioctl(unsigned int cmd, struct rtentry *arg)) +{ + int res; + + mm_segment_t oldfs = get_fs(); + set_fs(get_ds()); + res = ip_rt_ioctl(cmd, arg); + set_fs(oldfs); + return res; +} + +/* + * Set up interface addresses and routes. + */ + +__initfunc(static int ic_setup_if(void)) +{ + struct ifreq ir; + struct sockaddr_in *sin = (void *) &ir.ifr_ifru.ifru_addr; + int err; + + memset(&ir, 0, sizeof(ir)); + strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name); + set_sockaddr(sin, ic_myaddr, 0); + if ((err = ic_dev_ioctl(SIOCSIFADDR, &ir)) < 0) { + printk(KERN_ERR "IP-Config: Unable to set interface address (%d).\n", err); + return -1; + } + set_sockaddr(sin, ic_netmask, 0); + if ((err = ic_dev_ioctl(SIOCSIFNETMASK, &ir)) < 0) { + printk(KERN_ERR "IP-Config: Unable to set interface netmask (%d).\n", err); + return -1; + } + set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0); + if ((err = ic_dev_ioctl(SIOCSIFBRDADDR, &ir)) < 0) { + printk(KERN_ERR "IP-Config: Unable to set interface broadcast address (%d).\n", err); + return -1; + } + return 0; +} + +__initfunc(int ic_setup_routes(void)) +{ + /* No need to setup device routes, only the default route... 
*/ + + if (ic_gateway != INADDR_NONE) { + struct rtentry rm; + int err; + + memset(&rm, 0, sizeof(rm)); + if ((ic_gateway ^ ic_myaddr) & ic_netmask) { + printk(KERN_ERR "IP-Config: Gateway not on directly connected network.\n"); + return -1; + } + set_sockaddr((struct sockaddr_in *) &rm.rt_dst, 0, 0); + set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0); + set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0); + rm.rt_flags = RTF_UP | RTF_GATEWAY; + if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) { + printk(KERN_ERR "IP-Config: Cannot add default route (%d).\n", err); + return -1; + } + } + + return 0; +} + +/* + * Fill in default values for all missing parameters. + */ + +__initfunc(int ic_defaults(void)) +{ + if (!ic_host_name_set) + strcpy(system_utsname.nodename, in_ntoa(ic_myaddr)); + + if (root_server_addr == INADDR_NONE) + root_server_addr = ic_servaddr; + + if (ic_netmask == INADDR_NONE) { + if (IN_CLASSA(ic_myaddr)) + ic_netmask = IN_CLASSA_NET; + else if (IN_CLASSB(ic_myaddr)) + ic_netmask = IN_CLASSB_NET; + else if (IN_CLASSC(ic_myaddr)) + ic_netmask = IN_CLASSC_NET; + else { + printk(KERN_ERR "IP-Config: Unable to guess netmask for address %08x\n", ic_myaddr); + return -1; + } + } + + return 0; +} + +/* + * RARP support. + */ + +#ifdef CONFIG_IP_PNP_RARP + +static int ic_rarp_recv(struct sk_buff *skb, struct device *dev, + struct packet_type *pt); + +static struct packet_type rarp_packet_type __initdata = { + 0, /* Should be: __constant_htons(ETH_P_RARP) + * - but this _doesn't_ come out constant! */ + NULL, /* Listen to all devices */ + ic_rarp_recv, + NULL, + NULL +}; + +__initfunc(static void ic_rarp_init(void)) +{ + rarp_packet_type.type = htons(ETH_P_RARP); + dev_add_pack(&rarp_packet_type); +} + +__initfunc(static void ic_rarp_cleanup(void)) +{ + dev_remove_pack(&rarp_packet_type); +} + +/* + * Process received RARP packet. 
+ */ +__initfunc(static int +ic_rarp_recv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)) +{ + struct arphdr *rarp = (struct arphdr *)skb->h.raw; + unsigned char *rarp_ptr = (unsigned char *) (rarp + 1); + unsigned long sip, tip; + unsigned char *sha, *tha; /* s for "source", t for "target" */ + + /* If this test doesn't pass, it's not IP, or we should ignore it anyway */ + if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd)) + goto drop; + + /* If it's not a RARP reply, delete it. */ + if (rarp->ar_op != htons(ARPOP_RREPLY)) + goto drop; + + /* If it's not ethernet, delete it. */ + if (rarp->ar_pro != htons(ETH_P_IP)) + goto drop; + + /* Extract variable-width fields */ + sha = rarp_ptr; + rarp_ptr += dev->addr_len; + memcpy(&sip, rarp_ptr, 4); + rarp_ptr += 4; + tha = rarp_ptr; + rarp_ptr += dev->addr_len; + memcpy(&tip, rarp_ptr, 4); + + /* Discard packets which are not meant for us. */ + if (memcmp(tha, dev->dev_addr, dev->addr_len)) + goto drop; + + /* Discard packets which are not from specified server. */ + if (ic_servaddr != INADDR_NONE && ic_servaddr != sip) + goto drop; + + /* Victory! The packet is what we were looking for! */ + if (!ic_got_reply) { + ic_got_reply = IC_GOT_RARP; + ic_dev = dev; + if (ic_myaddr == INADDR_NONE) + ic_myaddr = tip; + ic_servaddr = sip; + } + + /* And throw the packet out... */ +drop: + kfree_skb(skb, FREE_READ); + return 0; +} + + +/* + * Send RARP request packet over all devices which allow RARP. + */ +__initfunc(static void ic_rarp_send(void)) +{ + struct ic_device *d; + + for (d=ic_first_dev; d; d=d->next) { + struct device *dev = d->dev; + if (!(dev->flags & IFF_NOARP)) + arp_send(ARPOP_RREQUEST, ETH_P_RARP, 0, dev, 0, NULL, + dev->dev_addr, dev->dev_addr); + } +} + +#endif + +/* + * BOOTP support. 
+ */ + +#ifdef CONFIG_IP_PNP_BOOTP + +static struct socket *ic_bootp_xmit_sock __initdata = NULL; /* BOOTP send socket */ +static struct socket *ic_bootp_recv_sock __initdata = NULL; /* BOOTP receive socket */ + +struct bootp_pkt { /* BOOTP packet format */ + u8 op; /* 1=request, 2=reply */ + u8 htype; /* HW address type */ + u8 hlen; /* HW address length */ + u8 hops; /* Used only by gateways */ + u32 xid; /* Transaction ID */ + u16 secs; /* Seconds since we started */ + u16 flags; /* Just what it says */ + u32 client_ip; /* Client's IP address if known */ + u32 your_ip; /* Assigned IP address */ + u32 server_ip; /* Server's IP address */ + u32 relay_ip; /* IP address of BOOTP relay */ + u8 hw_addr[16]; /* Client's HW address */ + u8 serv_name[64]; /* Server host name */ + u8 boot_file[128]; /* Name of boot file */ + u8 vendor_area[128]; /* Area for extensions */ +}; + +#define BOOTP_REQUEST 1 +#define BOOTP_REPLY 2 + +static struct bootp_pkt *ic_xmit_bootp __initdata = NULL; /* Packet being transmitted */ +static struct bootp_pkt *ic_recv_bootp __initdata = NULL; /* Packet being received */ + +/* + * Dirty tricks for BOOTP packet routing. We replace the standard lookup function + * for the local fib by our version which does fake lookups and returns our private + * fib entries. Ugly, but it seems to be the simplest way to do the job. 
+ */ + +static void *ic_old_local_lookup __initdata = NULL; /* Old local routing table lookup function */ +static struct fib_info *ic_bootp_tx_fib __initdata = NULL; /* Our fake fib entries */ +static struct fib_info *ic_bootp_rx_fib __initdata = NULL; + +__initfunc(static int ic_bootp_route_lookup(struct fib_table *tb, const struct rt_key *key, + struct fib_result *res)) +{ + static u32 ic_brl_zero = 0; + + DBG(("BOOTP: Route lookup: %d:%08x -> %d:%08x: ", key->iif, key->src, key->oif, key->dst)); + res->scope = RT_SCOPE_UNIVERSE; + res->prefix = &ic_brl_zero; + res->prefixlen = 0; + res->nh_sel = 0; + if (key->src == 0 && key->dst == 0xffffffff && key->iif == loopback_dev.ifindex) { /* Packet output */ + DBG(("Output\n")); + res->type = RTN_UNICAST; + res->fi = ic_bootp_tx_fib; + } else if (key->iif && key->iif != loopback_dev.ifindex && key->oif == 0) { /* Packet input */ + DBG(("Input\n")); + res->type = RTN_LOCAL; + res->fi = ic_bootp_rx_fib; + } else if (!key->iif && !key->oif && !key->src) { /* Address check by inet_addr_type() */ + DBG(("Check\n")); + res->type = RTN_UNICAST; + res->fi = ic_bootp_tx_fib; + } else { + DBG(("Drop\n")); + return -EINVAL; + } + return 0; +} + +__initfunc(static int ic_set_bootp_route(struct ic_device *d)) +{ + struct fib_info *f = ic_bootp_tx_fib; + struct fib_nh *n = &f->fib_nh[0]; + + n->nh_dev = d->dev; + n->nh_oif = n->nh_dev->ifindex; + rt_cache_flush(0); + return 0; +} + +__initfunc(static int ic_bootp_route_init(void)) +{ + int size = sizeof(struct fib_info) + sizeof(struct fib_nh); + struct fib_info *rf, *tf; + struct fib_nh *nh; + + if (!(rf = ic_bootp_rx_fib = kmalloc(size, GFP_KERNEL)) || + !(tf = ic_bootp_tx_fib = kmalloc(size, GFP_KERNEL))) + return -1; + + memset(rf, 0, size); + rf->fib_nhs = 1; + nh = &rf->fib_nh[0]; + nh->nh_scope = RT_SCOPE_UNIVERSE; + + memset(tf, 0, size); + rf->fib_nhs = 1; + nh = &rf->fib_nh[0]; + nh->nh_dev = ic_first_dev->dev; + nh->nh_scope = RT_SCOPE_UNIVERSE; + nh->nh_oif = 
nh->nh_dev->ifindex; + + /* Dirty trick: replace standard routing table lookup by our function */ + ic_old_local_lookup = local_table->tb_lookup; + local_table->tb_lookup = ic_bootp_route_lookup; + + return 0; +} + +__initfunc(static void ic_bootp_route_cleanup(void)) +{ + if (ic_old_local_lookup) + local_table->tb_lookup = ic_old_local_lookup; + if (ic_bootp_rx_fib) + kfree_s(ic_bootp_rx_fib, sizeof(struct fib_info) + sizeof(struct fib_nh)); + if (ic_bootp_tx_fib) + kfree_s(ic_bootp_tx_fib, sizeof(struct fib_info) + sizeof(struct fib_nh)); +} + + +/* + * Allocation and freeing of BOOTP packet buffers. + */ +__initfunc(static int ic_bootp_alloc(void)) +{ + if (!(ic_xmit_bootp = kmalloc(sizeof(struct bootp_pkt), GFP_KERNEL)) || + !(ic_recv_bootp = kmalloc(sizeof(struct bootp_pkt), GFP_KERNEL))) { + printk(KERN_ERR "BOOTP: Out of memory!\n"); + return -1; + } + return 0; +} + +__initfunc(static void ic_bootp_free(void)) +{ + if (ic_xmit_bootp) { + kfree_s(ic_xmit_bootp, sizeof(struct bootp_pkt)); + ic_xmit_bootp = NULL; + } + if (ic_recv_bootp) { + kfree_s(ic_recv_bootp, sizeof(struct bootp_pkt)); + ic_recv_bootp = NULL; + } +} + + +/* + * Add / Remove fake interface addresses for BOOTP packet sending. + */ +__initfunc(static int ic_bootp_addrs_add(void)) +{ + struct ic_device *d; + int err; + + for(d=ic_first_dev; d; d=d->next) + if ((err = inet_add_bootp_addr(d->dev)) < 0) { + printk(KERN_ERR "BOOTP: Unable to set interface address\n"); + return -1; + } + return 0; +} + +__initfunc(static void ic_bootp_addrs_del(void)) +{ + struct ic_device *d; + + for(d=ic_first_dev; d; d=d->next) + inet_del_bootp_addr(d->dev); +} + +/* + * UDP socket operations. 
+ */ +__initfunc(static int ic_udp_open(struct socket **sock)) +{ + int err; + + if ((err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_UDP, sock)) < 0) + printk(KERN_ERR "BOOTP: Cannot open UDP socket!\n"); + return err; +} + +static inline void ic_udp_close(struct socket *sock) +{ + if (sock) + sock_release(sock); +} + +__initfunc(static int ic_udp_connect(struct socket *sock, u32 addr, u16 port)) +{ + struct sockaddr_in sa; + int err; + + set_sockaddr(&sa, htonl(addr), htons(port)); + err = sock->ops->connect(sock, (struct sockaddr *) &sa, sizeof(sa), 0); + if (err < 0) { + printk(KERN_ERR "BOOTP: connect() failed (%d)\n", err); + return -1; + } + return 0; +} + +__initfunc(static int ic_udp_bind(struct socket *sock, u32 addr, u16 port)) +{ + struct sockaddr_in sa; + int err; + + set_sockaddr(&sa, htonl(addr), htons(port)); + err = sock->ops->bind(sock, (struct sockaddr *) &sa, sizeof(sa)); + if (err < 0) { + printk(KERN_ERR "BOOTP: bind() failed (%d)\n", err); + return -1; + } + return 0; +} + +__initfunc(static int ic_udp_send(struct socket *sock, void *buf, int size)) +{ + mm_segment_t oldfs; + int result; + struct msghdr msg; + struct iovec iov; + + oldfs = get_fs(); + set_fs(get_ds()); + iov.iov_base = buf; + iov.iov_len = size; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + result = sock_sendmsg(sock, &msg, size); + set_fs(oldfs); + + return (result != size); +} + +__initfunc(static int ic_udp_recv(struct socket *sock, void *buf, int size)) +{ + mm_segment_t oldfs; + int result; + struct msghdr msg; + struct iovec iov; + + oldfs = get_fs(); + set_fs(get_ds()); + iov.iov_base = buf; + iov.iov_len = size; + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + result = sock_recvmsg(sock, &msg, size, MSG_DONTWAIT); + set_fs(oldfs); + return result; +} + + +/* + * Initialize BOOTP extension fields in the request. 
+ */ +__initfunc(static void ic_bootp_init_ext(u8 *e)) +{ + *e++ = 99; /* RFC1048 Magic Cookie */ + *e++ = 130; + *e++ = 83; + *e++ = 99; + *e++ = 1; /* Subnet mask request */ + *e++ = 4; + e += 4; + *e++ = 3; /* Default gateway request */ + *e++ = 4; + e += 4; + *e++ = 12; /* Host name request */ + *e++ = 32; + e += 32; + *e++ = 40; /* NIS Domain name request */ + *e++ = 32; + e += 32; + *e++ = 17; /* Boot path */ + *e++ = 32; + e += 32; + *e = 255; /* End of the list */ +} + + +/* + * Initialize the BOOTP mechanism. + */ +__initfunc(static int ic_bootp_init(void)) +{ + /* Allocate memory for BOOTP packets */ + if (ic_bootp_alloc() < 0) + return -1; + + /* Add fake zero addresses to all interfaces */ + if (ic_bootp_addrs_add() < 0) + return -1; + + /* Initialize BOOTP routing */ + if (ic_bootp_route_init() < 0) + return -1; + + /* Initialize common portion of BOOTP request */ + memset(ic_xmit_bootp, 0, sizeof(struct bootp_pkt)); + ic_xmit_bootp->op = BOOTP_REQUEST; + get_random_bytes(&ic_xmit_bootp->xid, sizeof(ic_xmit_bootp->xid)); + ic_bootp_init_ext(ic_xmit_bootp->vendor_area); + + DBG(("BOOTP: XID=%08x\n", ic_xmit_bootp->xid)); + + /* Open the sockets */ + if (ic_udp_open(&ic_bootp_xmit_sock) || + ic_udp_open(&ic_bootp_recv_sock)) + return -1; + + /* Bind/connect the sockets */ + ic_bootp_xmit_sock->sk->broadcast = 1; + ic_bootp_xmit_sock->sk->reuse = 1; + ic_bootp_recv_sock->sk->reuse = 1; + ic_set_bootp_route(ic_first_dev); + if (ic_udp_bind(ic_bootp_recv_sock, INADDR_ANY, 68) || + ic_udp_bind(ic_bootp_xmit_sock, INADDR_ANY, 68) || + ic_udp_connect(ic_bootp_xmit_sock, INADDR_BROADCAST, 67)) + return -1; + + return 0; +} + + +/* + * BOOTP cleanup. + */ +__initfunc(static void ic_bootp_cleanup(void)) +{ + ic_udp_close(ic_bootp_xmit_sock); + ic_udp_close(ic_bootp_recv_sock); + ic_bootp_addrs_del(); + ic_bootp_free(); + ic_bootp_route_cleanup(); +} + + +/* + * Send BOOTP request to single interface. 
+ */ +__initfunc(static int ic_bootp_send_if(struct ic_device *d, u32 jiffies)) +{ + struct device *dev = d->dev; + struct bootp_pkt *b = ic_xmit_bootp; + + b->htype = dev->type; + b->hlen = dev->addr_len; + memset(b->hw_addr, 0, sizeof(b->hw_addr)); + memcpy(b->hw_addr, dev->dev_addr, dev->addr_len); + b->secs = htons(jiffies / HZ); + ic_set_bootp_route(d); + return ic_udp_send(ic_bootp_xmit_sock, b, sizeof(struct bootp_pkt)); +} + + +/* + * Send BOOTP requests to all interfaces. + */ +__initfunc(static int ic_bootp_send(u32 jiffies)) +{ + struct ic_device *d; + + for(d=ic_first_dev; d; d=d->next) + if (ic_bootp_send_if(d, jiffies) < 0) + return -1; + return 0; +} + + +/* + * Copy BOOTP-supplied string if not already set. + */ +__initfunc(static int ic_bootp_string(char *dest, char *src, int len, int max)) +{ + if (!len) + return 0; + if (len > max-1) + len = max-1; + strncpy(dest, src, len); + dest[len] = '\0'; + return 1; +} + + +/* + * Process BOOTP extension. + */ +__initfunc(static void ic_do_bootp_ext(u8 *ext)) +{ +#ifdef IPCONFIG_DEBUG + u8 *c; + + printk("BOOTP: Got extension %02x",*ext); + for(c=ext+2; cop != BOOTP_REPLY || + b->xid != ic_xmit_bootp->xid) { + printk("?"); + return; + } + + /* Find interface this arrived from */ + for(d=ic_first_dev; d; d=d->next) { + struct device *dev = d->dev; + if (b->htype == dev->type || + b->hlen == dev->addr_len || + !memcmp(b->hw_addr, dev->dev_addr, dev->addr_len)) + break; + } + if (!d) { /* Unknown device */ + printk("!"); + return; + } + + /* Record BOOTP packet arrival */ + cli(); + if (ic_got_reply) { + sti(); + return; + } + ic_got_reply = IC_GOT_BOOTP; + sti(); + ic_dev = d->dev; + + /* Extract basic fields */ + ic_myaddr = b->your_ip; + ic_servaddr = b->server_ip; + + /* Parse extensions */ + if (b->vendor_area[0] == 99 && /* Check magic cookie */ + b->vendor_area[1] == 130 && + b->vendor_area[2] == 83 && + b->vendor_area[3] == 99) { + ext = &b->vendor_area[4]; + end = (u8 *) b + len; + while (ext < end 
&& *ext != 0xff) { + if (*ext == 0) /* Padding */ + ext++; + else { + opt = ext; + ext += ext[1] + 2; + if (ext <= end) + ic_do_bootp_ext(opt); + } + } + } +} + +#endif + + +/* + * Dynamic IP configuration -- BOOTP and RARP. + */ + +#ifdef CONFIG_IP_PNP_DYNAMIC + +__initfunc(int ic_dynamic(void)) +{ + int retries; + unsigned long timeout, jiff; + unsigned long start_jiffies; + + /* + * If neither BOOTP nor RARP was selected, return with an error. This + * routine gets only called when some pieces of information are mis- + * sing, and without BOOTP and RARP we are not able to get that in- + * formation. + */ + if (!ic_bootp_flag && !ic_rarp_flag) { + printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n"); + return -1; + } + +#ifdef CONFIG_IP_PNP_BOOTP + if (ic_bootp_flag && !bootp_dev_count) { + printk(KERN_ERR "BOOTP: No suitable device found.\n"); + ic_bootp_flag = 0; + } +#else + ic_bootp_flag = 0; +#endif + +#ifdef CONFIG_IP_PNP_RARP + if (ic_rarp_flag && !rarp_dev_count) { + printk(KERN_ERR "RARP: No suitable device found.\n"); + ic_rarp_flag = 0; + } +#else + ic_rarp_flag = 0; +#endif + + if (!ic_bootp_flag && !ic_rarp_flag) + /* Error message already printed */ + return -1; + + /* + * Setup RARP and BOOTP protocols + */ +#ifdef CONFIG_IP_PNP_RARP + if (ic_rarp_flag) + ic_rarp_init(); +#endif +#ifdef CONFIG_IP_PNP_BOOTP + if (ic_bootp_flag && ic_bootp_init() < 0) { + ic_bootp_cleanup(); + return -1; + } +#endif + + /* + * Send requests and wait, until we get an answer. This loop + * seems to be a terrible waste of CPU time, but actually there is + * only one process running at all, so we don't need to use any + * scheduler functions. + * [Actually we could now, but the nothing else running note still + * applies.. - AC] + */ + printk(KERN_NOTICE "Sending %s%s%s requests...", + ic_bootp_flag ? "BOOTP" : "", + ic_bootp_flag && ic_rarp_flag ? " and " : "", + ic_rarp_flag ? 
"RARP" : ""); + start_jiffies = jiffies; + retries = CONF_RETRIES; + get_random_bytes(&timeout, sizeof(timeout)); + timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); + for(;;) { +#ifdef CONFIG_IP_PNP_BOOTP + if (ic_bootp_flag && ic_bootp_send(jiffies - start_jiffies) < 0) { + printk(" BOOTP failed!\n"); + ic_bootp_cleanup(); + ic_bootp_flag = 0; + if (!ic_rarp_flag) + break; + } +#endif +#ifdef CONFIG_IP_PNP_RARP + if (ic_rarp_flag) + ic_rarp_send(); +#endif + printk("."); + jiff = jiffies + timeout; + while (jiffies < jiff && !ic_got_reply) +#ifdef CONFIG_IP_PNP_BOOTP + if (ic_bootp_flag) + ic_bootp_recv(); +#else + ; +#endif + if (ic_got_reply) { + printk(" OK\n"); + break; + } + if (! --retries) { + printk(" timed out!\n"); + break; + } + timeout = timeout CONF_TIMEOUT_MULT; + if (timeout > CONF_TIMEOUT_MAX) + timeout = CONF_TIMEOUT_MAX; + } + +#ifdef CONFIG_IP_PNP_RARP + if (ic_rarp_flag) + ic_rarp_cleanup(); +#endif +#ifdef CONFIG_IP_PNP_BOOTP + if (ic_bootp_flag) + ic_bootp_cleanup(); +#endif + + if (!ic_got_reply) + return -1; + + printk("IP-Config: Got %s answer from %s, ", + (ic_got_reply == IC_GOT_BOOTP) ? "BOOTP" : "RARP", + in_ntoa(ic_servaddr)); + printk("my address is %s\n", in_ntoa(ic_myaddr)); + + return 0; +} + +#endif + +/* + * IP Autoconfig dispatcher. + */ + +__initfunc(int ip_auto_config(void)) +{ + if (!ic_enable) + return 0; + + DBG(("IP-Config: Entered.\n")); + + /* Setup all network devices */ + if (ic_open_devs() < 0) + return -1; + + /* + * If the config information is insufficient (e.g., our IP address or + * IP address of the boot server is missing or we have multiple network + * interfaces and no default was set), use BOOTP or RARP to get the + * missing values. 
+ */ + if (ic_myaddr == INADDR_NONE || +#ifdef CONFIG_ROOT_NFS + root_server_addr == INADDR_NONE || +#endif + (ic_first_dev && ic_first_dev->next)) { +#ifdef CONFIG_IP_PNP_DYNAMIC + if (ic_dynamic() < 0) { + printk(KERN_ERR "IP-Config: Auto-configuration of network failed.\n"); + ic_close_devs(); + return -1; + } +#else + printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n"); + ic_close_devs(); + return -1; +#endif + } else { + ic_dev = ic_first_dev->dev; /* Device selected manually or only one device -> use it */ + } + + /* + * Use defaults whereever applicable. + */ + if (ic_defaults() < 0) + return -1; + + /* + * Close all network devices except the device we've + * autoconfigured and set up routes. + */ + ic_close_devs(); + if (ic_setup_if() < 0 || ic_setup_routes() < 0) + return -1; + + DBG(("IP-Config: device=%s, local=%08x, server=%08x, boot=%08x, gw=%08x, mask=%08x\n", + ic_dev->name, ic_myaddr, ic_servaddr, root_server_addr, ic_gateway, ic_netmask)); + DBG(("IP-Config: host=%s, domain=%s, path=`%s'\n", system_utsname.nodename, + system_utsname.domainname, root_server_path)); + return 0; +} + +/* + * Decode any IP configuration options in the "ipconfig" kernel command + * line parameter. 
It consists of option fields separated by colons in + * the following order: + * + * :::::: + * + * Any of the fields can be empty which means to use a default value: + * - address given by BOOTP or RARP + * - address of host returning BOOTP or RARP packet + * - none, or the address returned by BOOTP + * - automatically determined from , or the + * one returned by BOOTP + * - in ASCII notation, or the name returned + * by BOOTP + * - use all available devices + * - use both protocols to determine my own address + */ +__initfunc(void ip_auto_config_setup(char *addrs, int *ints)) +{ + char *cp, *ip, *dp; + int num = 0; + + ic_set_manually = 1; + + if (!strcmp(addrs, "bootp")) { + ic_rarp_flag = 0; + return; + } else if (!strcmp(addrs, "rarp")) { + ic_bootp_flag = 0; + return; + } else if (!strcmp(addrs, "both")) { + return; + } else if (!strcmp(addrs, "off")) { + ic_enable = 0; + return; + } + + /* Parse the whole string */ + ip = addrs; + while (ip && *ip) { + if ((cp = strchr(ip, ':'))) + *cp++ = '\0'; + if (strlen(ip) > 0) { + DBG(("IP-Config: Parameter #%d: `%s'\n", num, ip)); + switch (num) { + case 0: + if ((ic_myaddr = in_aton(ip)) == INADDR_ANY) + ic_myaddr = INADDR_NONE; + break; + case 1: + if ((ic_servaddr = in_aton(ip)) == INADDR_ANY) + ic_servaddr = INADDR_NONE; + break; + case 2: + if ((ic_gateway = in_aton(ip)) == INADDR_ANY) + ic_gateway = INADDR_NONE; + break; + case 3: + if ((ic_netmask = in_aton(ip)) == INADDR_ANY) + ic_netmask = INADDR_NONE; + break; + case 4: + if ((dp = strchr(ip, '.'))) { + *dp++ = '\0'; + strncpy(system_utsname.domainname, dp, __NEW_UTS_LEN); + system_utsname.domainname[__NEW_UTS_LEN] = '\0'; + } + strncpy(system_utsname.nodename, ip, __NEW_UTS_LEN); + system_utsname.nodename[__NEW_UTS_LEN] = '\0'; + ic_host_name_set = 1; + break; + case 5: + strncpy(user_dev_name, ip, IFNAMSIZ); + user_dev_name[IFNAMSIZ-1] = '\0'; + break; + case 6: + if (!strcmp(ip, "rarp")) + ic_bootp_flag = 0; + else if (!strcmp(ip, "bootp")) + 
ic_rarp_flag = 0; + else if (strcmp(ip, "both")) + ic_bootp_flag = ic_rarp_flag = 0; + break; + } + } + ip = cp; + num++; + } +} diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ipip.c linux/net/ipv4/ipip.c --- v2.1.67/linux/net/ipv4/ipip.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/ipip.c Sun Nov 30 14:00:39 1997 @@ -1,6 +1,8 @@ /* * Linux NET3: IP/IP protocol decoder. * + * Version: $Id: ipip.c,v 1.19 1997/11/08 17:50:21 kuznet Exp $ + * * Authors: * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 * @@ -11,6 +13,11 @@ * to keep ip_forward happy. * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8). * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL + * David Woodhouse : Perform some basic ICMP handling. + * IPIP Routing without decapsulation. + * Carlos Picoto : GRE over IP support + * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c. + * I do not want to merge them together. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,12 +25,80 @@ * 2 of the License, or (at your option) any later version. * */ + +/* tunnel.c: an IP tunnel driver + + The purpose of this driver is to provide an IP tunnel through + which you can tunnel network traffic transparently across subnets. + + This was written by looking at Nick Holloway's dummy driver + Thanks for the great code! + + -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 + + Minor tweaks: + Cleaned up the code a little and added some pre-1.3.0 tweaks. + dev->hard_header/hard_header_len changed to use no headers. + Comments/bracketing tweaked. + Made the tunnels use dev->name not tunnel: when error reporting. 
+ Added tx_dropped stat + + -Alan Cox (Alan.Cox@linux.org) 21 March 95 + + Reworked: + Changed to tunnel to destination gateway in addition to the + tunnel's pointopoint address + Almost completely rewritten + Note: There is currently no firewall or ICMP handling done. + + -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96 + +*/ + +/* Things I wish I had known when writing the tunnel driver: + + When the tunnel_xmit() function is called, the skb contains the + packet to be sent (plus a great deal of extra info), and dev + contains the tunnel device that _we_ are. + + When we are passed a packet, we are expected to fill in the + source address with our source IP address. + + What is the proper way to allocate, copy and free a buffer? + After you allocate it, it is a "0 length" chunk of memory + starting at zero. If you want to add headers to the buffer + later, you'll have to call "skb_reserve(skb, amount)" with + the amount of memory you want reserved. Then, you call + "skb_put(skb, amount)" with the amount of space you want in + the buffer. skb_put() returns a pointer to the top (#0) of + that buffer. skb->len is set to the amount of space you have + "allocated" with skb_put(). You can then write up to skb->len + bytes to that buffer. If you need more, you can call skb_put() + again with the additional amount of space you need. You can + find out how much more space you can allocate by calling + "skb_tailroom(skb)". + Now, to add header space, call "skb_push(skb, header_len)". + This creates space at the beginning of the buffer and returns + a pointer to this new space. If later you need to strip a + header from a buffer, call "skb_pull(skb, header_len)". + skb_headroom() will return how much space is left at the top + of the buffer (before the main data). Remember, this headroom + space must be reserved before the skb_put() function is called. 
+ */ + +/* + This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c + + For comments look at net/ipv4/ip_gre.c --ANK + */ + -#include #include +#include #include #include #include +#include #include #include #include @@ -31,91 +106,673 @@ #include #include #include +#include -#include #include #include #include #include #include -void ipip_err(struct sk_buff *skb, unsigned char *dp) +#define HASH_SIZE 16 +#define HASH(addr) ((addr^(addr>>4))&0xF) + +static int ipip_fb_tunnel_init(struct device *dev); +static int ipip_tunnel_init(struct device *dev); + +static struct device ipip_fb_tunnel_dev = { + NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip_fb_tunnel_init, +}; + +static struct ip_tunnel ipip_fb_tunnel = { + NULL, &ipip_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"tunl0", } +}; + +static struct ip_tunnel *tunnels_r_l[HASH_SIZE]; +static struct ip_tunnel *tunnels_r[HASH_SIZE]; +static struct ip_tunnel *tunnels_l[HASH_SIZE]; +static struct ip_tunnel *tunnels_wc[1]; +static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l }; + +static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local) { - /* NI */ - return; + unsigned h0 = HASH(remote); + unsigned h1 = HASH(local); + struct ip_tunnel *t; + + for (t = tunnels_r_l[h0^h1]; t; t = t->next) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) + return t; + } + for (t = tunnels_r[h0]; t; t = t->next) { + if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) + return t; + } + for (t = tunnels_l[h1]; t; t = t->next) { + if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) + return t; + } + if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) + return t; + return NULL; } -/* - * The IPIP protocol driver. - * - * On entry here - * skb->data is the original IP header - * skb->nh points to the initial IP header. - * skb->h points at the new header. 
+struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create) +{ + u32 remote = parms->iph.daddr; + u32 local = parms->iph.saddr; + struct ip_tunnel *t, **tp, *nt; + struct device *dev; + unsigned h = 0; + int prio = 0; + + if (remote) { + prio |= 2; + h ^= HASH(remote); + } + if (local) { + prio |= 1; + h ^= HASH(local); + } + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) + return t; + } + if (!create) + return NULL; + + MOD_INC_USE_COUNT; + dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL); + if (dev == NULL) { + MOD_DEC_USE_COUNT; + return NULL; + } + memset(dev, 0, sizeof(*dev) + sizeof(*t)); + dev->priv = (void*)(dev+1); + nt = (struct ip_tunnel*)dev->priv; + nt->dev = dev; + dev->name = nt->parms.name; + dev->init = ipip_tunnel_init; + memcpy(&nt->parms, parms, sizeof(*parms)); + if (dev->name[0] == 0) { + int i; + for (i=1; i<100; i++) { + sprintf(dev->name, "tunl%d", i); + if (dev_get(dev->name) == NULL) + break; + } + if (i==100) + goto failed; + memcpy(parms->name, dev->name, IFNAMSIZ); + } + if (register_netdevice(dev) < 0) + goto failed; + + start_bh_atomic(); + nt->next = t; + *tp = nt; + end_bh_atomic(); + /* Do not decrement MOD_USE_COUNT here. 
*/ + return nt; + +failed: + kfree(dev); + MOD_DEC_USE_COUNT; + return NULL; +} + +static void ipip_tunnel_destroy(struct device *dev) +{ + struct ip_tunnel *t, **tp; + struct ip_tunnel *t0 = (struct ip_tunnel*)dev->priv; + u32 remote = t0->parms.iph.daddr; + u32 local = t0->parms.iph.saddr; + unsigned h = 0; + int prio = 0; + + if (dev == &ipip_fb_tunnel_dev) { + tunnels_wc[0] = NULL; + return; + } + + if (remote) { + prio |= 2; + h ^= HASH(remote); + } + if (local) { + prio |= 1; + h ^= HASH(local); + } + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { + if (t == t0) { + *tp = t->next; + kfree(dev); + MOD_DEC_USE_COUNT; + break; + } + } +} + + +void ipip_err(struct sk_buff *skb, unsigned char *dp, int len) +{ +#ifndef I_WISH_WORLD_WERE_PERFECT + +/* It is not :-( All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. */ + struct iphdr *iph = (struct iphdr*)dp; + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + struct ip_tunnel *t; + + if (len < sizeof(struct iphdr)) + return; + + switch (type) { + default: + case ICMP_PARAMETERPROB: + return; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* Soft state for pmtu is maintained by IP core. */ + return; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe they are just ether pollution. 
--ANK + */ + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + } + + t = ipip_tunnel_lookup(iph->daddr, iph->saddr); + if (t == NULL || t->parms.iph.daddr == 0) + return; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) + return; + + if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; + return; +#else + struct iphdr *iph = (struct iphdr*)dp; + int hlen = iph->ihl<<2; + struct iphdr *eiph; + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + int rel_type = 0; + int rel_code = 0; + int rel_info = 0; + struct sk_buff *skb2; + struct rtable *rt; + + if (len < hlen + sizeof(struct iphdr)) + return; + eiph = (struct iphdr*)(dp + hlen); + + switch (type) { + default: + return; + case ICMP_PARAMETERPROB: + if (skb->h.icmph->un.gateway < hlen) + return; + + /* So... This guy found something strange INSIDE encapsulated + packet. Well, he is fool, but what can we do ? + */ + rel_type = ICMP_PARAMETERPROB; + rel_info = skb->h.icmph->un.gateway - hlen; + break; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* And it is the only really necesary thing :-) */ + rel_info = ntohs(skb->h.icmph->un.frag.mtu); + if (rel_info < hlen+68) + return; + rel_info -= hlen; + /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ + if (rel_info > ntohs(eiph->tot_len)) + return; + break; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe, it is just ether pollution. 
--ANK + */ + rel_type = ICMP_DEST_UNREACH; + rel_code = ICMP_HOST_UNREACH; + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; + } + + /* Prepare fake skb to feed it to icmp_send */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 == NULL) + return; + dst_release(skb2->dst); + skb2->dst = NULL; + skb_pull(skb2, skb->data - (u8*)eiph); + skb2->nh.raw = skb2->data; + + /* Try to guess incoming interface */ + if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) { + kfree_skb(skb2, FREE_WRITE); + return; + } + skb2->dev = rt->u.dst.dev; + + /* route "incoming" packet */ + if (rt->rt_flags&RTCF_LOCAL) { + ip_rt_put(rt); + rt = NULL; + if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) || + rt->u.dst.dev->type != ARPHRD_IPGRE) { + ip_rt_put(rt); + kfree_skb(skb2, FREE_WRITE); + return; + } + } else { + ip_rt_put(rt); + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || + skb2->dst->dev->type != ARPHRD_IPGRE) { + kfree_skb(skb2, FREE_WRITE); + return; + } + } + + /* change mtu on this route */ + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + if (rel_info > skb2->dst->pmtu) { + kfree_skb(skb2, FREE_WRITE); + return; + } + skb2->dst->pmtu = rel_info; + rel_info = htonl(rel_info); + } else if (type == ICMP_TIME_EXCEEDED) { + struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; + if (t->parms.iph.ttl) { + rel_type = ICMP_DEST_UNREACH; + rel_code = ICMP_HOST_UNREACH; + } + } + + icmp_send(skb2, rel_type, rel_code, rel_info); + kfree_skb(skb2, FREE_WRITE); + return; +#endif +} int ipip_rcv(struct sk_buff *skb, unsigned short len) { - struct device *dev; struct iphdr *iph; + struct ip_tunnel *tunnel; -#ifdef TUNNEL_DEBUG - printk("ipip_rcv: got a packet!\n"); -#endif - /* - * Discard the original IP header - */ - - skb_pull(skb, skb->h.raw - skb->nh.raw); - - /* - * Adjust pointers - */ - iph = skb->nh.iph; - skb->nh.iph = skb->h.ipiph; + skb->mac.raw = skb->nh.raw; 
+ skb->nh.raw = skb_pull(skb, skb->h.raw - skb->data); memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); - - /* - * If you want to add LZ compressed IP or things like that here, - * and in drivers/net/tunnel.c are the places to add. - */ - - skb->protocol = htons(ETH_P_IP); + skb->protocol = __constant_htons(ETH_P_IP); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; + if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { + tunnel->stat.rx_packets++; + tunnel->stat.rx_bytes += skb->len; + skb->dev = tunnel->dev; + dst_release(skb->dst); + skb->dst = NULL; + netif_rx(skb); + return 0; + } + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); + kfree_skb(skb, FREE_READ); + return 0; +} + +/* + * This function assumes it is being called from dev_queue_xmit() + * and that skb is filled properly by that function. + */ + +static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct net_device_stats *stats = &tunnel->stat; + struct iphdr *tiph = &tunnel->parms.iph; + u8 tos = tunnel->parms.iph.tos; + u16 df = tiph->frag_off; + struct rtable *rt; /* Route to the other host */ + struct device *tdev; /* Device to other host */ + struct iphdr *old_iph = skb->nh.iph; + struct iphdr *iph; /* Our new IP header */ + int max_headroom; /* The extra header space needed */ + u32 dst = tiph->daddr; + int mtu; + + if (tunnel->recursion++) { + tunnel->stat.collisions++; + goto tx_error; + } + + if (skb->protocol != __constant_htons(ETH_P_IP)) + goto tx_error; + + if (tos&1) + tos = old_iph->tos; + + if (!dst) { + /* NBMA tunnel */ + if ((rt = (struct rtable*)skb->dst) == NULL) { + tunnel->stat.tx_fifo_errors++; + goto tx_error; + } + if ((dst = rt->rt_gateway) == 0) + goto tx_error_icmp; + } + + if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { + tunnel->stat.tx_carrier_errors++; + goto tx_error_icmp; + } + tdev = rt->u.dst.dev; + + if (tdev == dev) { 
+ ip_rt_put(rt); + tunnel->stat.collisions++; + goto tx_error; + } + + mtu = rt->u.dst.pmtu - sizeof(struct iphdr); + if (mtu < 68) { + tunnel->stat.collisions++; + ip_rt_put(rt); + goto tx_error; + } + if (skb->dst && mtu < skb->dst->pmtu) + skb->dst->pmtu = mtu; + + df |= (old_iph->frag_off&__constant_htons(IP_DF)); + + if ((old_iph->frag_off&__constant_htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } + + if (tunnel->err_count > 0) { + if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { + tunnel->err_count--; + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + } else + tunnel->err_count = 0; + } + + skb->h.raw = skb->nh.raw; + /* - * Is it draconic? I do not think so. --ANK + * Okay, now see if we can stuff it in the buffer as-is. */ - dev = ip_dev_find_tunnel(iph->daddr, iph->saddr); - if (dev == NULL) { -#ifdef CONFIG_IP_MROUTE - int vif; + max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr)); - if (!MULTICAST(skb->nh.iph->daddr) || - !ipv4_config.multicast_route || - LOCAL_MCAST(skb->nh.iph->daddr) || - (vif=ip_mr_find_tunnel(iph->daddr, iph->saddr)) < 0) - { -#endif - kfree_skb(skb, FREE_READ); - return -EINVAL; -#ifdef CONFIG_IP_MROUTE - } - IPCB(skb)->flags |= IPSKB_TUNNELED; - IPCB(skb)->vif = vif; - dev = skb->dev; -#endif + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + ip_rt_put(rt); + stats->tx_dropped++; + dev_kfree_skb(skb, FREE_WRITE); + tunnel->recursion--; + return 0; + } + dev_kfree_skb(skb, FREE_WRITE); + skb = new_skb; } - skb->dev = dev; + + skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); dst_release(skb->dst); - skb->dst = NULL; - netif_rx(skb); - return(0); + skb->dst = &rt->u.dst; + + /* + * Push down and install the IPIP header. 
+ */ + + iph = skb->nh.iph; + iph->version = 4; + iph->ihl = sizeof(struct iphdr)>>2; + iph->frag_off = df; + iph->protocol = IPPROTO_IPIP; + iph->tos = tos; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + + if ((iph->ttl = tiph->ttl) == 0) + iph->ttl = old_iph->ttl; + + iph->tot_len = htons(skb->len); + iph->id = htons(ip_id_count++); + ip_send_check(iph); + + stats->tx_bytes += skb->len; + stats->tx_packets++; + ip_send(skb); + tunnel->recursion--; + return 0; + +tx_error_icmp: + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); +tx_error: + stats->tx_errors++; + dev_kfree_skb(skb, FREE_WRITE); + tunnel->recursion--; + return 0; +} + +static int +ipip_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip_tunnel_parm p; + struct ip_tunnel *t; + + MOD_INC_USE_COUNT; + + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == &ipip_fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + t = ipip_tunnel_locate(&p, 0); + } + if (t == NULL) + t = (struct ip_tunnel*)dev->priv; + memcpy(&p, &t->parms, sizeof(p)); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF))) + goto done; + if (p.iph.ttl) + p.iph.frag_off |= __constant_htons(IP_DF); + + t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL); + + if (t) { + err = 0; + if (cmd == SIOCCHGTUNNEL) { + t->parms.iph.ttl = p.iph.ttl; + t->parms.iph.tos = p.iph.tos; + t->parms.iph.frag_off = p.iph.frag_off; + } + if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + if (dev == &ipip_fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + if ((t = ipip_tunnel_locate(&p, 0)) == NULL) + goto done; + err = -EPERM; + if (t == &ipip_fb_tunnel) + goto done; + } + err = unregister_netdevice(dev); + break; + + default: + err = -EINVAL; + } + +done: + MOD_DEC_USE_COUNT; + return err; +} + +static struct net_device_stats *ipip_tunnel_get_stats(struct device *dev) +{ + return &(((struct ip_tunnel*)dev->priv)->stat); +} + +static int ipip_tunnel_change_mtu(struct device *dev, int new_mtu) +{ + if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static void ipip_tunnel_init_gen(struct device *dev) +{ + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + + dev->destructor = ipip_tunnel_destroy; + dev->hard_start_xmit = ipip_tunnel_xmit; + dev->get_stats = ipip_tunnel_get_stats; + dev->do_ioctl = ipip_tunnel_ioctl; + dev->change_mtu = ipip_tunnel_change_mtu; + + dev_init_buffers(dev); + + dev->type = ARPHRD_TUNNEL; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); + dev->mtu = 1500 - sizeof(struct iphdr); + dev->flags = IFF_NOARP; + dev->iflink = 0; + dev->addr_len = 4; + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + memcpy(dev->broadcast, &t->parms.iph.daddr, 4); +} + +static int ipip_tunnel_init(struct device *dev) +{ + struct device *tdev = NULL; + struct ip_tunnel *tunnel; + struct iphdr *iph; + + tunnel = (struct ip_tunnel*)dev->priv; + iph = &tunnel->parms.iph; + + ipip_tunnel_init_gen(dev); + + if (iph->daddr) { + struct rtable *rt; + if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) { + tdev = rt->u.dst.dev; + ip_rt_put(rt); + } + dev->flags |= IFF_POINTOPOINT; + } + + if (!tdev && tunnel->parms.link) + tdev = dev_get_by_index(tunnel->parms.link); + + if (tdev) { + dev->hard_header_len = 
tdev->hard_header_len + sizeof(struct iphdr); + dev->mtu = tdev->mtu - sizeof(struct iphdr); + } + dev->iflink = tunnel->parms.link; + + return 0; } #ifdef MODULE +static int ipip_fb_tunnel_open(struct device *dev) +{ + MOD_INC_USE_COUNT; + return 0; +} + +static int ipip_fb_tunnel_close(struct device *dev) +{ + MOD_DEC_USE_COUNT; + return 0; +} +#endif + +__initfunc(int ipip_fb_tunnel_init(struct device *dev)) +{ + struct iphdr *iph; + + ipip_tunnel_init_gen(dev); +#ifdef MODULE + dev->open = ipip_fb_tunnel_open; + dev->stop = ipip_fb_tunnel_close; +#endif + + iph = &ipip_fb_tunnel.parms.iph; + iph->version = 4; + iph->protocol = IPPROTO_IPIP; + iph->ihl = 5; + + tunnels_wc[0] = &ipip_fb_tunnel; + return 0; +} static struct inet_protocol ipip_protocol = { ipip_rcv, /* IPIP handler */ @@ -127,21 +784,34 @@ "IPIP" /* name */ }; +#ifdef MODULE +int init_module(void) +#else +__initfunc(int ipip_init(void)) +#endif +{ + printk(KERN_INFO "IPv4 over IPv4 tunneling driver\n"); -/* - * And now the modules code and kernel interface. - */ + ipip_fb_tunnel_dev.priv = (void*)&ipip_fb_tunnel; + ipip_fb_tunnel_dev.name = ipip_fb_tunnel.parms.name; +#ifdef MODULE + register_netdev(&ipip_fb_tunnel_dev); +#else + register_netdevice(&ipip_fb_tunnel_dev); +#endif -int init_module( void) -{ inet_add_protocol(&ipip_protocol); return 0; } -void cleanup_module( void) +#ifdef MODULE + +void cleanup_module(void) { if ( inet_del_protocol(&ipip_protocol) < 0 ) printk(KERN_INFO "ipip close: can't remove protocol\n"); + + unregister_netdevice(&ipip_fb_tunnel_dev); } #endif diff -u --recursive --new-file v2.1.67/linux/net/ipv4/ipmr.c linux/net/ipv4/ipmr.c --- v2.1.67/linux/net/ipv4/ipmr.c Mon Jun 16 16:36:01 1997 +++ linux/net/ipv4/ipmr.c Sun Nov 30 14:00:39 1997 @@ -9,6 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
* + * Version: $Id: ipmr.c,v 1.28 1997/10/30 00:43:16 davem Exp $ * * Fixes: * Michael Chastain : Incorrect size of copying. @@ -20,14 +21,8 @@ * Alexey Kuznetsov : Status, optimisations and more. * Brad Parker : Better behaviour on mrouted upcall * overflow. + * Carlos Picoto : PIMv1 Support * - * Status: - * Cache manager under test. Forwarding in vague test mode - * Todo: - * Flow control - * Finish Tunnels - * Debug cache ttl handling properly - * Resolve IFF_ALLMULTI for rest of cards */ #include @@ -45,6 +40,8 @@ #include #include #include +#include +#include #include #include #include @@ -54,9 +51,16 @@ #include #include #include +#include #include +#include +#include #include +#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) +#define CONFIG_IP_PIMSM 1 +#endif + /* * Multicast router control variables */ @@ -64,10 +68,133 @@ static struct vif_device vif_table[MAXVIFS]; /* Devices */ static unsigned long vifc_map; /* Active device map */ static int maxvif; -int mroute_do_pim = 0; /* Set in PIM assert */ +int mroute_do_assert = 0; /* Set in PIM assert */ +int mroute_do_pim = 0; static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */ int cache_resolve_queue_len = 0; /* Size of unresolved */ +static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); +static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); +static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); + +extern struct inet_protocol pim_protocol; + +static +struct device *ipmr_new_tunnel(struct vifctl *v) +{ + struct device *dev = NULL; + + rtnl_lock(); + dev = dev_get("tunl0"); + + if (dev) { + int err; + struct ifreq ifr; + mm_segment_t oldfs; + struct ip_tunnel_parm p; + struct in_device *in_dev; + + memset(&p, 0, sizeof(p)); + p.iph.daddr = v->vifc_rmt_addr.s_addr; + p.iph.saddr = v->vifc_lcl_addr.s_addr; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPIP; + 
sprintf(p.name, "dvmrp%d", v->vifc_vifi); + ifr.ifr_ifru.ifru_data = (void*)&p; + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + + if (err == 0 && (dev = dev_get(p.name)) != NULL) { + dev->flags |= IFF_MULTICAST; + + in_dev = dev->ip_ptr; + if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) + goto failure; + + if (dev_open(dev)) + goto failure; + } + } + rtnl_unlock(); + return dev; + +failure: + unregister_netdevice(dev); + rtnl_unlock(); + return NULL; +} + +#ifdef CONFIG_IP_PIMSM + +static int reg_vif_num = -1; +static struct device * reg_dev; + +static int reg_vif_xmit(struct sk_buff *skb, struct device *dev) +{ + ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); + kfree_skb(skb, FREE_WRITE); + return 0; +} + +static struct net_device_stats *reg_vif_get_stats(struct device *dev) +{ + return (struct net_device_stats*)dev->priv; +} + +static +struct device *ipmr_reg_vif(struct vifctl *v) +{ + struct device *dev; + struct in_device *in_dev; + int size; + + size = sizeof(*dev) + IFNAMSIZ + sizeof(struct net_device_stats); + dev = kmalloc(size, GFP_KERNEL); + if (!dev) + return NULL; + + memset(dev, 0, size); + + dev->priv = dev + 1; + dev->name = dev->priv + sizeof(struct net_device_stats); + + strcpy(dev->name, "pimreg"); + + dev->type = ARPHRD_PIMREG; + dev->mtu = 1500 - sizeof(struct iphdr) - 8; + dev->flags = IFF_NOARP; + dev->hard_start_xmit = reg_vif_xmit; + dev->get_stats = reg_vif_get_stats; + + rtnl_lock(); + + if (register_netdevice(dev)) { + rtnl_unlock(); + kfree(dev); + return NULL; + } + + if ((in_dev = inetdev_init(dev)) == NULL) + goto failure; + + if (dev_open(dev)) + goto failure; + + rtnl_unlock(); + reg_dev = dev; + return dev; + +failure: + unregister_netdevice(dev); + rtnl_unlock(); + kfree(dev); + return NULL; +} +#endif + /* * Delete a VIF entry */ @@ -75,28 +202,36 @@ static int vif_delete(int vifi) { struct vif_device *v; + struct device *dev; + struct in_device 
*in_dev; if (vifi < 0 || vifi >= maxvif || !(vifc_map&(1<flags&VIFF_TUNNEL)) { - v->u.dev->flags &= ~IFF_ALLMULTI; - dev_mc_upload(v->u.dev); - ip_rt_multicast_event(v->u.dev); - v->u.dev = NULL; - } else { - ip_rt_put(v->u.rt); - v->u.rt = NULL; + dev = v->dev; + v->dev = NULL; + vifc_map &= ~(1<ip_ptr) != NULL) + in_dev->flags &= ~IFF_IP_MFORWARD; + + dev_set_allmulti(dev, -1); + ip_rt_multicast_event(in_dev); + + if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) { +#ifdef CONFIG_IP_PIMSM + if (vifi == reg_vif_num) { + reg_vif_num = -1; + reg_dev = NULL; + } +#endif + unregister_netdevice(dev); + if (v->flags&VIFF_REGISTER) + kfree(dev); } - vifc_map&=~(1<=0; tmp--) { @@ -108,21 +243,27 @@ return 0; } -static void ipmr_set_bounds(struct mfc_cache *cache) +static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls) { int vifi; + + start_bh_atomic(); + + cache->mfc_minvif = MAXVIFS; + cache->mfc_maxvif = 0; + memset(cache->mfc_ttls, 255, MAXVIFS); + for (vifi=0; vifimfc_ttls[vifi]) { - cache->mfc_minvif = vifi; - cache->mfc_maxvif = vifi+1; + if (vifc_map&(1<mfc_ttls[vifi] = ttls[vifi]; + if (cache->mfc_minvif > vifi) + cache->mfc_minvif = vifi; + if (cache->mfc_maxvif <= vifi) + cache->mfc_maxvif = vifi + 1; vifi++; - break; } } - for ( ; vifimfc_ttls[vifi]) - cache->mfc_maxvif = vifi+1; - } + end_bh_atomic(); } /* @@ -148,7 +289,7 @@ /* * Unlink the buffer */ - + while(*cp!=NULL) { if(*cp==cache) @@ -158,7 +299,7 @@ } cp=&((*cp)->next); } - + /* * Free the buffer. If it is a pending resolution * clean up the other resources. 
@@ -167,8 +308,19 @@ if(cache->mfc_flags&MFC_QUEUED) { cache_resolve_queue_len--; - while((skb=skb_dequeue(&cache->mfc_unresolved))) + while((skb=skb_dequeue(&cache->mfc_unresolved))) { +#ifdef CONFIG_RTNETLINK + if (skb->nh.iph->version == 0) { + struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); + nlh->nlmsg_type = NLMSG_ERROR; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + skb_trim(skb, nlh->nlmsg_len); + ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT; + netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT); + } else +#endif kfree_skb(skb, FREE_WRITE); + } } kfree_s(cache,sizeof(cache)); } @@ -222,14 +374,12 @@ struct mfc_cache *c=(struct mfc_cache *)kmalloc(sizeof(struct mfc_cache), priority); if(c==NULL) return NULL; - c->mfc_queuelen=0; + memset(c, 0, sizeof(*c)); skb_queue_head_init(&c->mfc_unresolved); init_timer(&c->mfc_timer); c->mfc_timer.data=(long)c; c->mfc_timer.function=ipmr_cache_timer; - c->mfc_last_assert=0; c->mfc_minvif = MAXVIFS; - c->mfc_maxvif = 0; return c; } @@ -259,8 +409,26 @@ /* * Play the pending entries through our router */ - while((skb=skb_dequeue(&cache->mfc_unresolved))) - ip_mr_input(skb); + while((skb=skb_dequeue(&cache->mfc_unresolved))) { +#ifdef CONFIG_RTNETLINK + if (skb->nh.iph->version == 0) { + int err; + struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); + + if (ipmr_fill_mroute(skb, cache, NLMSG_DATA(nlh)) > 0) { + nlh->nlmsg_len = skb->tail - (u8*)nlh; + } else { + nlh->nlmsg_type = NLMSG_ERROR; + nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + skb_trim(skb, nlh->nlmsg_len); + ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE; + } + err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) printk(KERN_DEBUG "Err=%d", err); + } else +#endif + ip_mr_forward(skb, cache, 0); + } } /* @@ -270,15 +438,40 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) { - struct sk_buff 
*skb = alloc_skb(128, GFP_ATOMIC); + struct sk_buff *skb; int ihl = pkt->nh.iph->ihl<<2; struct igmphdr *igmp; struct igmpmsg *msg; int ret; +#ifdef CONFIG_IP_PIMSM + if (assert == IGMPMSG_WHOLEPKT) + skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); + else +#endif + skb = alloc_skb(128, GFP_ATOMIC); + if(!skb) - return -ENOMEM; - + return -ENOBUFS; + +#ifdef CONFIG_IP_PIMSM + if (assert == IGMPMSG_WHOLEPKT) { + /* Ugly, but we have no choice with this interface. + Duplicate old header, fix ihl, length etc. + And all this only to mangle msg->im_msgtype and + to set msg->im_mbz to "mbz" :-) + */ + msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr)); + skb->nh.raw = skb->h.raw = (u8*)msg; + memcpy(msg, pkt->nh.raw, sizeof(struct iphdr)); + msg->im_msgtype = IGMPMSG_WHOLEPKT; + msg->im_mbz = 0; + msg->im_vif = reg_vif_num; + skb->nh.iph->ihl = sizeof(struct iphdr) >> 2; + skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr)); + } else { +#endif + /* * Copy the IP header */ @@ -287,33 +480,30 @@ memcpy(skb->data,pkt->data,ihl); skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */ msg = (struct igmpmsg*)skb->nh.iph; - if (assert) - msg->im_vif = vifi; - + msg->im_vif = vifi; + skb->dst = dst_clone(pkt->dst); + /* * Add our header */ - + igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr)); igmp->type = - msg->im_msgtype = assert ? 
IGMPMSG_WRONGVIF : IGMPMSG_NOCACHE; + msg->im_msgtype = assert; igmp->code = 0; skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */ skb->h.raw = skb->nh.raw; +#ifdef CONFIG_IP_PIMSM + } +#endif /* * Deliver to mrouted */ - if((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) - { - static unsigned long last_warn; - if(jiffies-last_warn>10*HZ) - { - last_warn=jiffies; - printk("mroute: pending queue full, dropping entries.\n"); - } + if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) { + if (net_ratelimit()) + printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); kfree_skb(skb, FREE_READ); - return ret; } return ret; @@ -323,7 +513,7 @@ * Queue a packet for resolution */ -static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk_buff *skb) +static int ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk_buff *skb) { if(cache==NULL) { @@ -333,12 +523,12 @@ if(cache_resolve_queue_len>=10 || (cache=ipmr_cache_alloc(GFP_ATOMIC))==NULL) { kfree_skb(skb, FREE_WRITE); - return; + return -ENOBUFS; } /* * Fill in the new cache entry */ - cache->mfc_parent=vifi; + cache->mfc_parent=ALL_VIFS; cache->mfc_origin=skb->nh.iph->saddr; cache->mfc_mcastgrp=skb->nh.iph->daddr; cache->mfc_flags=MFC_QUEUED; @@ -358,9 +548,16 @@ if(mroute_socket) { /* If the report failed throw the cache entry - out - Brad Parker */ - if(ipmr_cache_report(skb, vifi, 0)<0) + out - Brad Parker + + OK, OK, Brad. 
Only do not forget to free skb + and return :-) --ANK + */ + if (ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE)<0) { ipmr_cache_delete(cache); + kfree_skb(skb, FREE_WRITE); + return -ENOBUFS; + } } } /* @@ -369,10 +566,11 @@ if(cache->mfc_queuelen>3) { kfree_skb(skb, FREE_WRITE); - return; + return -ENOBUFS; } cache->mfc_queuelen++; skb_queue_tail(&cache->mfc_unresolved,skb); + return 0; } /* @@ -416,8 +614,7 @@ cache->mfc_flags|=MFC_RESOLVED; cache->mfc_parent=mfc->mfcc_parent; - memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls)); - ipmr_set_bounds(cache); + ipmr_update_threshoulds(cache, mfc->mfcc_ttls); /* * Check to see if we resolved a queued list. If so we @@ -445,13 +642,21 @@ cache->mfc_origin=mfc->mfcc_origin.s_addr; cache->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; cache->mfc_parent=mfc->mfcc_parent; - memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls)); - ipmr_set_bounds(cache); + ipmr_update_threshoulds(cache, mfc->mfcc_ttls); ipmr_cache_insert(cache); end_bh_atomic(); return 0; } - + +static void mrtsock_destruct(struct sock *sk) +{ + if (sk == mroute_socket) { + ipv4_config.multicast_route = 0; + mroute_socket=NULL; + mroute_close(sk); + } +} + /* * Socket options and virtual interface manipulation. 
The whole * virtual interface system is a complete heap, but unfortunately @@ -461,7 +666,6 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen) { - int err; struct vifctl vif; struct mfcctl mfc; @@ -480,9 +684,8 @@ return -ENOPROTOOPT; { int opt; - err = get_user(opt,(int *)optval); - if (err) - return err; + if (get_user(opt,(int *)optval)) + return -EFAULT; if (opt != 1) return -ENOPROTOOPT; } @@ -490,78 +693,101 @@ return -EADDRINUSE; mroute_socket=sk; ipv4_config.multicast_route = 1; - /* Initialise state */ - return 0; + if (ip_ra_control(sk, 1, mrtsock_destruct) == 0) + return 0; + mrtsock_destruct(sk); + return -EADDRINUSE; case MRT_DONE: - ipv4_config.multicast_route = 0; - mroute_close(sk); - mroute_socket=NULL; + mrtsock_destruct(sk); return 0; case MRT_ADD_VIF: case MRT_DEL_VIF: if(optlen!=sizeof(vif)) return -EINVAL; - err = copy_from_user(&vif,optval,sizeof(vif)); - if (err) + if (copy_from_user(&vif,optval,sizeof(vif))) return -EFAULT; - if(vif.vifc_vifi > MAXVIFS) + if(vif.vifc_vifi >= MAXVIFS) return -ENFILE; if(optname==MRT_ADD_VIF) { struct vif_device *v=&vif_table[vif.vifc_vifi]; struct device *dev; - /* Empty vif ? */ - if(vifc_map&(1<flags&IFF_MULTICAST) - { - /* Most ethernet cards don't know - how to do this yet.. */ - dev->flags|=IFF_ALLMULTI; - dev_mc_upload(dev); - ip_rt_multicast_event(dev); - } - else - { - /* We are stuck.. 
*/ - return -EOPNOTSUPP; + + switch (vif.vifc_flags) { +#ifdef CONFIG_IP_PIMSM + case VIFF_REGISTER: + + /* + * Special Purpose VIF in PIM + * All the packets will be sent to the daemon + */ + if (reg_vif_num >= 0) + return -EADDRINUSE; + reg_vif_num = vif.vifc_vifi; + dev = ipmr_reg_vif(&vif); + if (!dev) { + reg_vif_num = -1; + return -ENOBUFS; } + break; +#endif + case VIFF_TUNNEL: + dev = ipmr_new_tunnel(&vif); + if (!dev) + return -ENOBUFS; + break; + case 0: + dev=ip_dev_find(vif.vifc_lcl_addr.s_addr); + if (!dev) + return -EADDRNOTAVAIL; + break; + default: + printk(KERN_DEBUG "ipmr_add_vif: flags %02x\n", vif.vifc_flags); + return -EINVAL; } + + if ((in_dev = dev->ip_ptr) == NULL) + return -EADDRNOTAVAIL; + if (in_dev->flags & IFF_IP_MFORWARD) + return -EADDRINUSE; + in_dev->flags |= IFF_IP_MFORWARD; + dev_set_allmulti(dev, +1); + ip_rt_multicast_event(in_dev); + /* * Fill in the VIF structures */ - cli(); + start_bh_atomic(); v->rate_limit=vif.vifc_rate_limit; v->local=vif.vifc_lcl_addr.s_addr; v->remote=vif.vifc_rmt_addr.s_addr; v->flags=vif.vifc_flags; v->threshold=vif.vifc_threshold; - v->u.dev=NULL; - if (!(vif.vifc_flags&VIFF_TUNNEL)) - v->u.dev=dev; + v->dev=dev; v->bytes_in = 0; v->bytes_out = 0; v->pkt_in = 0; v->pkt_out = 0; + v->link = dev->ifindex; + if (vif.vifc_flags&(VIFF_TUNNEL|VIFF_REGISTER)) + v->link = dev->iflink; vifc_map|=(1< maxvif) maxvif = vif.vifc_vifi+1; - sti(); + end_bh_atomic(); return 0; - } else - return vif_delete(vif.vifc_vifi); + } else { + int ret; + rtnl_lock(); + ret = vif_delete(vif.vifc_vifi); + rtnl_unlock(); + return ret; + } /* * Manipulate the forwarding caches. These live @@ -571,8 +797,9 @@ case MRT_DEL_MFC: if(optlen!=sizeof(mfc)) return -EINVAL; - err = copy_from_user(&mfc,optval, sizeof(mfc)); - return err ? -EFAULT : ipmr_mfc_modify(optname, &mfc); + if (copy_from_user(&mfc,optval, sizeof(mfc))) + return -EFAULT; + return ipmr_mfc_modify(optname, &mfc); /* * Control PIM assert. 
*/ @@ -581,9 +808,29 @@ int v; if(get_user(v,(int *)optval)) return -EFAULT; - mroute_do_pim=(v)?1:0; + mroute_do_assert=(v)?1:0; return 0; } +#ifdef CONFIG_IP_PIMSM + case MRT_PIM: + { + int v; + if(get_user(v,(int *)optval)) + return -EFAULT; + v = (v)?1:0; + if (v != mroute_do_pim) { + mroute_do_pim = v; + mroute_do_assert = v; +#ifdef CONFIG_IP_PIMSM_V2 + if (mroute_do_pim) + inet_add_protocol(&pim_protocol); + else + inet_del_protocol(&pim_protocol); +#endif + } + return 0; + } +#endif /* * Spurious command, or MRT_VERSION which you cannot * set. @@ -604,7 +851,11 @@ if(sk!=mroute_socket) return -EACCES; - if(optname!=MRT_VERSION && optname!=MRT_ASSERT) + if(optname!=MRT_VERSION && +#ifdef CONFIG_IP_PIMSM + optname!=MRT_PIM && +#endif + optname!=MRT_ASSERT) return -ENOPROTOOPT; if(get_user(olr, optlen)) @@ -615,8 +866,12 @@ return -EFAULT; if(optname==MRT_VERSION) val=0x0305; - else +#ifdef CONFIG_IP_PIMSM + else if(optname==MRT_PIM) val=mroute_do_pim; +#endif + else + val=mroute_do_assert; if(copy_to_user(optval,&val,olr)) return -EFAULT; return 0; @@ -628,7 +883,6 @@ int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg) { - int err; struct sioc_sg_req sr; struct sioc_vif_req vr; struct vif_device *vif; @@ -637,8 +891,7 @@ switch(cmd) { case SIOCGETVIFCNT: - err = copy_from_user(&vr,(void *)arg,sizeof(vr)); - if (err) + if (copy_from_user(&vr,(void *)arg,sizeof(vr))) return -EFAULT; if(vr.vifi>=maxvif) return -EINVAL; @@ -649,16 +902,13 @@ vr.ocount=vif->pkt_out; vr.ibytes=vif->bytes_in; vr.obytes=vif->bytes_out; - err = copy_to_user((void *)arg,&vr,sizeof(vr)); - if (err) - err = -EFAULT; - return err; + if (copy_to_user((void *)arg,&vr,sizeof(vr))) + return -EFAULT; return 0; } return -EADDRNOTAVAIL; case SIOCGETSGCNT: - err = copy_from_user(&sr,(void *)arg,sizeof(sr)); - if (err) + if (copy_from_user(&sr,(void *)arg,sizeof(sr))) return -EFAULT; for (c = mfc_cache_array[MFC_HASH(sr.grp.s_addr, sr.src.s_addr)]; c; c = c->next) { @@ -667,10 +917,8 @@ 
sr.pktcnt = c->mfc_pkt; sr.bytecnt = c->mfc_bytes; sr.wrong_if = c->mfc_wrong_if; - err = copy_to_user((void *)arg,&sr,sizeof(sr)); - if (err) - err = -EFAULT; - return err; + if (copy_to_user((void *)arg,&sr,sizeof(sr))) + return -EFAULT; return 0; } } @@ -691,9 +939,10 @@ /* * Shut down all active vif entries */ - + rtnl_lock(); for(i=0; iflags&VIFF_TUNNEL) && v->u.dev==ptr) + for(ct=0;ctdev==ptr) vif_delete(ct); v++; } @@ -769,26 +1017,24 @@ struct rtable *rt; int encap = 0; struct sk_buff *skb2; - int err; - + +#ifdef CONFIG_IP_PIMSM + if (vif->flags & VIFF_REGISTER) { + vif->pkt_out++; + vif->bytes_out+=skb->len; + ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len; + ((struct net_device_stats*)vif->dev->priv)->tx_packets++; + ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); + return; + } +#endif + if (vif->flags&VIFF_TUNNEL) { - rt = vif->u.rt; - if (!rt || rt->u.dst.obsolete) { - ip_rt_put(rt); - vif->u.rt = NULL; - err = ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), NULL); - if (err) - return; - vif->u.rt = rt; - } - dst_clone(&rt->u.dst); + if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link)) + return; encap = sizeof(struct iphdr); } else { - dev = vif->u.dev; - if (dev == NULL) - return; - err = ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), dev); - if (err) + if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link)) return; } @@ -807,10 +1053,14 @@ return; } - if (skb_headroom(skb) < encap || (encap && !last)) + if (skb_headroom(skb) < encap || skb_cloned(skb) || !last) skb2 = skb_realloc_headroom(skb, (encap + 15)&~15); - else + else if (atomic_read(&skb->users) != 1) skb2 = skb_clone(skb, GFP_ATOMIC); + else { + atomic_inc(&skb->users); + skb2 = skb; + } if (skb2 == NULL) { ip_rt_put(rt); @@ -826,34 +1076,45 @@ iph = skb2->nh.iph; ip_decrease_ttl(iph); - if (vif->flags & VIFF_TUNNEL) + if (vif->flags & VIFF_TUNNEL) { ip_encap(skb2, vif->local, vif->remote); + ((struct 
ip_tunnel *)vif->dev->priv)->stat.tx_packets++; + ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len; + } + + IPCB(skb2)->flags |= IPSKB_FORWARDED; - ip_send(skb2); + /* + * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + * not only before forwarding, but after forwarding on all output + * interfaces. It is clear, if mrouter runs a multicasting + * program, it should receive packets not depending to what interface + * program is joined. + * If we will not make it, the program will have to join on all + * interfaces. On the other hand, multihoming host (or router, but + * not mrouter) cannot join to more than one interface - it will + * result in receiving multiple packets. + */ + ip_ll_header(skb2); + skb2->dst->output(skb2); } -/* - * Multicast packets for forwarding arrive here - */ +int ipmr_find_vif(struct device *dev) +{ + int ct; + for (ct=0; ctflags&IPSKB_TUNNELED; - - cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); - - /* - * No usable cache entry - */ - - if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) { - ipmr_cache_unresolved(cache, ALL_VIFS, skb); - return -EAGAIN; - } vif = cache->mfc_parent; cache->mfc_pkt++; @@ -862,75 +1123,290 @@ /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (vif >= maxvif || !(vifc_map&(1<vif != vif) || - (!tunneled && (vif_table[vif].flags&VIFF_TUNNEL || - vif_table[vif].u.dev != skb->dev))) { + if (vif_table[vif].dev != skb->dev) { + int true_vifi; + + if (((struct rtable*)skb->dst)->key.iif == 0) { + /* It is our own packet, looped back. + Very complicated situation... + + The best workaround until routing daemons will be + fixed is not to redistribute packet, if it was + send through wrong interface. It means, that + multicast applications WILL NOT work for + (S,G), which have default multicast route pointing + to wrong oif. In any case, it is not a good + idea to use multicasting applications on router. 
+ */ + goto dont_forward; + } + cache->mfc_wrong_if++; - if (vif < MAXVIFS && mroute_do_pim && - !(vif_table[vif].flags&VIFF_TUNNEL) && - skb->dev->flags&IFF_BROADCAST && + true_vifi = ipmr_find_vif(skb->dev); + + if (true_vifi < MAXVIFS && mroute_do_assert && + /* pimsm uses asserts, when switching from RPT to SPT, + so that we cannot check that packet arrived on an oif. + It is bad, but otherwise we would need to move pretty + large chunk of pimd to kernel. Ough... --ANK + */ + (mroute_do_pim || cache->mfc_ttls[true_vifi] < 255) && jiffies - cache->mfc_last_assert > MFC_ASSERT_THRESH) { cache->mfc_last_assert = jiffies; - /* - * It is wrong! Routing daemon can - * determine vif itself, but it cannot - * determine REAL device. - * BSD bug. Fix it later, PIM does not - * work in any case 8) _ANK_ - */ - ipmr_cache_report(skb, vif, 1); + ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); } - kfree_skb(skb, FREE_WRITE); - return -EINVAL; + goto dont_forward; } vif_table[vif].pkt_in++; vif_table[vif].bytes_in+=skb->len; - if (IPCB(skb)->opt.router_alert || - ((struct rtable*)skb->dst)->rt_flags&RTF_LOCAL || - skb->nh.iph->protocol == IPPROTO_IGMP) - local = 1; - /* * Forward the frame */ - ct = cache->mfc_maxvif-1; - while (ct>=cache->mfc_minvif) { - /* - * 0 means don't do it. Silly idea, 255 as don't do it would be cleaner! 
- */ - if (skb->nh.iph->ttl > cache->mfc_ttls[ct] && cache->mfc_ttls[ct]>0) { + for (ct = cache->mfc_maxvif-1; ct >= cache->mfc_minvif; ct--) { + if (skb->nh.iph->ttl > cache->mfc_ttls[ct]) { if (psend != -1) ipmr_queue_xmit(skb, cache, psend, 0); psend=ct; } - ct--; } if (psend != -1) - ipmr_queue_xmit(skb, cache, psend, 1); + ipmr_queue_xmit(skb, cache, psend, !local); + +dont_forward: + if (!local) + kfree_skb(skb, FREE_WRITE); + return 0; +} + + +/* + * Multicast packets for forwarding arrive here + */ + +int ip_mr_input(struct sk_buff *skb) +{ + struct mfc_cache *cache; + int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; + + /* Packet is looped back after forward, it should not be + forwarded second time, but still can be delivered locally. + */ + if (IPCB(skb)->flags&IPSKB_FORWARDED) + goto dont_forward; + if (!local) { + if (IPCB(skb)->opt.router_alert) { + if (ip_call_ra_chain(skb)) + return 0; + } else if (skb->nh.iph->protocol == IPPROTO_IGMP && mroute_socket) { + /* IGMPv1 (and broken IGMPv2 implementations sort of + Cisco IOS <= 11.2(8)) do not put router alert + option to IGMP packets destined to routable + groups. It is very bad, because it means + that we can forward NO IGMP messages. 
+ */ + raw_rcv(mroute_socket, skb); + return 0; + } + } + + cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr); + + /* + * No usable cache entry + */ + + if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) { + int vif; + + if (local) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + ip_local_deliver(skb); + if (skb2 == NULL) + return -ENOBUFS; + skb = skb2; + } + + vif = ipmr_find_vif(skb->dev); + if (vif != ALL_VIFS) { + ipmr_cache_unresolved(cache, vif, skb); + return -EAGAIN; + } kfree_skb(skb, FREE_READ); return 0; } - return ip_local_deliver(skb); + + ip_mr_forward(skb, cache, local); + + if (local) + return ip_local_deliver(skb); + return 0; + +dont_forward: + if (local) + return ip_local_deliver(skb); + kfree_skb(skb, FREE_READ); + return 0; +} + +#ifdef CONFIG_IP_PIMSM_V1 +/* + * Handle IGMP messages of PIMv1 + */ + +int pim_rcv_v1(struct sk_buff * skb, unsigned short len) +{ + struct igmphdr *pim = (struct igmphdr*)skb->h.raw; + struct iphdr *encap; + + if (!mroute_do_pim || + len < sizeof(*pim) + sizeof(*encap) || + pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER || + reg_dev == NULL) { + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + + encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr)); + /* + Check that: + a. packet is really destinted to a multicast group + b. packet is not a NULL-REGISTER + c. 
packet is not truncated + */ + if (!MULTICAST(encap->daddr) || + ntohs(encap->tot_len) == 0 || + ntohs(encap->tot_len) + sizeof(*pim) > len) { + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + skb_pull(skb, (u8*)encap - skb->data); + skb->nh.iph = (struct iphdr *)skb->data; + skb->dev = reg_dev; + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->protocol = __constant_htons(ETH_P_IP); + skb->ip_summed = 0; + skb->pkt_type = PACKET_HOST; + dst_release(skb->dst); + skb->dst = NULL; + ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; + ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + netif_rx(skb); + return 0; } +#endif -int ip_mr_find_tunnel(u32 local, u32 remote) +#ifdef CONFIG_IP_PIMSM_V2 +int pim_rcv(struct sk_buff * skb, unsigned short len) +{ + struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw; + struct iphdr *encap; + + if (len < sizeof(*pim) + sizeof(*encap) || + pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || + (pim->flags&PIM_NULL_REGISTER) || + reg_dev == NULL || + ip_compute_csum((void *)pim, len)) { + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + + /* check if the inner packet is destined to mcast group */ + encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr)); + if (!MULTICAST(encap->daddr) || + ntohs(encap->tot_len) == 0 || + ntohs(encap->tot_len) + sizeof(*pim) > len) { + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + skb_pull(skb, (u8*)encap - skb->data); + skb->nh.iph = (struct iphdr *)skb->data; + skb->dev = reg_dev; + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->protocol = __constant_htons(ETH_P_IP); + skb->ip_summed = 0; + skb->pkt_type = PACKET_HOST; + dst_release(skb->dst); + ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; + ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + skb->dst = NULL; + netif_rx(skb); + return 0; +} +#endif + +#ifdef CONFIG_RTNETLINK + +static int +ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct 
rtmsg *rtm) { int ct; - struct vif_device *vif; + struct rtnexthop *nhp; + struct device *dev = vif_table[c->mfc_parent].dev; - for (ct=0; ctflags&VIFF_TUNNEL && - vif->local == local && vif->remote == remote) - return ct; + if (dev) { + u8 *o = skb->tail; + RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); + rtm->rtm_optlen += skb->tail - o; + } + + for (ct = c->mfc_minvif; ct < c->mfc_maxvif; ct++) { + if (c->mfc_ttls[ct] < 255) { + if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) + goto rtattr_failure; + nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp->rtnh_flags = 0; + nhp->rtnh_hops = c->mfc_ttls[ct]; + nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; + nhp->rtnh_len = sizeof(*nhp); + rtm->rtm_nhs++; + } } - return -1; + rtm->rtm_type = RTN_MULTICAST; + return 1; + +rtattr_failure: + return -EMSGSIZE; } +int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm) +{ + struct mfc_cache *cache; + struct rtable *rt = (struct rtable*)skb->dst; + + start_bh_atomic(); + cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); + if (cache==NULL || (cache->mfc_flags&MFC_QUEUED)) { + struct device *dev = skb->dev; + int vif; + int err; + + if (dev == NULL || (vif = ipmr_find_vif(dev)) == ALL_VIFS) { + end_bh_atomic(); + return -ENODEV; + } + skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); + skb->nh.iph->ihl = sizeof(struct iphdr)>>2; + skb->nh.iph->saddr = rt->rt_src; + skb->nh.iph->daddr = rt->rt_dst; + skb->nh.iph->version = 0; + err = ipmr_cache_unresolved(cache, vif, skb); + end_bh_atomic(); + return err; + } + /* Resolved cache entry is not changed by net bh, + so that we are allowed to enable it. 
+ */ + end_bh_atomic(); + + if (rtm->rtm_flags & RTM_F_NOTIFY) + cache->mfc_flags |= MFC_NOTIFY; + return ipmr_fill_mroute(skb, cache, rtm); +} +#endif + /* * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif */ @@ -945,16 +1421,19 @@ int ct; len += sprintf(buffer, - "Interface Bytes In Pkts In Bytes Out Pkts Out Flags Local Remote\n"); + "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); pos=len; for (ct=0;ctflags&VIFF_TUNNEL ? "Tunnel" : vif->u.dev->name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, + if (vif->dev) + name = vif->dev->name; + size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", + ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags, vif->local, vif->remote); len+=size; pos+=size; @@ -984,7 +1463,7 @@ int ct; len += sprintf(buffer, - "Group Origin SrcIface Pkts Bytes Wrong VifTtls\n"); + "Group Origin Iif Pkts Bytes Wrong Oifs\n"); pos=len; for (ct=0;ctmfc_parent < maxvif && vifc_map&(1<mfc_parent)) { - if (vif_table[mfc->mfc_parent].flags&VIFF_TUNNEL) - name="Tunnel"; - else - name=vif_table[mfc->mfc_parent].u.dev->name; - } + /* * Interface forwarding map */ - size = sprintf(buffer+len, "%08lX %08lX %-8s %8ld %8ld %8ld", + size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld", (unsigned long)mfc->mfc_mcastgrp, (unsigned long)mfc->mfc_origin, - name, + mfc->mfc_parent == ALL_VIFS ? -1 : mfc->mfc_parent, + (mfc->mfc_flags & MFC_QUEUED) ? mfc->mfc_unresolved.qlen : mfc->mfc_pkt, mfc->mfc_bytes, - mfc->mfc_pkt, mfc->mfc_wrong_if); - for(n=0;nmfc_minvif;nmfc_maxvif;n++) { - if(vifc_map&(1<mfc_ttls[n]); - else - size += sprintf(buffer+len+size, " --- "); + if(vifc_map&(1<mfc_ttls[n] < 255) + size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_ttls[n]); } size += sprintf(buffer+len+size, "\n"); len+=size; @@ -1043,6 +1511,10 @@ len-=(offset-begin); if(len>length) len=length; + if (len < 0) { + len = 0; + printk(KERN_CRIT "Yep, guys... 
our template for proc_*_read is crappy :-)\n"); + } return len; } @@ -1061,6 +1533,19 @@ }; #endif +#ifdef CONFIG_IP_PIMSM_V2 +struct inet_protocol pim_protocol = +{ + pim_rcv, /* PIM handler */ + NULL, /* PIM error control */ + NULL, /* next */ + IPPROTO_PIM, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "PIM" /* name */ +}; +#endif + /* * Setup for IP multicast routing @@ -1068,7 +1553,7 @@ __initfunc(void ip_mr_init(void)) { - printk(KERN_INFO "Linux IP multicast router 0.06.\n"); + printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n"); register_netdevice_notifier(&ip_mr_notifier); #ifdef CONFIG_PROC_FS proc_net_register(&proc_net_ipmr_vif); diff -u --recursive --new-file v2.1.67/linux/net/ipv4/packet.c linux/net/ipv4/packet.c --- v2.1.67/linux/net/ipv4/packet.c Mon Jun 16 16:36:01 1997 +++ linux/net/ipv4/packet.c Wed Dec 31 16:00:00 1969 @@ -1,528 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * PACKET - implements raw packet sockets. - * - * Doesn't belong in IP but it's currently too hooked into ip - * to separate. - * - * Version: @(#)packet.c 1.0.6 05/25/93 - * - * Authors: Ross Biro, - * Fred N. van Kempen, - * Alan Cox, - * - * Fixes: - * Alan Cox : verify_area() now used correctly - * Alan Cox : new skbuff lists, look ma no backlogs! - * Alan Cox : tidied skbuff lists. - * Alan Cox : Now uses generic datagram routines I - * added. Also fixed the peek/read crash - * from all old Linux datagram code. - * Alan Cox : Uses the improved datagram code. - * Alan Cox : Added NULL's for socket options. - * Alan Cox : Re-commented the code. - * Alan Cox : Use new kernel side addressing - * Rob Janssen : Correct MTU usage. - * Dave Platt : Counter leaks caused by incorrect - * interrupt locking and some slightly - * dubious gcc output. 
Can you read - * compiler: it said _VOLATILE_ - * Richard Kooijman : Timestamp fixes. - * Alan Cox : New buffers. Use sk->mac.raw. - * Alan Cox : sendmsg/recvmsg support. - * Alan Cox : Protocol setting support - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* - * This should be the easiest of all, all we do is copy it into a buffer. - */ - -int packet_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) -{ - struct sock *sk; - - /* - * When we registered the protocol we saved the socket in the data - * field for just this event. - */ - - sk = (struct sock *) pt->data; - - /* - * Yank back the headers [hope the device set this - * right or kerboom...] - */ - - skb_push(skb,skb->data-skb->mac.raw); - - /* - * The SOCK_PACKET socket receives _all_ frames. - */ - - skb->dev = dev; - - /* - * Charge the memory to the socket. This is done specifically - * to prevent sockets using all the memory up. - */ - - if(sock_queue_rcv_skb(sk,skb)<0) - { - kfree_skb(skb, FREE_READ); - return 0; - } - /* - * Processing complete. - */ - - return(0); -} - - -/* - * Output a raw packet to a device layer. This bypasses all the other - * protocol layers and you must therefore supply it with a complete frame - */ - -static int packet_sendmsg(struct sock *sk, struct msghdr *msg, int len) -{ - struct sk_buff *skb; - struct device *dev; - struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name; - unsigned short proto=0; - int err; - - /* - * Check the flags. - */ - - if (msg->msg_flags&~MSG_DONTWAIT) - return(-EINVAL); - - /* - * Get and verify the address. 
- */ - - if (saddr) - { - if (msg->msg_namelen < sizeof(struct sockaddr)) - return(-EINVAL); - if (msg->msg_namelen==sizeof(struct sockaddr_pkt)) - proto=saddr->spkt_protocol; - } - else - return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */ - - /* - * Find the device first to size check it - */ - - saddr->spkt_device[13] = 0; - dev = dev_get(saddr->spkt_device); - if (dev == NULL) - { - return(-ENODEV); - } - - /* - * You may not queue a frame bigger than the mtu. This is the lowest level - * raw protocol and you must do your own fragmentation at this level. - */ - - if(len>dev->mtu+dev->hard_header_len) - return -EMSGSIZE; - - skb = sock_wmalloc(sk, len+dev->hard_header_len, 0, GFP_KERNEL); - - /* - * If the write buffer is full, then tough. At this level the user gets to - * deal with the problem - do your own algorithmic backoffs. That's far - * more flexible. - */ - - if (skb == NULL) - { - return(-ENOBUFS); - } - - /* - * Fill it in - */ - - /* FIXME: Save some space for broken drivers that write a - * hard header at transmission time by themselves. PPP is the - * notable one here. This should really be fixed at the driver level. - */ - skb_reserve(skb,dev->hard_header_len); - err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); - skb->arp = 1; /* No ARP needs doing on this (complete) frame */ - skb->protocol = proto; - skb->dev = dev; - skb->priority = sk->priority; - - /* - * Now send it - */ - - if (err) - { - err = -EFAULT; - } - else - { - if (!(dev->flags & IFF_UP)) - { - err = -ENODEV; - } - } - - if (err) - { - kfree_skb(skb, FREE_WRITE); - return err; - } - - dev_queue_xmit(skb); - return(len); -} - -/* - * Close a SOCK_PACKET socket. This is fairly simple. We immediately go - * to 'closed' state and remove our protocol entry in the device list. - * The release_sock() will destroy the socket if a user has closed the - * file side of the object. 
- */ - -static void packet_close(struct sock *sk, unsigned long timeout) -{ - /* - * Stop more data and kill the socket off. - */ - - lock_sock(sk); - sk->state = TCP_CLOSE; - - /* - * Unhook the notifier - */ - - unregister_netdevice_notifier(&sk->protinfo.af_packet.notifier); - - if(sk->protinfo.af_packet.prot_hook) - { - /* - * Remove the protocol hook - */ - - dev_remove_pack((struct packet_type *)sk->protinfo.af_packet.prot_hook); - - /* - * Dispose of litter carefully. - */ - - kfree_s((void *)sk->protinfo.af_packet.prot_hook, sizeof(struct packet_type)); - sk->protinfo.af_packet.prot_hook = NULL; - } - - release_sock(sk); - sk->dead = 1; - destroy_sock(sk); -} - -/* - * Attach a packet hook to a device. - */ - -int packet_attach(struct sock *sk, struct device *dev) -{ - struct packet_type *p = (struct packet_type *) kmalloc(sizeof(*p), GFP_KERNEL); - if (p == NULL) - return(-ENOMEM); - - p->func = packet_rcv; - p->type = sk->num; - p->data = (void *)sk; - p->dev = dev; - dev_add_pack(p); - - /* - * We need to remember this somewhere. - */ - - sk->protinfo.af_packet.prot_hook = p; - sk->protinfo.af_packet.bound_dev = dev; - return 0; -} - -/* - * Bind a packet socket to a device - */ - -static int packet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) -{ - char name[15]; - struct device *dev; - - /* - * Check legality - */ - - if(addr_len!=sizeof(struct sockaddr)) - return -EINVAL; - strncpy(name,uaddr->sa_data,14); - name[14]=0; - - /* - * Lock the device chain while we sanity check - * the bind request. - */ - - dev_lock_list(); - dev=dev_get(name); - if(dev==NULL) - { - dev_unlock_list(); - return -ENODEV; - } - - if(!(dev->flags&IFF_UP)) - { - dev_unlock_list(); - return -ENETDOWN; - } - - /* - * Perform the request. - */ - - memcpy(sk->protinfo.af_packet.device_name,name,15); - - /* - * Rewrite an existing hook if present. 
- */ - - if(sk->protinfo.af_packet.prot_hook) - { - dev_remove_pack(sk->protinfo.af_packet.prot_hook); - sk->protinfo.af_packet.prot_hook->dev=dev; - sk->protinfo.af_packet.bound_dev=dev; - dev_add_pack(sk->protinfo.af_packet.prot_hook); - } - else - { - int err=packet_attach(sk, dev); - if(err) - { - dev_unlock_list(); - return err; - } - } - /* - * Now the notifier is set up right this lot is safe. - */ - dev_unlock_list(); - return 0; -} - -/* - * This hook is called when a device goes up or down so that - * SOCK_PACKET sockets can come unbound properly. - */ - -static int packet_unbind(struct notifier_block *this, unsigned long msg, void *data) -{ - struct inet_packet_opt *ipo=(struct inet_packet_opt *)this; - if(msg==NETDEV_DOWN && data==ipo->bound_dev) - { - /* - * Our device has gone down. - */ - ipo->bound_dev=NULL; - dev_remove_pack(ipo->prot_hook); - kfree(ipo->prot_hook); - ipo->prot_hook=NULL; - } - return NOTIFY_DONE; -} - - -/* - * Create a packet of type SOCK_PACKET. - */ - -static int packet_init(struct sock *sk) -{ - /* - * Attach a protocol block - */ - - int err=packet_attach(sk, NULL); - if(err) - return err; - - /* - * Set up the per socket notifier. - */ - - sk->protinfo.af_packet.notifier.notifier_call=packet_unbind; - sk->protinfo.af_packet.notifier.priority=0; - - register_netdevice_notifier(&sk->protinfo.af_packet.notifier); - - return(0); -} - - -/* - * Pull a packet from our receive queue and hand it to the user. - * If necessary we block. - */ - -int packet_recvmsg(struct sock *sk, struct msghdr *msg, int len, - int noblock, int flags,int *addr_len) -{ - int copied=0; - struct sk_buff *skb; - struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name; - int err; - - /* - * If there is no protocol hook then the device is down. - */ - - if(sk->protinfo.af_packet.prot_hook==NULL) - return -ENETDOWN; - - /* - * If the address length field is there to be filled in, we fill - * it in now. 
- */ - - if (addr_len) - *addr_len=sizeof(*saddr); - - /* - * Call the generic datagram receiver. This handles all sorts - * of horrible races and re-entrancy so we can forget about it - * in the protocol layers. - */ - - skb=skb_recv_datagram(sk,flags,noblock,&err); - - /* - * An error occurred so return it. Because skb_recv_datagram() - * handles the blocking we don't see and worry about blocking - * retries. - */ - - if(skb==NULL) - return err; - - /* - * You lose any data beyond the buffer you gave. If it worries a - * user program they can ask the device for its MTU anyway. - */ - - copied = skb->len; - if(copied>len) - { - copied=len; - msg->msg_flags|=MSG_TRUNC; - } - - /* We can't use skb_copy_datagram here */ - err = memcpy_toiovec(msg->msg_iov, skb->data, copied); - if (err) - { - return -EFAULT; - } - - sk->stamp=skb->stamp; - - /* - * Copy the address. - */ - - if (saddr) - { - saddr->spkt_family = skb->dev->type; - strncpy(saddr->spkt_device,skb->dev->name, 15); - saddr->spkt_protocol = skb->protocol; - } - - /* - * Free or return the buffer as appropriate. Again this hides all the - * races and re-entrancy issues from us. - */ - - skb_free_datagram(sk, skb); - - return(copied); -} - -/* - * This structure declares to the lower layer socket subsystem currently - * incorrectly embedded in the IP code how to behave. This interface needs - * a lot of work and will change. 
- */ - -struct proto packet_prot = -{ - (struct sock *)&packet_prot, /* sklist_next */ - (struct sock *)&packet_prot, /* sklist_prev */ - packet_close, /* close */ - NULL, /* connect */ - NULL, /* accept */ - NULL, /* retransmit */ - NULL, /* write_wakeup */ - NULL, /* read_wakeup */ - datagram_poll, /* poll */ - NULL, /* ioctl */ - packet_init, /* init */ - NULL, /* destroy */ - NULL, /* shutdown */ - NULL, /* setsockopt */ - NULL, /* getsockopt */ - packet_sendmsg, /* Sendmsg */ - packet_recvmsg, /* Recvmsg */ - packet_bind, /* bind */ - NULL, /* backlog_rcv */ - NULL, /* hash */ - NULL, /* unhash */ - NULL, /* rehash */ - NULL, /* good_socknum */ - NULL, /* verify_bind */ - 128, /* max_header */ - 0, /* retransmits */ - "PACKET", /* name */ - 0, /* inuse */ - 0 /* highestinuse */ -}; diff -u --recursive --new-file v2.1.67/linux/net/ipv4/proc.c linux/net/ipv4/proc.c --- v2.1.67/linux/net/ipv4/proc.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/proc.c Sun Nov 30 14:00:39 1997 @@ -7,7 +7,7 @@ * PROC file system. It is mainly used for debugging and * statistics. * - * Version: @(#)proc.c 1.0.5 05/27/93 + * Version: $Id: proc.c,v 1.23 1997/10/30 23:52:20 davem Exp $ * * Authors: Fred N. van Kempen, * Gerald J. 
Heim, @@ -221,7 +221,6 @@ { /* From net/socket.c */ extern int socket_get_info(char *, char **, off_t, int); - extern struct proto packet_prot; int len = socket_get_info(buffer,start,offset,length); @@ -231,8 +230,6 @@ udp_prot.inuse, udp_prot.highestinuse); len += sprintf(buffer+len,"RAW: inuse %d highest %d\n", raw_prot.inuse, raw_prot.highestinuse); - len += sprintf(buffer+len,"PAC: inuse %d highest %d\n", - packet_prot.inuse, packet_prot.highestinuse); if (offset >= len) { *start = buffer; @@ -291,14 +288,15 @@ icmp_statistics.IcmpOutAddrMasks, icmp_statistics.IcmpOutAddrMaskReps); len += sprintf (buffer + len, - "Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs\n" - "Tcp: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + "Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts\n" + "Tcp: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", tcp_statistics.TcpRtoAlgorithm, tcp_statistics.TcpRtoMin, tcp_statistics.TcpRtoMax, tcp_statistics.TcpMaxConn, tcp_statistics.TcpActiveOpens, tcp_statistics.TcpPassiveOpens, tcp_statistics.TcpAttemptFails, tcp_statistics.TcpEstabResets, tcp_statistics.TcpCurrEstab, tcp_statistics.TcpInSegs, - tcp_statistics.TcpOutSegs, tcp_statistics.TcpRetransSegs); + tcp_statistics.TcpOutSegs, tcp_statistics.TcpRetransSegs, + tcp_statistics.TcpInErrs, tcp_statistics.TcpOutRsts); len += sprintf (buffer + len, "Udp: InDatagrams NoPorts InErrors OutDatagrams\nUdp: %lu %lu %lu %lu\n", diff -u --recursive --new-file v2.1.67/linux/net/ipv4/protocol.c linux/net/ipv4/protocol.c --- v2.1.67/linux/net/ipv4/protocol.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/protocol.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * INET protocol dispatch tables. 
* - * Version: @(#)protocol.c 1.0.5 05/25/93 + * Version: $Id: protocol.c,v 1.9 1997/10/29 20:27:34 kuznet Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -45,20 +45,23 @@ #include #include +#define IPPROTO_PREVIOUS NULL -#ifdef CONFIG_NET_IPIP +#ifdef CONFIG_IP_MULTICAST -static struct inet_protocol ipip_protocol = +static struct inet_protocol igmp_protocol = { - ipip_rcv, /* IPIP handler */ - ipip_err, /* TUNNEL error control */ - 0, /* next */ - IPPROTO_IPIP, /* protocol ID */ - 0, /* copy */ - NULL, /* data */ - "IPIP" /* name */ + igmp_rcv, /* IGMP handler */ + NULL, /* IGMP error control */ + IPPROTO_PREVIOUS, /* next */ + IPPROTO_IGMP, /* protocol ID */ + 0, /* copy */ + NULL, /* data */ + "IGMP" /* name */ }; +#undef IPPROTO_PREVIOUS +#define IPPROTO_PREVIOUS &igmp_protocol #endif @@ -66,52 +69,47 @@ { tcp_v4_rcv, /* TCP handler */ tcp_v4_err, /* TCP error control */ -#ifdef CONFIG_NET_IPIP - &ipip_protocol, -#else - NULL, /* next */ -#endif + IPPROTO_PREVIOUS, IPPROTO_TCP, /* protocol ID */ 0, /* copy */ NULL, /* data */ "TCP" /* name */ }; +#undef IPPROTO_PREVIOUS +#define IPPROTO_PREVIOUS &tcp_protocol + static struct inet_protocol udp_protocol = { udp_rcv, /* UDP handler */ udp_err, /* UDP error control */ - &tcp_protocol, /* next */ + IPPROTO_PREVIOUS, /* next */ IPPROTO_UDP, /* protocol ID */ 0, /* copy */ NULL, /* data */ "UDP" /* name */ }; +#undef IPPROTO_PREVIOUS +#define IPPROTO_PREVIOUS &udp_protocol + static struct inet_protocol icmp_protocol = { icmp_rcv, /* ICMP handler */ NULL, /* ICMP error control */ - &udp_protocol, /* next */ + IPPROTO_PREVIOUS, /* next */ IPPROTO_ICMP, /* protocol ID */ 0, /* copy */ NULL, /* data */ "ICMP" /* name */ }; -static struct inet_protocol igmp_protocol = -{ - igmp_rcv, /* IGMP handler */ - NULL, /* IGMP error control */ - &icmp_protocol, /* next */ - IPPROTO_IGMP, /* protocol ID */ - 0, /* copy */ - NULL, /* data */ - "IGMP" /* name */ -}; +#undef IPPROTO_PREVIOUS +#define IPPROTO_PREVIOUS 
&icmp_protocol + -struct inet_protocol *inet_protocol_base = &igmp_protocol; +struct inet_protocol *inet_protocol_base = IPPROTO_PREVIOUS; struct inet_protocol *inet_protos[MAX_INET_PROTOS] = { diff -u --recursive --new-file v2.1.67/linux/net/ipv4/rarp.c linux/net/ipv4/rarp.c --- v2.1.67/linux/net/ipv4/rarp.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/rarp.c Sun Nov 30 14:00:39 1997 @@ -3,6 +3,8 @@ * Copyright (C) 1994 by Ross Martin * Based on linux/net/inet/arp.c, Copyright (C) 1994 by Florian La Roche * + * $Id: rarp.c,v 1.21 1997/10/27 09:13:16 geert Exp $ + * * This module implements the Reverse Address Resolution Protocol * (RARP, RFC 903), which is used to convert low level addresses such * as ethernet addresses into high level addresses such as IP addresses. @@ -119,20 +121,20 @@ struct rarp_table *entry; struct rarp_table **pentry; - cli(); + start_bh_atomic(); pentry = &rarp_tables; while ((entry = *pentry) != NULL) { if (entry->ip == ip_addr) { *pentry = entry->next; - sti(); + end_bh_atomic(); rarp_release_entry(entry); return; } pentry = &entry->next; } - sti(); + end_bh_atomic(); } /* @@ -144,7 +146,7 @@ struct rarp_table *entry; struct rarp_table **pentry; - cli(); + start_bh_atomic(); pentry = &rarp_tables; while ((entry = *pentry) != NULL) { @@ -156,7 +158,7 @@ else pentry = &entry->next; } - sti(); + end_bh_atomic(); } static int rarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -176,6 +178,8 @@ NULL, 0 }; + +static int rarp_pkt_inited=0; static void rarp_init_pkt (void) { @@ -183,8 +187,19 @@ rarp_packet_type.type=htons(ETH_P_RARP); dev_add_pack(&rarp_packet_type); register_netdevice_notifier(&rarp_dev_notifier); + rarp_pkt_inited=1; } +static void rarp_end_pkt(void) +{ + if(!rarp_pkt_inited) + return; + dev_remove_pack(&rarp_packet_type); + unregister_netdevice_notifier(&rarp_dev_notifier); + rarp_pkt_inited=0; +} + + /* * Receive an arp request by the device layer. 
Maybe it should be * rewritten to use the incoming packet for the reply. The current @@ -199,6 +214,7 @@ struct arphdr *rarp = (struct arphdr *) skb->data; unsigned char *rarp_ptr = skb_pull(skb,sizeof(struct arphdr)); struct rarp_table *entry; + struct in_device *in_dev = dev->ip_ptr; long sip,tip; unsigned char *sha,*tha; /* s for "source", t for "target" */ @@ -207,7 +223,7 @@ */ if (rarp->ar_hln != dev->addr_len || dev->type != ntohs(rarp->ar_hrd) - || dev->flags&IFF_NOARP) + || dev->flags&IFF_NOARP || !in_dev || !in_dev->ifa_list) { kfree_skb(skb, FREE_READ); return 0; @@ -256,7 +272,6 @@ * Process entry. Use tha for table lookup according to RFC903. */ - cli(); for (entry = rarp_tables; entry != NULL; entry = entry->next) if (!memcmp(entry->ha, tha, rarp->ar_hln)) break; @@ -264,13 +279,10 @@ if (entry != NULL) { sip=entry->ip; - sti(); - arp_send(ARPOP_RREPLY, ETH_P_RARP, sip, dev, dev->pa_addr, sha, + arp_send(ARPOP_RREPLY, ETH_P_RARP, sip, dev, in_dev->ifa_list->ifa_address, sha, dev->dev_addr, sha); } - else - sti(); kfree_skb(skb, FREE_READ); return 0; @@ -331,10 +343,10 @@ * Is it reachable directly ? */ - err = ip_route_output(&rt, ip, 0, 1, NULL); + err = ip_route_output(&rt, ip, 0, 1, 0); if (err) return err; - if (rt->rt_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTF_NAT)) { + if (rt->rt_flags&(RTCF_LOCAL|RTCF_BROADCAST|RTCF_MULTICAST|RTCF_DNAT)) { ip_rt_put(rt); return -EINVAL; } @@ -344,7 +356,6 @@ * Is there an existing entry for this address? Find out... 
*/ - cli(); for (entry = rarp_tables; entry != NULL; entry = entry->next) if (entry->ip == ip) break; @@ -359,7 +370,6 @@ GFP_ATOMIC); if (entry == NULL) { - sti(); return -ENOMEM; } if (initflag) @@ -368,21 +378,23 @@ initflag=0; } + /* Block interrupts until table modification is finished */ + + cli(); entry->next = rarp_tables; rarp_tables = entry; } - + cli(); entry->ip = ip; entry->hlen = hlen; entry->htype = htype; memcpy(&entry->ha, &r.arp_ha.sa_data, hlen); entry->dev = dev; + sti(); /* Don't unlink if we have entries to serve. */ MOD_INC_USE_COUNT; - sti(); - return 0; } @@ -417,14 +429,12 @@ si = (struct sockaddr_in *) &r.arp_pa; ip = si->sin_addr.s_addr; - cli(); for (entry = rarp_tables; entry != NULL; entry = entry->next) if (entry->ip == ip) break; if (entry == NULL) { - sti(); return -ENXIO; } @@ -434,7 +444,6 @@ memcpy(r.arp_ha.sa_data, &entry->ha, entry->hlen); r.arp_ha.sa_family = entry->htype; - sti(); /* * Copy the information back @@ -483,6 +492,7 @@ return 0; } +#ifdef CONFIG_PROC_FS int rarp_get_info(char *buffer, char **start, off_t offset, int length, int dummy) { int len=0; @@ -505,7 +515,6 @@ pos+=size; len+=size; - cli(); for(entry=rarp_tables; entry!=NULL; entry=entry->next) { netip=htonl(entry->ip); /* switch to network order */ @@ -537,7 +546,6 @@ if(pos>offset+length) break; } - sti(); } *start = buffer+(offset-begin); /* Start of wanted data */ @@ -553,11 +561,14 @@ 0, &proc_net_inode_operations, rarp_get_info }; +#endif __initfunc(void rarp_init(void)) { +#ifdef CONFIG_PROC_FS proc_net_register(&proc_net_rarp); +#endif rarp_ioctl_hook = rarp_ioctl; } @@ -572,7 +583,9 @@ void cleanup_module(void) { struct rarp_table *rt, *rt_next; +#ifdef CONFIG_PROC_FS proc_net_unregister(PROC_NET_RARP); +#endif rarp_ioctl_hook = NULL; cli(); /* Destroy the RARP-table */ @@ -584,5 +597,6 @@ rt_next = rt->next; rarp_release_entry(rt); } + rarp_end_pkt(); } #endif diff -u --recursive --new-file v2.1.67/linux/net/ipv4/raw.c linux/net/ipv4/raw.c --- 
v2.1.67/linux/net/ipv4/raw.c Thu Jun 26 12:33:41 1997 +++ linux/net/ipv4/raw.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * RAW - implementation of IP "raw" sockets. * - * Version: @(#)raw.c 1.0.4 05/25/93 + * Version: $Id: raw.c,v 1.32 1997/10/24 17:16:00 kuznet Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -126,7 +126,7 @@ /* Grumble... icmp and ip_input want to get at this... */ struct sock *raw_v4_lookup(struct sock *sk, unsigned short num, - unsigned long raddr, unsigned long laddr) + unsigned long raddr, unsigned long laddr, int dif) { struct sock *s = sk; @@ -135,7 +135,8 @@ if((s->num == num) && !(s->dead && (s->state == TCP_CLOSE)) && !(s->daddr && s->daddr != raddr) && - !(s->rcv_saddr && s->rcv_saddr != laddr)) + !(s->rcv_saddr && s->rcv_saddr != laddr) && + !(s->bound_dev_if && s->bound_dev_if != dif)) break; /* gotcha */ } SOCKHASH_UNLOCK(); @@ -203,7 +204,7 @@ struct rawfakehdr { - const unsigned char *from; + struct iovec *iov; u32 saddr; }; @@ -218,7 +219,7 @@ static int raw_getfrag(const void *p, char *to, unsigned int offset, unsigned int fraglen) { struct rawfakehdr *rfh = (struct rawfakehdr *) p; - return copy_from_user(to, rfh->from + offset, fraglen); + return memcpy_fromiovecend(to, rfh->iov, offset, fraglen); } /* @@ -229,8 +230,9 @@ { struct rawfakehdr *rfh = (struct rawfakehdr *) p; - if (copy_from_user(to, rfh->from + offset, fraglen)) + if (memcpy_fromiovecend(to, rfh->iov, offset, fraglen)) return -EFAULT; + if (offset==0) { struct iphdr *iph = (struct iphdr *)to; if (!iph->saddr) @@ -249,10 +251,8 @@ return 0; } -static int raw_sendto(struct sock *sk, const unsigned char *from, - int len, struct msghdr *msg) +static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len) { - struct device *dev = NULL; struct ipcm_cookie ipc; struct rawfakehdr rfh; struct rtable *rt; @@ -302,9 +302,10 @@ ipc.addr = sk->saddr; ipc.opt = NULL; + ipc.oif = sk->bound_dev_if; if (msg->msg_controllen) { - int tmp = ip_cmsg_send(msg, &ipc, 
&dev); + int tmp = ip_cmsg_send(msg, &ipc); if (tmp) return tmp; if (ipc.opt && sk->ip_hdrincl) { @@ -327,23 +328,27 @@ } tos = RT_TOS(sk->ip_tos) | (sk->localroute || (msg->msg_flags&MSG_DONTROUTE)); - if (MULTICAST(daddr) && sk->ip_mc_index && dev==NULL) - err = ip_route_output_dev(&rt, daddr, rfh.saddr, tos, sk->ip_mc_index); - else - err = ip_route_output(&rt, daddr, rfh.saddr, tos, dev); + if (MULTICAST(daddr)) { + if (!ipc.oif) + ipc.oif = sk->ip_mc_index; + if (!rfh.saddr) + rfh.saddr = sk->ip_mc_addr; + } + + err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif); if (err) { if (free) kfree(ipc.opt); return err; } - if (rt->rt_flags&RTF_BROADCAST && !sk->broadcast) { + if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast) { if (free) kfree(ipc.opt); ip_rt_put(rt); return -EACCES; } - rfh.from = from; + rfh.iov = msg->msg_iov; rfh.saddr = rt->rt_src; if (!ipc.addr) ipc.addr = rt->rt_dst; @@ -363,56 +368,10 @@ return err<0 ? err : len; } -/* - * Temporary - */ - -static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len) -{ - if (msg->msg_iovlen==1) - return raw_sendto(sk, msg->msg_iov[0].iov_base,len, msg); - else { - /* - * For awkward cases we linearise the buffer first. In theory this is only frames - * whose iovec's don't split on 4 byte boundaries, and soon encrypted stuff (to keep - * skip happy). We are a bit more general about it. 
- */ - - unsigned char *buf; - int err; - if(len>65515) - return -EMSGSIZE; - buf=kmalloc(len, GFP_KERNEL); - if(buf==NULL) - return -ENOBUFS; - err = memcpy_fromiovec(buf, msg->msg_iov, len); - if (!err) - { - unsigned long fs; - fs=get_fs(); - set_fs(get_ds()); - err=raw_sendto(sk,buf,len, msg); - set_fs(fs); - } - else - err = -EFAULT; - - kfree_s(buf,len); - return err; - } -} - static void raw_close(struct sock *sk, unsigned long timeout) { sk->state = TCP_CLOSE; -#ifdef CONFIG_IP_MROUTE - if(sk==mroute_socket) - { - ipv4_config.multicast_route = 0; - mroute_close(sk); - mroute_socket=NULL; - } -#endif + ip_ra_control(sk, 0, NULL); sk->dead=1; destroy_sock(sk); } @@ -425,17 +384,17 @@ if((sk->state != TCP_CLOSE) || (addr_len < sizeof(struct sockaddr_in))) return -EINVAL; - chk_addr_ret = __ip_chk_addr(addr->sin_addr.s_addr); - if(addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && - chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST) { + chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + if(addr->sin_addr.s_addr != 0 && chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) { #ifdef CONFIG_IP_TRANSPARENT_PROXY /* Superuser may bind to any address to allow transparent proxying. */ - if(!suser()) + if(chk_addr_ret != RTN_UNICAST || !suser()) #endif return -EADDRNOTAVAIL; } sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; - if(chk_addr_ret == IS_MULTICAST || chk_addr_ret == IS_BROADCAST) + if(chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) sk->saddr = 0; /* Use device */ dst_release(sk->dst_cache); sk->dst_cache = NULL; @@ -448,7 +407,7 @@ */ int raw_recvmsg(struct sock *sk, struct msghdr *msg, int len, - int noblock, int flags,int *addr_len) + int noblock, int flags,int *addr_len) { int copied=0; struct sk_buff *skb; @@ -500,6 +459,75 @@ return err ? 
err : (copied); } +static int raw_init(struct sock *sk) +{ + struct raw_opt *tp = &(sk->tp_pinfo.tp_raw4); + if (sk->num == IPPROTO_ICMP) { + memset(&tp->filter, 0, sizeof(tp->filter)); + + /* By default block ECHO and TIMESTAMP requests */ + + set_bit(ICMP_ECHO, &tp->filter); + set_bit(ICMP_TIMESTAMP, &tp->filter); + } + return 0; +} + +static int raw_seticmpfilter(struct sock *sk, char *optval, int optlen) +{ + if (optlen > sizeof(struct icmp_filter)) + optlen = sizeof(struct icmp_filter); + if (copy_from_user(&sk->tp_pinfo.tp_raw4.filter, optval, optlen)) + return -EFAULT; + return 0; +} + +static int raw_geticmpfilter(struct sock *sk, char *optval, int *optlen) +{ + int len; + + if (get_user(len,optlen)) + return -EFAULT; + if (len > sizeof(struct icmp_filter)) + len = sizeof(struct icmp_filter); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &sk->tp_pinfo.tp_raw4.filter, len)) + return -EFAULT; + return 0; +} + +static int raw_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + if (level != SOL_RAW) + return ip_setsockopt(sk, level, optname, optval, optlen); + + switch (optname) { + case ICMP_FILTER: + if (sk->num != IPPROTO_ICMP) + return -EOPNOTSUPP; + return raw_seticmpfilter(sk, optval, optlen); + }; + + return -ENOPROTOOPT; +} + +static int raw_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen) +{ + if (level != SOL_RAW) + return ip_getsockopt(sk, level, optname, optval, optlen); + + switch (optname) { + case ICMP_FILTER: + if (sk->num != IPPROTO_ICMP) + return -EOPNOTSUPP; + return raw_geticmpfilter(sk, optval, optlen); + }; + + return -ENOPROTOOPT; +} struct proto raw_prot = { (struct sock *)&raw_prot, /* sklist_next */ @@ -516,11 +544,11 @@ #else NULL, /* ioctl */ #endif - NULL, /* init */ + raw_init, /* init */ NULL, /* destroy */ NULL, /* shutdown */ - ip_setsockopt, /* setsockopt */ - ip_getsockopt, /* getsockopt */ + raw_setsockopt, /* setsockopt */ + 
raw_getsockopt, /* getsockopt */ raw_sendmsg, /* sendmsg */ raw_recvmsg, /* recvmsg */ raw_bind, /* bind */ diff -u --recursive --new-file v2.1.67/linux/net/ipv4/route.c linux/net/ipv4/route.c --- v2.1.67/linux/net/ipv4/route.c Thu Jun 26 12:33:41 1997 +++ linux/net/ipv4/route.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * ROUTE - implementation of the IP router. * - * Version: @(#)route.c 1.0.14 05/31/93 + * Version: $Id: route.c,v 1.33 1997/10/24 17:16:08 kuznet Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -68,27 +68,27 @@ #include #include #include -#include #include #include -#include +#include +#include +#include +#include +#include +#include #include +#include #include +#include +#include #include #include -#include -#include #include -#include - -/* Compile time configuretion flags */ -#define CONFIG_IP_LOCAL_RT_POLICY 1 +#define RTprint(a...) printk(KERN_DEBUG a) -static void rt_run_flush(unsigned long); - static struct timer_list rt_flush_timer = - { NULL, NULL, RT_FLUSH_DELAY, 0L, rt_run_flush }; + { NULL, NULL, RT_FLUSH_DELAY, 0L, NULL }; /* * Interface to generic destination cache. @@ -108,6 +108,24 @@ ipv4_dst_destroy }; +__u8 ip_tos2prio[16] = { + TC_PRIO_FILLER, + TC_PRIO_BESTEFFORT, + TC_PRIO_FILLER, + TC_PRIO_FILLER, + TC_PRIO_BULK, + TC_PRIO_FILLER, + TC_PRIO_BULK, + TC_PRIO_FILLER, + TC_PRIO_INTERACTIVE, + TC_PRIO_FILLER, + TC_PRIO_INTERACTIVE, + TC_PRIO_FILLER, + TC_PRIO_INTERACTIVE_BULK, + TC_PRIO_FILLER, + TC_PRIO_INTERACTIVE_BULK, + TC_PRIO_FILLER +}; /* * Route cache. @@ -162,8 +180,10 @@ r->u.dst.dev ? 
r->u.dst.dev->name : "*", (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, - r->rt_flags, atomic_read(&r->u.dst.refcnt), - atomic_read(&r->u.dst.use), 0, + r->rt_flags, + atomic_read(&r->u.dst.use), + atomic_read(&r->u.dst.refcnt), + 0, (unsigned long)r->rt_src, (int)r->u.dst.pmtu, r->u.dst.window, (int)r->u.dst.rtt, r->key.tos, @@ -202,8 +222,6 @@ struct rtable *rth, **rthp; unsigned long now = jiffies; - start_bh_atomic(); - for (i=0; iu.dst.lastuse - rth->u.dst.lastuse > RT_CACHE_BUBBLE_THRESHOLD || (rth->u.dst.lastuse - rth_next->u.dst.lastuse < 0 && - atomic_read(&rth->u.dst.use) < atomic_read(&rth_next->u.dst.use))) { + atomic_read(&rth->u.dst.refcnt) < atomic_read(&rth_next->u.dst.refcnt))) { #if RT_CACHE_DEBUG >= 2 printk("rt_check_expire bubbled %02x@%08x<->%08x\n", rover, rth->rt_dst, rth_next->rt_dst); #endif *rthp = rth_next; rth->u.rt_next = rth_next->u.rt_next; rth_next->u.rt_next = rth; - sti(); rthp = &rth_next->u.rt_next; continue; } rthp = &rth->u.rt_next; } } - - end_bh_atomic(); } - - -void rt_cache_flush(int how) -{ - start_bh_atomic(); - if (rt_flush_timer.expires) { - if (jiffies - rt_flush_timer.expires > 0 || - rt_flush_timer.expires - jiffies > RT_FLUSH_DELAY/2) - how = 1; - } - if (how) { - if (rt_flush_timer.expires) - del_timer(&rt_flush_timer); - rt_flush_timer.expires = 0; - end_bh_atomic(); - rt_run_flush(0); - return; - } - if (rt_flush_timer.expires) { - end_bh_atomic(); - return; - } - del_timer(&rt_flush_timer); - rt_flush_timer.expires = jiffies + RT_FLUSH_DELAY; - add_timer(&rt_flush_timer); - end_bh_atomic(); -} - -void rt_run_flush(unsigned long dummy) + +static void rt_run_flush(unsigned long dummy) { int i; struct rtable * rth, * next; @@ -313,6 +294,30 @@ #endif } } + +void rt_cache_flush(int delay) +{ + start_bh_atomic(); + if (delay && rt_flush_timer.function && + rt_flush_timer.expires - jiffies < delay) { + end_bh_atomic(); + return; + } + if (rt_flush_timer.function) { + del_timer(&rt_flush_timer); + 
rt_flush_timer.function = NULL; + } + if (delay == 0) { + end_bh_atomic(); + rt_run_flush(0); + return; + } + rt_flush_timer.function = rt_run_flush; + rt_flush_timer.expires = jiffies + delay; + add_timer(&rt_flush_timer); + end_bh_atomic(); +} + static void rt_garbage_collect(void) { @@ -327,7 +332,7 @@ /* * Garbage collection is pretty expensive, - * do not make it too frequently. + * do not make it too frequently, but just increase expire strength. */ if (now - last_gc < 1*HZ) { expire >>= 1; @@ -342,7 +347,7 @@ continue; for (rthp=&rt_hash_table[i]; (rth=*rthp); rthp=&rth->u.rt_next) { if (atomic_read(&rth->u.dst.use) || - (now - rth->u.dst.lastuse > expire)) + now - rth->u.dst.lastuse < expire) continue; atomic_dec(&rt_cache_size); *rthp = rth->u.rt_next; @@ -465,115 +470,94 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, u32 saddr, u8 tos, struct device *dev) { - int i; - int off_link = 0; - struct fib_info *fi; + int i, k; + struct in_device *in_dev = dev->ip_ptr; struct rtable *rth, **rthp; - u32 skeys[2] = { saddr, 0, }; - struct device *pdev = net_alias_main_dev(dev); + u32 skeys[2] = { saddr, 0 }; + int ikeys[2] = { dev->ifindex, 0 }; tos &= IPTOS_TOS_MASK; - if (new_gw == old_gw || !ipv4_config.accept_redirects + if (!in_dev || new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw)) goto reject_redirect; - if ((new_gw^dev->pa_addr)&dev->pa_mask) - off_link = 1; - - if (!ipv4_config.rfc1620_redirects) { - if (off_link) + if (!IN_DEV_SHARED_MEDIA(in_dev)) { + if (ip_fib_check_default(new_gw, dev)) goto reject_redirect; - if (ipv4_config.secure_redirects && ip_fib_chk_default_gw(new_gw, dev)) + } else { + if (inet_addr_type(new_gw) != RTN_UNICAST) goto reject_redirect; } - fi = fib_lookup_info(new_gw, 0, 0, &loopback_dev, NULL); - if (fi == NULL || fi->fib_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_NAT)) - goto reject_redirect; - for (i=0; i<2; i++) { - unsigned hash = rt_hash_code(daddr, skeys[i], 
tos); + for (k=0; k<2; k++) { + unsigned hash = rt_hash_code(daddr, skeys[i]^(ikeys[k]<<5), tos); - rthp=&rt_hash_table[hash]; + rthp=&rt_hash_table[hash]; - while ( (rth = *rthp) != NULL) { - struct rtable *rt; + while ( (rth = *rthp) != NULL) { + struct rtable *rt; - if (rth->key.dst != daddr || - rth->key.src != skeys[i] || - rth->key.tos != tos || - rth->key.dst_dev != NULL || - rth->key.src_dev != NULL) { - rthp = &rth->u.rt_next; - continue; - } - - if (rth->rt_dst != daddr || - rth->rt_src != saddr || - rth->rt_flags&RTF_REJECT || - rth->rt_gateway != old_gw || - rth->u.dst.dev != dev) - break; + if (rth->key.dst != daddr || + rth->key.src != skeys[i] || + rth->key.tos != tos || + rth->key.oif != ikeys[k] || + rth->key.iif != 0) { + rthp = &rth->u.rt_next; + continue; + } - rt = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); - if (rt == NULL) - return; + if (rth->rt_dst != daddr || + rth->rt_src != saddr || + rth->u.dst.error || + rth->rt_gateway != old_gw || + rth->u.dst.dev != dev) + break; - /* - * Copy all the information. - */ - atomic_set(&rt->u.dst.refcnt, 1); - rt->u.dst.dev = dev; - rt->u.dst.input = rth->u.dst.input; - rt->u.dst.output = rth->u.dst.output; - rt->u.dst.pmtu = dev->mtu; - rt->u.dst.rtt = TCP_TIMEOUT_INIT; - rt->u.dst.window = 0; - atomic_set(&rt->u.dst.use, 1); - rt->u.dst.lastuse = jiffies; - - rt->rt_flags = rth->rt_flags|RTF_DYNAMIC|RTF_MODIFIED; - rt->rt_flags &= ~RTF_GATEWAY; - if (new_gw != daddr) - rt->rt_flags |= RTF_GATEWAY; - - rt->rt_src = rth->rt_src; - rt->rt_dst = rth->rt_dst; - rt->rt_src_dev = rth->rt_src_dev; - rt->rt_spec_dst = rth->rt_spec_dst; - rt->key = rth->key; - - /* But gateway is different ... */ - rt->rt_gateway = new_gw; - - if (off_link) { - if (fi->fib_dev != dev && - net_alias_main_dev(fi->fib_dev) == pdev) - rt->u.dst.dev = fi->fib_dev; - } + rt = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); + if (rt == NULL) + return; + + /* + * Copy all the information. 
+ */ + *rt = *rth; + atomic_set(&rt->u.dst.refcnt, 1); + atomic_set(&rt->u.dst.use, 1); + rt->u.dst.lastuse = jiffies; + rt->u.dst.neighbour = NULL; + rt->u.dst.hh = NULL; + + rt->rt_flags |= RTCF_REDIRECTED; + + /* Gateway is different ... */ + rt->rt_gateway = new_gw; + + if (!rt_ll_bind(rt)) { + ip_rt_put(rt); + rt_free(rt); + break; + } - if (ipv4_config.rfc1620_redirects && !rt_ll_bind(rt)) { + *rthp = rth->u.rt_next; + rt_free(rth); + rt = rt_intern_hash(hash, rt, ETH_P_IP); ip_rt_put(rt); - rt_free(rt); break; } - - *rthp = rth->u.rt_next; - rt_free(rth); - rt = rt_intern_hash(hash, rt, ETH_P_IP); - ip_rt_put(rt); - break; } } return; reject_redirect: +#ifdef CONFIG_IP_ROUTE_VERBOSE if (ipv4_config.log_martians && net_ratelimit()) printk(KERN_INFO "Redirect from %lX/%s to %lX ignored." "Path = %lX -> %lX, tos %02x\n", ntohl(old_gw), dev->name, ntohl(new_gw), ntohl(saddr), ntohl(daddr), tos); +#endif } @@ -585,7 +569,7 @@ return; start_bh_atomic(); - if ((rt = *rp) != NULL && (rt->rt_flags&(RTF_DYNAMIC|RTF_MODIFIED))) { + if ((rt = *rp) != NULL && (rt->rt_flags&RTCF_REDIRECTED)) { #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ip_rt_advice: redirect to %08x/%02x dropped\n", rt->rt_dst, rt->key.tos); #endif @@ -602,7 +586,7 @@ * 1. The first RT_REDIRECT_NUMBER redirects are sent * with exponential backoff, then we stop sending them at all, * assuming that the host ignores our redirects. - * 2. If we did not see a packets requiring redirects + * 2. If we did not see packets requiring redirects * during RT_REDIRECT_SILENCE, we assume that the host * forgot redirected route and start to send redirects again. 
* @@ -637,9 +621,12 @@ if (jiffies - rt->last_error > (RT_REDIRECT_LOAD<errors)) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); rt->last_error = jiffies; - if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER && net_ratelimit()) - printk(KERN_WARNING "host %08x/%s ignores redirects for %08x to %08x.\n", - rt->rt_src, rt->rt_src_dev->name, rt->rt_dst, rt->rt_gateway); + ++rt->errors; +#ifdef CONFIG_IP_ROUTE_VERBOSE + if (ipv4_config.log_martians && rt->errors == RT_REDIRECT_NUMBER && net_ratelimit()) + printk(KERN_WARNING "host %08x/if%d ignores redirects for %08x to %08x.\n", + rt->rt_src, rt->rt_iif, rt->rt_dst, rt->rt_gateway); +#endif } } @@ -653,6 +640,9 @@ default: kfree_skb(skb, FREE_READ); return 0; + case EHOSTUNREACH: + code = ICMP_HOST_UNREACH; + break; case ENETUNREACH: code = ICMP_NET_UNREACH; break; @@ -668,37 +658,24 @@ return 0; } +/* + * The last two values are not from the RFC but + * are needed for AMPRnet AX.25 paths. + */ + +static unsigned short mtu_plateau[] = +{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; static __inline__ unsigned short guess_mtu(unsigned short old_mtu) { - if (old_mtu > 32000) - return 32000; - else if (old_mtu > 17914) - return 17914; - else if (old_mtu > 8166) - return 8166; - else if (old_mtu > 4352) - return 4352; - else if (old_mtu > 2002) - return 2002; - else if (old_mtu > 1492) - return 1492; - else if (old_mtu > 576) - return 576; - else if (old_mtu > 296) - return 296; - /* - * These two are not from the RFC but - * are needed for AMPRnet AX.25 paths. 
- */ - else if (old_mtu > 216) - return 216; - else if (old_mtu > 128) - return 128; + int i; + + for (i = 0; i < sizeof(mtu_plateau)/sizeof(mtu_plateau[0]); i++) + if (old_mtu > mtu_plateau[i]) + return mtu_plateau[i]; return 68; } - unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) { int i; @@ -721,8 +698,8 @@ rth->rt_dst == daddr && rth->rt_src == iph->saddr && rth->key.tos == tos && - !rth->key.src_dev && - !(rth->rt_flags&RTF_NOPMTUDISC)) { + rth->key.iif == 0 && + !(rth->rt_flags&RTCF_NOPMTUDISC)) { unsigned short mtu = new_mtu; if (new_mtu < 68 || new_mtu >= old_mtu) { @@ -770,177 +747,227 @@ return NULL; } -int -ip_check_mc(struct device *dev, u32 mc_addr) +static int ip_rt_bug(struct sk_buff *skb) { - struct ip_mc_list *ip_mc; + printk(KERN_DEBUG "ip_rt_bug: %08x -> %08x, %s\n", skb->nh.iph->saddr, + skb->nh.iph->daddr, skb->dev ? skb->dev->name : "?"); + kfree_skb(skb, FREE_WRITE); + return 0; +} - if (mc_addr==htonl(INADDR_ALLHOSTS_GROUP)) - return 1; +/* + We do not cache source address of outgoing interface, + because it is used only by IP RR, TS and SRR options, + so that it out of fast path. - for (ip_mc=dev->ip_mc_list; ip_mc; ip_mc=ip_mc->next) - if (ip_mc->multiaddr == mc_addr) - return 1; - return 0; + BTW remember: "addr" is allowed to be not aligned + in IP options! 
+ */ + +void ip_rt_get_source(u8 *addr, struct rtable *rt) +{ + u32 src; + struct fib_result res; + + if (rt->key.iif == 0) { + memcpy(addr, &rt->rt_src, 4); + return; + } + if (fib_lookup(&rt->key, &res) == 0) { + src = FIB_RES_PREFSRC(res); + memcpy(addr, &src, 4); + return; + } + src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); + memcpy(addr, &src, 4); } -static int ip_rt_bug(struct sk_buff *skb) +static int +ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, + u8 tos, struct device *dev, int our) { - kfree_skb(skb, FREE_WRITE); - printk(KERN_DEBUG "ip_rt_bug: %08x -> %08x, %s\n", skb->nh.iph->saddr, - skb->nh.iph->daddr, skb->dev ? skb->dev->name : "?"); + unsigned hash; + struct rtable *rth; + u32 spec_dst; + struct in_device *in_dev = dev->ip_ptr; + + /* Primary sanity checks. */ + + if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) || + in_dev == NULL || skb->protocol != __constant_htons(ETH_P_IP)) + return -EINVAL; + + if (ZERONET(saddr)) { + if (!LOCAL_MCAST(daddr)) + return -EINVAL; + spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); + } else if (fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst) < 0) + return -EINVAL; + + rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); + if (!rth) + return -ENOBUFS; + + rth->u.dst.output= ip_rt_bug; + + atomic_set(&rth->u.dst.use, 1); + rth->key.dst = daddr; + rth->rt_dst = daddr; + rth->key.tos = tos; + rth->key.src = saddr; + rth->rt_src = saddr; +#ifdef CONFIG_IP_ROUTE_NAT + rth->rt_dst_map = daddr; + rth->rt_src_map = saddr; +#endif + rth->rt_iif = + rth->key.iif = dev->ifindex; + rth->u.dst.dev = &loopback_dev; + rth->key.oif = 0; + rth->rt_gateway = daddr; + rth->rt_spec_dst= spec_dst; + rth->rt_type = RTN_MULTICAST; + rth->rt_flags = RTCF_MULTICAST; + if (our) { + rth->u.dst.input= ip_local_deliver; + rth->rt_flags |= RTCF_LOCAL; + } + +#ifdef CONFIG_IP_MROUTE + if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev)) + rth->u.dst.input = ip_mr_input; +#endif + + 
hash = rt_hash_code(daddr, saddr^(dev->ifindex<<5), tos); + skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, 0); return 0; } /* - * This function is called ONLY FROM NET BH. No locking! - * * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet * must have correct destination already attached by output routine. * * Such approach solves two big problems: - * 1. Not simplex devices (if they exist 8)) are handled properly. + * 1. Not simplex devices are handled properly. * 2. IP spoofing attempts are filtered with 100% of guarantee. */ int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, - u8 tos, struct device *pdev) + u8 tos, struct device *dev) { - struct device * dev = pdev; - struct fib_info *fi = NULL; - struct fib_info *src_fi = NULL; + struct rt_key key; + struct fib_result res; + struct in_device *in_dev = dev->ip_ptr; + struct in_device *out_dev; unsigned flags = 0; - struct device *devout; struct rtable * rth; unsigned hash; - struct fib_result res; - u32 src_key = saddr; - u32 dst_key = daddr; - int err = -EINVAL; - int log = 0; + u32 spec_dst; + int err = -EINVAL; - hash = rt_hash_code(daddr, saddr^(unsigned long)pdev, tos); + /* + * IP on this device is disabled. + */ + + if (!in_dev) + return -EINVAL; + + key.dst = daddr; + key.src = saddr; + key.tos = tos; + key.iif = dev->ifindex; + key.oif = 0; + key.scope = RT_SCOPE_UNIVERSE; - /* Check for martians... */ + hash = rt_hash_code(daddr, saddr^(key.iif<<5), tos); + + /* Check for the most weird martians, which can be not detected + by fib_lookup. + */ if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr)) goto martian_source; - if (MULTICAST(daddr) || daddr == 0xFFFFFFFF) - goto mc_input; - /* Accept zero addresses only to limited broadcast/multicasts; - * I even do not know to fix it or not. 
+ if (daddr == 0xFFFFFFFF) + goto brd_input; + + /* Accept zero addresses only to limited broadcast; + * I even do not know to fix it or not. Waiting for complains :-) */ if (ZERONET(saddr)) goto martian_source; + if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr)) goto martian_destination; /* - * Device is not yet initialized, accept all addresses as ours. + * Now we are ready to route packet. */ - if (ZERONET(dev->pa_addr)) - goto promisc_ip; - - /* - * Now we are able to route packet. - */ - if ((err = fib_lookup(&res, daddr, saddr, tos, pdev, NULL)) < 0) { - if (!IS_ROUTER) + if ((err = fib_lookup(&key, &res))) { + if (!IN_DEV_FORWARD(in_dev)) return -EINVAL; goto no_route; } - fi = res.f->fib_info; - flags = fi->fib_flags; - devout = fi->fib_dev; - - if (flags&RTF_NAT) { - daddr = htonl((ntohl(daddr)&((1<fib_gateway; - fi = fib_lookup_info(daddr, saddr, tos, pdev, NULL); - if (!fi || fi->fib_flags&(RTF_NAT|RTF_LOCAL|RTF_MULTICAST|RTF_BROADCAST)) - return -EINVAL; - devout = fi->fib_dev; - flags = fi->fib_flags|RTCF_NAT|RTF_NAT; - } - - switch (res.fr->cl_action) { - case RTP_NAT: - /* Packet is from translated source; remember it */ - saddr = (saddr&~res.fr->cl_srcmask)|res.fr->cl_srcmap; - flags |= RTCF_NAT; - break; - case RTP_MASQUERADE: - /* Packet is from masqueraded source; remember it */ - flags |= RTCF_MASQ; - break; - default: - } - log = res.fr->cl_flags&RTRF_LOG; +#ifdef CONFIG_IP_ROUTE_NAT + /* Policy is applied before mapping destination, + but rerouting after map should be made with old source. 
+ */ - if (!(flags & RTF_LOCAL)) { - if (!IS_ROUTER || flags&RTF_NOFORWARD) - return -EINVAL; - } else { - fi = NULL; - devout = &loopback_dev; - if (flags&RTF_BROADCAST) - goto mc_input; + if (1) { + u32 src_map = saddr; + if (res.r) + src_map = fib_rules_policy(saddr, &res, &flags); + + if (res.type == RTN_NAT) { + key.dst = fib_rules_map_destination(daddr, &res); + if (fib_lookup(&key, &res) || res.type != RTN_UNICAST) + return -EINVAL; + flags |= RTCF_DNAT; + } + key.src = src_map; } - -#ifndef CONFIG_IP_LOCAL_RT_POLICY - if (flags&RTF_LOCAL) - src_fi = fib_lookup_info(src_key, 0, tos, &loopback_dev, NULL); - else #endif - if (fib_lookup(&res, src_key, daddr, tos, net_alias_main_dev(devout), NULL) == 0) { - src_fi = res.f->fib_info; - /* Destination is on masqueraded network: - * if it is real incoming frame, ip_forward will drop it. - */ - if (res.fr->cl_flags&RTRF_VALVE) - flags |= RTCF_VALVE; - } - if (src_fi) { - if (src_fi->fib_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTF_NAT)) + if (res.type == RTN_BROADCAST) + goto brd_input; + + if (res.type == RTN_LOCAL) { + spec_dst = daddr; + if (inet_addr_type(saddr) != RTN_UNICAST) goto martian_source; + goto local_input; + } - if (!(src_fi->fib_flags&RTF_GATEWAY)) - flags |= RTCF_DIRECTSRC; + if (!IN_DEV_FORWARD(in_dev)) + return -EINVAL; + if (res.type != RTN_UNICAST) + goto martian_destination; - if (net_alias_main_dev(src_fi->fib_dev) == pdev) - skb->dev = dev = src_fi->fib_dev; - else { - /* Route to packet source goes via - different interface; rfc1812 proposes - to drop them. - It is dangerous on not-stub/transit networks - because of path asymmetry. - */ - if (ipv4_config.rfc1812_filter >= 2) - goto martian_source; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (res.fi->fib_nhs > 1 && key.oif == 0) + fib_select_multipath(&key, &res); +#endif + out_dev = FIB_RES_DEV(res)->ip_ptr; - /* Weaker form of rfc1812 filtering. 
- If source is on directly connected network, - it can mean either local network configuration error - (the most probable case) or real IP spoofing attempt. - */ - if (ipv4_config.rfc1812_filter >= 1 && !(flags&RTCF_DIRECTSRC)) - goto martian_source; - } - } else if (ipv4_config.rfc1812_filter >= 1) + err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(res), dev, &spec_dst); + if (err < 0) goto martian_source; -make_route: + if (err) + flags |= RTCF_DIRECTSRC; + + if (out_dev == in_dev && err && !(flags&RTCF_NAT) && + (IN_DEV_SHARED_MEDIA(out_dev) + || inet_addr_onlink(out_dev, saddr, FIB_RES_GW(res)))) + flags |= RTCF_DOREDIRECT; + if (skb->protocol != __constant_htons(ETH_P_IP)) { - /* ARP request. Do not make route for invalid destination or - * if it is redirected. + /* Not IP (i.e. ARP). Do not make route for invalid + * destination or if it is redirected. */ - if (flags&(RTF_REJECT|RTF_BROADCAST|RTF_MULTICAST) || - skb->pkt_type == PACKET_OTHERHOST || - (devout == dev && !(flags&(RTF_LOCAL|RTCF_NAT)))) + if (out_dev == in_dev && flags&RTCF_DOREDIRECT) return -EINVAL; } @@ -948,147 +975,105 @@ if (!rth) return -ENOBUFS; - rth->u.dst.output= ip_rt_bug; - atomic_set(&rth->u.dst.use, 1); - rth->key.dst = dst_key; - rth->rt_dst = dst_key; - rth->rt_dst_map = daddr; + rth->key.dst = daddr; + rth->rt_dst = daddr; rth->key.tos = tos; - rth->key.src = src_key; - rth->rt_src = src_key; - rth->rt_src_map = saddr; - rth->rt_src_dev = dev; - rth->key.src_dev= pdev; - rth->u.dst.dev = devout; - rth->key.dst_dev= NULL; + rth->key.src = saddr; + rth->rt_src = saddr; rth->rt_gateway = daddr; - rth->rt_spec_dst= daddr; - - if (!(flags&RTF_REJECT)) { - if (flags&RTF_LOCAL) - rth->u.dst.input= ip_local_deliver; - if (!(flags&(RTF_NOFORWARD|RTF_BROADCAST))) { - if (flags&RTF_MULTICAST) { -#ifdef CONFIG_IP_MROUTE - if (!LOCAL_MCAST(daddr) && ipv4_config.multicast_route) { - rth->u.dst.input = ip_mr_input; - rth->u.dst.output = ip_output; - } +#ifdef CONFIG_IP_ROUTE_NAT + 
rth->rt_src_map = key.src; + rth->rt_dst_map = key.dst; + if (flags&RTCF_DNAT) + rth->rt_gateway = key.dst; #endif - } else if (!(flags&RTF_LOCAL)) { - rth->u.dst.input = ip_forward; - rth->u.dst.output = ip_output; - } - } - } else if (IS_ROUTER && !(flags&(RTF_MULTICAST|RTF_BROADCAST))) { - rth->u.dst.input= ip_error; - rth->u.dst.error= -err; - } - - if ((flags&(RTF_BROADCAST|RTF_MULTICAST)) || !(flags&RTF_LOCAL)) - rth->rt_spec_dst= dev->pa_addr; - - if (fi) { - rth->u.dst.pmtu = fi->fib_mtu; - rth->u.dst.window=fi->fib_window; - rth->u.dst.rtt = fi->fib_irtt; - if (flags & RTF_GATEWAY) - rth->rt_gateway = fi->fib_gateway; - } else { - rth->u.dst.pmtu = devout->mtu; - rth->u.dst.window=0; - rth->u.dst.rtt = TCP_TIMEOUT_INIT; - } - - if (!(flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTCF_NAT)) && - flags&RTCF_DIRECTSRC && - (devout == dev || (ipv4_config.rfc1620_redirects && - net_alias_main_dev(devout) == pdev))) - flags |= RTCF_DOREDIRECT; + rth->rt_iif = + rth->key.iif = dev->ifindex; + rth->u.dst.dev = out_dev->dev; + rth->key.oif = 0; + rth->rt_spec_dst= spec_dst; + + rth->u.dst.input = ip_forward; + rth->u.dst.output = ip_output; + + rth->u.dst.pmtu = res.fi->fib_mtu ? : out_dev->dev->mtu; + rth->u.dst.window=res.fi->fib_window ? : 0; + rth->u.dst.rtt = res.fi->fib_rtt ? 
: TCP_TIMEOUT_INIT; + if (FIB_RES_GW(res) && FIB_RES_NH(res).nh_scope == RT_SCOPE_LINK) + rth->rt_gateway = FIB_RES_GW(res); rth->rt_flags = flags; + rth->rt_type = res.type; - if (log) - printk(KERN_INFO "installing route %08lX -> %08lX\n", ntohl(rth->rt_src), ntohl(rth->rt_dst)); - - if (flags&(RTF_LOCAL|RTF_MULTICAST|RTF_BROADCAST|RTF_REJECT)) { - skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, 0); - return 0; - } - skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, __constant_ntohs(skb->protocol)); + skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, ntohs(skb->protocol)); return 0; -mc_input: +brd_input: if (skb->protocol != __constant_htons(ETH_P_IP)) return -EINVAL; if (ZERONET(saddr)) { - if (!ipv4_config.bootp_agent) - goto martian_source; - flags |= RTF_NOFORWARD|RTF_LOCAL; + spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); } else { - src_fi = fib_lookup_info(saddr, 0, tos, &loopback_dev, NULL); - if (!src_fi) + err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst); + if (err < 0) goto martian_source; - - if (src_fi->fib_flags&(RTF_LOCAL|RTF_BROADCAST|RTF_MULTICAST|RTF_NAT)) - goto martian_source; - - if (!(src_fi->fib_flags&RTF_GATEWAY)) + if (err) flags |= RTCF_DIRECTSRC; - - if (!MULTICAST(daddr) || !ipv4_config.multicast_route || - LOCAL_MCAST(daddr)) { - if (net_alias_main_dev(src_fi->fib_dev) == pdev) { - skb->dev = dev = src_fi->fib_dev; - } else { - /* Fascist not-unicast filtering 8) */ - goto martian_source; - } - } } + flags |= RTCF_BROADCAST; - if (!MULTICAST(daddr)) { - flags |= RTF_LOCAL|RTF_BROADCAST|RTF_NOFORWARD; - devout = dev; - goto make_route; - } - - flags |= RTF_MULTICAST|RTF_LOCAL; +local_input: + rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); + if (!rth) + return -ENOBUFS; - if (ip_check_mc(dev, daddr) == 0) { - flags &= ~RTF_LOCAL; + rth->u.dst.output= ip_rt_bug; - if (!ipv4_config.multicast_route || !(dev->flags&IFF_ALLMULTI)) - goto no_route; + atomic_set(&rth->u.dst.use, 1); + rth->key.dst = 
daddr; + rth->rt_dst = daddr; + rth->key.tos = tos; + rth->key.src = saddr; + rth->rt_src = saddr; +#ifdef CONFIG_IP_ROUTE_NAT + rth->rt_dst_map = key.dst; + rth->rt_src_map = key.src; +#endif + rth->rt_iif = + rth->key.iif = dev->ifindex; + rth->u.dst.dev = &loopback_dev; + rth->key.oif = 0; + rth->rt_gateway = daddr; + rth->rt_spec_dst= spec_dst; + rth->u.dst.input= ip_local_deliver; + if (res.type == RTN_UNREACHABLE) { + rth->u.dst.input= ip_error; + rth->u.dst.error= err; } - devout = dev; - goto make_route; - -promisc_ip: - flags |= RTF_LOCAL|RTF_NOFORWARD; - if (MULTICAST(daddr)) - flags |= RTF_MULTICAST; - else - flags |= RTF_BROADCAST; - devout = dev; - goto make_route; + rth->rt_flags = flags|RTCF_LOCAL; + rth->rt_type = res.type; + skb->dst = (struct dst_entry*)rt_intern_hash(hash, rth, 0); + return 0; no_route: - flags |= RTF_REJECT; - devout = dev; - goto make_route; + spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); + res.type = RTN_UNREACHABLE; + goto local_input; /* * Do not cache martian addresses: they should be logged (RFC1812) */ martian_destination: +#ifdef CONFIG_IP_ROUTE_VERBOSE if (ipv4_config.log_martians && net_ratelimit()) printk(KERN_WARNING "martian destination %08x from %08x, dev %s\n", daddr, saddr, dev->name); +#endif return -EINVAL; martian_source: +#ifdef CONFIG_IP_ROUTE_VERBOSE if (ipv4_config.log_martians && net_ratelimit()) { /* * RFC1812 recommenadtion, if source is martian, @@ -1104,6 +1089,7 @@ printk("\n"); } } +#endif return -EINVAL; } @@ -1112,224 +1098,298 @@ { struct rtable * rth; unsigned hash; - - if (skb->dst) - return 0; - -#if RT_CACHE_DEBUG >= 1 - if (dev->flags & IFF_LOOPBACK) { - printk(KERN_DEBUG "ip_route_input: bug: packet is looped back\n"); - return -EINVAL; - } - if (net_alias_main_dev(dev) != dev) - printk(KERN_DEBUG "ip_route_input: bug: packet is received on alias %s\n", dev->name); -#endif + int iif = dev->ifindex; tos &= IPTOS_TOS_MASK; - hash = rt_hash_code(daddr, saddr^(unsigned long)dev, tos); 
- skb->dev = dev; + hash = rt_hash_code(daddr, saddr^(iif<<5), tos); for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) { if (rth->key.dst == daddr && rth->key.src == saddr && - rth->key.src_dev == dev && - rth->key.dst_dev == NULL && + rth->key.iif == iif && + rth->key.oif == 0 && rth->key.tos == tos) { rth->u.dst.lastuse = jiffies; atomic_inc(&rth->u.dst.use); atomic_inc(&rth->u.dst.refcnt); skb->dst = (struct dst_entry*)rth; - skb->dev = rth->rt_src_dev; return 0; } } + + /* Multicast recognition logic is moved from route cache to here. + The problem was that too many ethernet cards have broken/missing + hardware multicast filters :-( As result the host on multicasting + network acquires a lot of useless route cache entries, sort of + SDR messages from all the world. Now we try to get rid of them. + Really, provided software IP multicast filter is organized + reasonably (at least, hashed), it does not result in a slowdown + comparing with route cache reject entries. + Note, that multicast routers are not affected, because + route cache entry is created eventually. + */ + if (MULTICAST(daddr)) { + int our = ip_check_mc(dev, daddr); + if (!our +#ifdef CONFIG_IP_MROUTE + && (LOCAL_MCAST(daddr) || !dev->ip_ptr || + !IN_DEV_MFORWARD((struct in_device*)dev->ip_ptr)) +#endif + ) return -EINVAL; + return ip_route_input_mc(skb, daddr, saddr, tos, dev, our); + } return ip_route_input_slow(skb, daddr, saddr, tos, dev); } - /* * Major route resolver routine. 
*/ -int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, - struct device *dev_out) +int ip_route_output_slow(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int oif) { - u32 src_key = saddr; - u32 dst_key = daddr; - u32 dst_map; - struct device *dst_dev_key = dev_out; + struct rt_key key; + struct fib_result res; unsigned flags = 0; - struct fib_info *fi = NULL; struct rtable *rth; -#ifdef CONFIG_IP_LOCAL_RT_POLICY - struct fib_result res; -#endif + struct device *dev_out = NULL; unsigned hash; tos &= IPTOS_TOS_MASK|1; + key.dst = daddr; + key.src = saddr; + key.tos = tos&IPTOS_TOS_MASK; + key.iif = loopback_dev.ifindex; + key.oif = oif; + key.scope = (tos&1) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + res.fi = NULL; if (saddr) { - if (MULTICAST(saddr) || BADCLASS(saddr) || ZERONET(saddr) || - __ip_chk_addr(saddr) != IS_MYADDR) + if (MULTICAST(saddr) || BADCLASS(saddr) || ZERONET(saddr)) return -EINVAL; - if (dev_out == NULL && (MULTICAST(daddr) || daddr == 0xFFFFFFFF)) - dev_out = ip_dev_find(saddr, NULL); + + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(saddr); + if (dev_out == NULL) + return -EINVAL; + + /* I removed check for oif == dev_out->oif here. + It was wrong by three reasons: + 1. ip_dev_find(saddr) can return wrong iface, if saddr is + assigned to multiple interfaces. + 2. Moreover, we are allowed to send packets with saddr + of another iface. --ANK + */ + + if (oif == 0 && (MULTICAST(daddr) || daddr == 0xFFFFFFFF)) { + /* Special hack: user can direct multicasts + and limited broadcast via necessary interface + without fiddling with IP_MULTICAST_IF or IP_TXINFO. + This hack is not just for fun, it allows + vic,vat and friends to work. + They bind socket to loopback, set ttl to zero + and expect that it will work. 
+ From the viewpoint of routing cache they are broken, + because we are not allowed to build multicast path + with loopback source addr (look, routing cache + cannot know, that ttl is zero, so that packet + will not leave this host and route is valid). + Luckily, this hack is good workaround. + */ + + key.oif = dev_out->ifindex; + goto make_route; + } + dev_out = NULL; } - if (!daddr) - daddr = saddr; + if (oif) { + dev_out = dev_get_by_index(oif); + if (dev_out == NULL) + return -ENODEV; + if (dev_out->ip_ptr == NULL) + return -ENODEV; /* Wrong error code */ - if (dev_out) { - if (!saddr) { - saddr = dev_out->pa_addr; - if (!daddr) - daddr = saddr; + if (LOCAL_MCAST(daddr) || daddr == 0xFFFFFFFF) { + key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); + goto make_route; } - dst_map = daddr; - if (MULTICAST(daddr) || daddr == 0xFFFFFFFF) + if (MULTICAST(daddr)) { + key.src = inet_select_addr(dev_out, 0, key.scope); goto make_route; + } + if (!daddr) + key.src = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); + } + + if (!key.dst) { + key.dst = key.src; + if (!key.dst) + key.dst = key.src = htonl(INADDR_LOOPBACK); + dev_out = &loopback_dev; + key.oif = loopback_dev.ifindex; + flags |= RTCF_LOCAL; + goto make_route; } - if (!daddr) - daddr = htonl(INADDR_LOOPBACK); + if (fib_lookup(&key, &res)) { + res.fi = NULL; + if (oif) { + /* Apparently, routing tables are wrong. Assume, + that the destination is on link. + + WHY? DW. + Because we are allowed to send to iface + even if it has NO routes and NO assigned + addresses. When oif is specified, routing + tables are looked up with only one purpose: + to catch if destination is gatewayed, rather than + direct. Moreover, if MSG_DONTROUTE is set, + we send packet, no matter of routing tables + of ifaddr state. --ANK -#ifdef CONFIG_IP_LOCAL_RT_POLICY - if (fib_lookup(&res, daddr, saddr, tos, &loopback_dev, dev_out)) + + We could make it even if oif is unknown, + likely IPv6, but we do not. 
+ */ + + printk(KERN_DEBUG "Dest not on link. Forcing...\n"); + if (key.src == 0) + key.src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); + goto make_route; + } return -ENETUNREACH; - fi = res.f->fib_info; - dst_map = daddr; + } - if (fi->fib_flags&RTF_NAT) + if (res.type == RTN_NAT) return -EINVAL; - if (!saddr) { - saddr = fi->fib_dev->pa_addr; + if (!key.src) { + key.src = FIB_RES_PREFSRC(res); + +#ifdef CONFIG_IP_MULTIPLE_TABLES /* * "Stabilization" of route. * This step is necessary, if locally originated packets - * are subjected to source routing, else we could get + * are subjected to policy routing, otherwise we could get * route flapping. */ - fi = fib_lookup_info(dst_map, saddr, tos, &loopback_dev, dev_out); - if (!fi) + if (fib_lookup(&key, &res)) return -ENETUNREACH; +#endif } -#else - fi = fib_lookup_info(daddr, 0, tos, &loopback_dev, dev_out); - if (!fi) - return -ENETUNREACH; - - if (fi->fib_flags&RTF_NAT) - return -EINVAL; - dst_map = daddr; - if (!saddr) - saddr = fi->fib_dev->pa_addr; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (res.fi->fib_nhs > 1 && key.oif == 0) + fib_select_multipath(&key, &res); #endif - flags |= fi->fib_flags; - dev_out = fi->fib_dev; + dev_out = FIB_RES_DEV(res); - if (RT_LOCALADDR(flags)) { + if (res.type == RTN_LOCAL) { dev_out = &loopback_dev; - fi = NULL; + key.oif = dev_out->ifindex; + res.fi = NULL; + flags |= RTCF_LOCAL; } - if (dst_dev_key && dev_out != dst_dev_key) - return -EINVAL; + key.oif = dev_out->ifindex; make_route: - if (LOOPBACK(saddr) && !(dev_out->flags&IFF_LOOPBACK)) { - printk(KERN_DEBUG "this guy talks to %08x from loopback\n", daddr); + if (LOOPBACK(key.src) && !(dev_out->flags&IFF_LOOPBACK)) { + printk(KERN_DEBUG "this guy talks to %08x from loopback\n", key.dst); return -EINVAL; } - if (daddr == 0xFFFFFFFF) - flags |= RTF_BROADCAST; - else if (MULTICAST(daddr)) - flags |= RTF_MULTICAST; - else if (BADCLASS(daddr) || ZERONET(daddr)) + if (key.dst == 0xFFFFFFFF) + res.type = RTN_BROADCAST; + else if 
(MULTICAST(key.dst)) + res.type = RTN_MULTICAST; + else if (BADCLASS(key.dst) || ZERONET(key.dst)) return -EINVAL; - if (flags&RTF_BROADCAST && (dev_out->flags&IFF_LOOPBACK || - !(dev_out->flags&IFF_BROADCAST))) - flags &= ~RTF_LOCAL; - else if (flags&RTF_MULTICAST) { + if (res.type == RTN_BROADCAST) { + flags |= RTCF_BROADCAST; + if (!(dev_out->flags&IFF_LOOPBACK) && dev_out->flags&IFF_BROADCAST) + flags |= RTCF_LOCAL; + } else if (res.type == RTN_MULTICAST) { + flags |= RTCF_MULTICAST; if (ip_check_mc(dev_out, daddr)) - flags |= RTF_LOCAL; + flags |= RTCF_LOCAL; } - + rth = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops); if (!rth) return -ENOBUFS; atomic_set(&rth->u.dst.use, 1); - rth->key.dst = dst_key; + rth->key.dst = daddr; rth->key.tos = tos; - rth->key.src = src_key; - rth->key.src_dev= NULL; - rth->key.dst_dev= dst_dev_key; - rth->rt_dst = daddr; - rth->rt_dst_map = dst_map; - rth->rt_src = saddr; - rth->rt_src_map = saddr; - rth->rt_src_dev = dev_out; + rth->key.src = saddr; + rth->key.iif = 0; + rth->key.oif = oif; + rth->rt_dst = key.dst; + rth->rt_src = key.src; +#ifdef CONFIG_IP_ROUTE_NAT + rth->rt_dst_map = key.dst; + rth->rt_src_map = key.src; +#endif + rth->rt_iif = dev_out->ifindex; rth->u.dst.dev = dev_out; - rth->rt_gateway = dst_map; - rth->rt_spec_dst= dev_out->pa_addr; + rth->rt_gateway = key.dst; + rth->rt_spec_dst= key.src; rth->u.dst.output=ip_output; - if (flags&RTF_LOCAL) { + if (flags&RTCF_LOCAL) { rth->u.dst.input = ip_local_deliver; - rth->rt_spec_dst = daddr; + rth->rt_spec_dst = key.dst; } - if (flags&(RTF_BROADCAST|RTF_MULTICAST)) { - rth->rt_spec_dst = dev_out->pa_addr; - flags &= ~RTF_GATEWAY; - if (flags&RTF_LOCAL) + if (flags&(RTCF_BROADCAST|RTCF_MULTICAST)) { + rth->rt_spec_dst = key.src; + if (flags&RTCF_LOCAL && !(dev_out->flags&IFF_LOOPBACK)) rth->u.dst.output = ip_mc_output; - if (flags&RTF_MULTICAST) { - if (dev_out->flags&IFF_ALLMULTI) - rth->u.dst.output = ip_mc_output; #ifdef CONFIG_IP_MROUTE - if 
(ipv4_config.multicast_route && !LOCAL_MCAST(daddr)) + if (res.type == RTN_MULTICAST && dev_out->ip_ptr) { + struct in_device *in_dev = dev_out->ip_ptr; + if (IN_DEV_MFORWARD(in_dev) && !LOCAL_MCAST(daddr)) { rth->u.dst.input = ip_mr_input; -#endif + rth->u.dst.output = ip_mc_output; + } } +#endif } - if (fi) { - if (flags&RTF_GATEWAY) - rth->rt_gateway = fi->fib_gateway; - rth->u.dst.pmtu = fi->fib_mtu; - rth->u.dst.window=fi->fib_window; - rth->u.dst.rtt = fi->fib_irtt; + if (res.fi) { + if (FIB_RES_GW(res) && FIB_RES_NH(res).nh_scope == RT_SCOPE_LINK) + rth->rt_gateway = FIB_RES_GW(res); + rth->u.dst.pmtu = res.fi->fib_mtu ? : dev_out->mtu; + rth->u.dst.window=res.fi->fib_window ? : 0; + rth->u.dst.rtt = res.fi->fib_rtt ? : TCP_TIMEOUT_INIT; } else { rth->u.dst.pmtu = dev_out->mtu; rth->u.dst.window=0; rth->u.dst.rtt = TCP_TIMEOUT_INIT; } rth->rt_flags = flags; - hash = rt_hash_code(dst_key, dst_dev_key ? src_key^(dst_dev_key->ifindex<<5) : src_key, tos); + rth->rt_type = res.type; + hash = rt_hash_code(daddr, saddr^(oif<<5), tos); *rp = rt_intern_hash(hash, rth, ETH_P_IP); return 0; } -int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, struct device *dev_out) +int ip_route_output(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int oif) { unsigned hash; struct rtable *rth; - hash = rt_hash_code(daddr, dev_out ? 
saddr^(dev_out->ifindex<<5) - : saddr, tos); + hash = rt_hash_code(daddr, saddr^(oif<<5), tos); start_bh_atomic(); for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) { if (rth->key.dst == daddr && rth->key.src == saddr && - rth->key.src_dev == NULL && - rth->key.dst_dev == dev_out && + rth->key.iif == 0 && + rth->key.oif == oif && rth->key.tos == tos) { rth->u.dst.lastuse = jiffies; atomic_inc(&rth->u.dst.use); @@ -1341,48 +1401,126 @@ } end_bh_atomic(); - return ip_route_output_slow(rp, daddr, saddr, tos, dev_out); + return ip_route_output_slow(rp, daddr, saddr, tos, oif); } -int ip_route_output_dev(struct rtable **rp, u32 daddr, u32 saddr, u8 tos, int ifindex) +#ifdef CONFIG_RTNETLINK + +int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - unsigned hash; - struct rtable *rth; - struct device *dev_out; + struct kern_rta *rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct rtable *rt = NULL; + u32 dst = 0; + u32 src = 0; + int err; + struct sk_buff *skb; + u8 *o; - hash = rt_hash_code(daddr, saddr^(ifindex<<5), tos); + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + return -ENOBUFS; - start_bh_atomic(); - for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) { - if (rth->key.dst == daddr && - rth->key.src == saddr && - rth->key.src_dev == NULL && - rth->key.tos == tos && - rth->key.dst_dev && - rth->key.dst_dev->ifindex == ifindex) { - rth->u.dst.lastuse = jiffies; - atomic_inc(&rth->u.dst.use); - atomic_inc(&rth->u.dst.refcnt); - end_bh_atomic(); - *rp = rth; - return 0; + /* Reserve room for dummy headers, this skb can pass + through good chunk of routing engine. 
+ */ + skb->mac.raw = skb->data; + skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); + + if (rta->rta_dst) + memcpy(&dst, rta->rta_dst, 4); + if (rta->rta_src) + memcpy(&src, rta->rta_src, 4); + + if (rta->rta_iif) { + struct device *dev; + dev = dev_get_by_index(*rta->rta_iif); + if (!dev) + return -ENODEV; + skb->protocol = __constant_htons(ETH_P_IP); + skb->dev = dev; + start_bh_atomic(); + err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); + end_bh_atomic(); + rt = (struct rtable*)skb->dst; + if (!err && rt->u.dst.error) + err = rt->u.dst.error; + } else { + err = ip_route_output(&rt, dst, src, rtm->rtm_tos, + rta->rta_oif ? *rta->rta_oif : 0); + } + if (err) { + kfree_skb(skb, FREE_WRITE); + return err; + } + + skb->dst = &rt->u.dst; + if (rtm->rtm_flags & RTM_F_NOTIFY) + rt->rt_flags |= RTCF_NOTIFY; + + nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + RTM_NEWROUTE, sizeof(*rtm)); + rtm = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + rtm->rtm_family = AF_INET; + rtm->rtm_dst_len = 32; + rtm->rtm_src_len = 32; + rtm->rtm_tos = rt->key.tos; + rtm->rtm_table = RT_TABLE_MAIN; + rtm->rtm_type = rt->rt_type; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_protocol = RTPROT_UNSPEC; + rtm->rtm_flags = (rt->rt_flags&~0xFFFF) | RTM_F_CLONED; + rtm->rtm_nhs = 0; + + o = skb->tail; + RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); + RTA_PUT(skb, RTA_SRC, 4, &rt->rt_src); + if (rt->u.dst.dev) + RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); + if (rt->rt_dst != rt->rt_gateway) + RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); + RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &rt->u.dst.pmtu); + RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &rt->u.dst.window); + RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &rt->u.dst.rtt); + RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); + rtm->rtm_optlen = skb->tail - o; + if (rta->rta_iif) { +#ifdef CONFIG_IP_MROUTE + if (MULTICAST(dst) && !LOCAL_MCAST(dst) && ipv4_config.multicast_route) { + NETLINK_CB(skb).pid = 
NETLINK_CB(in_skb).pid; + err = ipmr_get_route(skb, rtm); + if (err <= 0) + return err; + } else +#endif + { + RTA_PUT(skb, RTA_IIF, 4, rta->rta_iif); + rtm->rtm_optlen = skb->tail - o; } } - end_bh_atomic(); + nlh->nlmsg_len = skb->tail - (u8*)nlh; + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err < 0) + return err; + return 0; - dev_out = dev_get_by_index(ifindex); - if (!dev_out) - return -ENODEV; - return ip_route_output_slow(rp, daddr, saddr, tos, dev_out); +nlmsg_failure: +rtattr_failure: + kfree_skb(skb, FREE_WRITE); + return -EMSGSIZE; } -void ip_rt_multicast_event(struct device *dev) +#endif /* CONFIG_RTNETLINK */ + +void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(0); + rt_cache_flush(1*HZ); } __initfunc(void ip_rt_init(void)) { + devinet_init(); ip_fib_init(); #ifdef CONFIG_PROC_FS diff -u --recursive --new-file v2.1.67/linux/net/ipv4/syncookies.c linux/net/ipv4/syncookies.c --- v2.1.67/linux/net/ipv4/syncookies.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/syncookies.c Sun Nov 30 14:00:39 1997 @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * $Id: syncookies.c,v 1.2 1997/08/22 19:15:08 freitag Exp $ + * $Id: syncookies.c,v 1.3 1997/09/16 17:16:21 freitag Exp $ * * Missing: IPv6 support. * Some counter so that the Administrator can see when the machine @@ -200,9 +200,11 @@ * no easy way to do this. */ if (ip_route_output(&rt, - opt && opt->srr ? opt->faddr : - req->af.v4_req.rmt_addr,req->af.v4_req.loc_addr, - sk->ip_tos, NULL)) { + opt && + opt->srr ? 
opt->faddr : req->af.v4_req.rmt_addr, + req->af.v4_req.loc_addr, + sk->ip_tos, + 0)) { tcp_openreq_free(req); return NULL; } diff -u --recursive --new-file v2.1.67/linux/net/ipv4/sysctl_net_ipv4.c linux/net/ipv4/sysctl_net_ipv4.c --- v2.1.67/linux/net/ipv4/sysctl_net_ipv4.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/sysctl_net_ipv4.c Sun Nov 30 14:00:39 1997 @@ -1,6 +1,8 @@ /* * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. * + * $Id: sysctl_net_ipv4.c,v 1.21 1997/10/17 01:21:18 davem Exp $ + * * Begun April 1, 1996, Mike Shaver. * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] */ @@ -36,16 +38,15 @@ extern int sysctl_arp_confirm_timeout; extern int sysctl_arp_max_pings; +/* From icmp.c */ +extern int sysctl_icmp_echo_ignore_all; +extern int sysctl_icmp_echo_ignore_broadcasts; + /* From ip_fragment.c */ extern int sysctl_ipfrag_low_thresh; extern int sysctl_ipfrag_high_thresh; extern int sysctl_ipfrag_time; -/* From igmp.c */ -extern int sysctl_igmp_max_host_report_delay; -extern int sysctl_igmp_timer_scale; -extern int sysctl_igmp_age_threshold; - extern int sysctl_tcp_cong_avoidance; extern int sysctl_tcp_hoe_retransmits; extern int sysctl_tcp_sack; @@ -65,6 +66,13 @@ extern int sysctl_tcp_syn_taildrop; extern int sysctl_max_syn_backlog; +/* From icmp.c */ +extern int sysctl_icmp_sourcequench_time; +extern int sysctl_icmp_destunreach_time; +extern int sysctl_icmp_timeexceed_time; +extern int sysctl_icmp_paramprob_time; +extern int sysctl_icmp_echoreply_time; + int tcp_retr1_max = 255; extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp, @@ -77,6 +85,7 @@ struct ipv4_config ipv4_def_router_config = { 0, 1, 1, 1, 1, 1, 1, }; struct ipv4_config ipv4_def_host_config = { 1, 1, 1, 0, }; +static int ipv4_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp, void *buffer, size_t *lenp) { @@ -95,6 +104,15 @@ return ret; } +static +int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, + 
void *buffer, size_t *lenp) +{ + if (write) + rt_cache_flush(0); + return 0; +} + ctl_table ipv4_table[] = { {NET_IPV4_ARP_RES_TIME, "arp_res_time", &sysctl_arp_res_time, sizeof(int), 0644, NULL, &proc_dointvec}, @@ -147,17 +165,17 @@ {NET_IPV4_SOURCE_ROUTE, "ip_source_route", &ipv4_config.source_route, sizeof(int), 0644, NULL, &proc_dointvec}, - {NET_IPV4_ADDRMASK_AGENT, "ip_addrmask_agent", - &ipv4_config.addrmask_agent, sizeof(int), 0644, NULL, + {NET_IPV4_SEND_REDIRECTS, "ip_send_redirects", + &ipv4_config.send_redirects, sizeof(int), 0644, NULL, &proc_dointvec}, - {NET_IPV4_BOOTP_AGENT, "ip_bootp_agent", - &ipv4_config.bootp_agent, sizeof(int), 0644, NULL, + {NET_IPV4_AUTOCONFIG, "ip_autoconfig", + &ipv4_config.autoconfig, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_BOOTP_RELAY, "ip_bootp_relay", &ipv4_config.bootp_relay, sizeof(int), 0644, NULL, &proc_dointvec}, - {NET_IPV4_FIB_MODEL, "ip_fib_model", - &ipv4_config.fib_model, sizeof(int), 0644, NULL, + {NET_IPV4_PROXY_ARP, "ip_proxy_arp", + &ipv4_config.proxy_arp, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_NO_PMTU_DISC, "ip_no_pmtu_disc", &ipv4_config.no_pmtu_disc, sizeof(int), 0644, NULL, @@ -171,6 +189,9 @@ {NET_IPV4_RFC1620_REDIRECTS, "ip_rfc1620_redirects", &ipv4_config.rfc1620_redirects, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_RTCACHE_FLUSH, "ip_rtcache_flush", + NULL, sizeof(int), 0644, NULL, + &ipv4_sysctl_rtcache_flush}, {NET_IPV4_TCP_SYN_RETRIES, "tcp_syn_retries", &sysctl_tcp_syn_retries, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh", @@ -197,17 +218,6 @@ {NET_IPV4_TCP_FIN_TIMEOUT, "tcp_fin_timeout", &sysctl_tcp_fin_timeout, sizeof(int), 0644, NULL, &proc_dointvec_jiffies}, - {NET_IPV4_IGMP_MAX_HOST_REPORT_DELAY, "igmp_max_host_report_delay", - &sysctl_igmp_max_host_report_delay, sizeof(int), 0644, NULL, - &proc_dointvec}, - {NET_IPV4_IGMP_TIMER_SCALE, "igmp_timer_scale", - &sysctl_igmp_timer_scale, sizeof(int), 0644, 
NULL, &proc_dointvec}, -#if 0 - /* This one shouldn't be exposed to the user (too implementation - specific): */ - {NET_IPV4_IGMP_AGE_THRESHOLD, "igmp_age_threshold", - &sysctl_igmp_age_threshold, sizeof(int), 0644, NULL, &proc_dointvec}, -#endif #ifdef CONFIG_SYN_COOKIES {NET_TCP_SYNCOOKIES, "tcp_syncookies", &sysctl_tcp_syncookies, sizeof(int), 0644, NULL, &proc_dointvec}, @@ -218,6 +228,25 @@ sizeof(int), 0644, NULL, &proc_dointvec}, {NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog", &sysctl_max_syn_backlog, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_LOCAL_PORT_RANGE, "ip_local_port_range", + &sysctl_local_port_range, sizeof(sysctl_local_port_range), 0644, + NULL, &proc_dointvec}, + {NET_IPV4_ICMP_ECHO_IGNORE_ALL, "icmp_echo_ignore_all", + &sysctl_icmp_echo_ignore_all, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, "icmp_echo_ignore_broadcasts", + &sysctl_icmp_echo_ignore_broadcasts, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_ICMP_SOURCEQUENCH_RATE, "icmp_sourcequench_rate", + &sysctl_icmp_sourcequench_time, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_ICMP_DESTUNREACH_RATE, "icmp_destunreach_rate", + &sysctl_icmp_destunreach_time, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_ICMP_TIMEEXCEED_RATE, "icmp_timeexceed_rate", + &sysctl_icmp_timeexceed_time, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_ICMP_PARAMPROB_RATE, "icmp_paramprob_rate", + &sysctl_icmp_paramprob_time, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_ICMP_ECHOREPLY_RATE, "icmp_echoreply_rate", + &sysctl_icmp_echoreply_time, sizeof(int), 0644, NULL, &proc_dointvec}, {0} }; diff -u --recursive --new-file v2.1.67/linux/net/ipv4/tcp.c linux/net/ipv4/tcp.c --- v2.1.67/linux/net/ipv4/tcp.c Wed Sep 24 20:05:48 1997 +++ linux/net/ipv4/tcp.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). 
* - * Version: $Id: tcp.c,v 1.71 1997/09/06 05:11:45 davem Exp $ + * Version: $Id: tcp.c,v 1.75 1997/10/16 02:57:34 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -437,8 +437,8 @@ struct open_request *prev = (struct open_request *)&tp->syn_wait_queue; while(req) { if (req->sk && - (req->sk->state == TCP_ESTABLISHED || - req->sk->state >= TCP_FIN_WAIT1)) + ((1 << req->sk->state) & + ~(TCPF_SYN_SENT|TCPF_SYN_RECV))) break; prev = req; req = req->dl_next; @@ -603,7 +603,7 @@ if (sk->err) mask = POLLERR; /* Connected? */ - if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) { + if ((1 << sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) { if (sk->shutdown & RCV_SHUTDOWN) mask |= POLLHUP; @@ -653,7 +653,8 @@ { unsigned long amount; - if (sk->state == TCP_LISTEN) return(-EINVAL); + if (sk->state == TCP_LISTEN) + return(-EINVAL); amount = sock_wspace(sk); return put_user(amount, (int *)arg); } @@ -701,7 +702,8 @@ { release_sock(sk); cli(); - if (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT && sk->err == 0) + if (((1 << sk->state) & ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT)) && + sk->err == 0) interruptible_sleep_on(sk->sleep); sti(); lock_sock(sk); @@ -779,11 +781,11 @@ struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); /* Wait for a connection to finish. */ - while (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) { + while ((1 << sk->state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { if (sk->err) return sock_error(sk); - if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) { + if ((1 << sk->state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (sk->keepopen) send_sig(SIGPIPE, current, 0); return -EPIPE; @@ -982,7 +984,7 @@ /* If we're closed, don't send an ack, or we'll get a RST * from the closed destination. 
*/ - if ((sk->state == TCP_CLOSE) || (sk->state == TCP_TIME_WAIT)) + if ((1 << sk->state) & (TCPF_CLOSE|TCPF_TIME_WAIT)) return; tcp_send_ack(sk); @@ -1400,10 +1402,8 @@ return; /* If we've already sent a FIN, or it's a closed state, skip this. */ - if (sk->state == TCP_ESTABLISHED || - sk->state == TCP_SYN_SENT || - sk->state == TCP_SYN_RECV || - sk->state == TCP_CLOSE_WAIT) { + if ((1 << sk->state) & + (TCPF_ESTABLISHED|TCPF_SYN_SENT|TCPF_SYN_RECV|TCPF_CLOSE_WAIT)) { lock_sock(sk); /* Flag that the sender has shutdown. */ @@ -1424,9 +1424,7 @@ static inline int closing(struct sock * sk) { - return ((1 << sk->state) & ((1 << TCP_FIN_WAIT1)| - (1 << TCP_CLOSING)| - (1 << TCP_LAST_ACK))); + return ((1 << sk->state) & (TCPF_FIN_WAIT1|TCPF_CLOSING|TCPF_LAST_ACK)); } diff -u --recursive --new-file v2.1.67/linux/net/ipv4/tcp_input.c linux/net/ipv4/tcp_input.c --- v2.1.67/linux/net/ipv4/tcp_input.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/tcp_input.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.56 1997/08/31 08:24:54 freitag Exp $ + * Version: $Id: tcp_input.c,v 1.64 1997/10/30 23:52:24 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -64,6 +64,8 @@ #define SYNC_INIT 1 #endif +extern int sysctl_tcp_fin_timeout; + int sysctl_tcp_cong_avoidance; int sysctl_tcp_hoe_retransmits; int sysctl_tcp_sack; @@ -249,7 +251,7 @@ * really. */ -static int tcp_reset(struct sock *sk, struct sk_buff *skb) +static void tcp_reset(struct sock *sk, struct sk_buff *skb) { sk->zapped = 1; @@ -285,8 +287,6 @@ #endif if (!sk->dead) sk->state_change(sk); - - return(0); } /* @@ -345,15 +345,16 @@ /* Cheaper to set again then to * test syn. Optimize this? 
*/ - if (sysctl_tcp_timestamps && !no_fancy) + if (sysctl_tcp_timestamps && !no_fancy) { tp->tstamp_ok = 1; - tp->saw_tstamp = 1; - tp->rcv_tsval = ntohl(*(__u32 *)ptr); - tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4)); + tp->saw_tstamp = 1; + tp->rcv_tsval = ntohl(*(__u32 *)ptr); + tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4)); + } } break; case TCPOPT_SACK: - if (no_fancy) + if (no_fancy || !sysctl_tcp_sack) break; tp->sacks = (opsize-2)>>3; if (tp->sacks<<3 == opsize-2) { @@ -486,8 +487,10 @@ #define FLAG_WIN_UPDATE 0x02 #define FLAG_DATA_ACKED 0x04 -static __inline__ void clear_fast_retransmit(struct sock *sk) { +static __inline__ void clear_fast_retransmit(struct sock *sk) +{ struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp); + if (tp->dup_acks > 3) { tp->retrans_head = NULL; tp->snd_cwnd = max(tp->snd_ssthresh, 1); @@ -857,8 +860,7 @@ tcp_ack_probe(sk, ack); /* See if we can take anything off of the retransmit queue. */ - if (tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt)) - flag |= FLAG_DATA_ACKED; + flag |= tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt); /* If we have a timestamp, we always do rtt estimates. */ if (tp->saw_tstamp) { @@ -879,7 +881,7 @@ } } else { tcp_set_rto(tp); - if (flag && FLAG_DATA_ACKED) + if (flag & FLAG_DATA_ACKED) (*tcp_sys_cong_ctl_f)(sk, seq, ack, seq_rtt); } /* NOTE: safe here so long as cong_ctl doesn't use rto */ @@ -973,6 +975,11 @@ { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + if(sk->state == TCP_SYN_SENT) { + /* RFC793 says to drop the segment and return. */ + return 1; + } + /* XXX This fin_seq thing should disappear... -DaveM */ tp->fin_seq = skb->end_seq; @@ -985,7 +992,6 @@ switch(sk->state) { case TCP_SYN_RECV: - case TCP_SYN_SENT: case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); @@ -999,12 +1005,16 @@ * nothing. */ break; + case TCP_LAST_ACK: + /* RFC793: Remain in the LAST-ACK state. */ + break; case TCP_TIME_WAIT: /* Received a retransmission of the FIN, * restart the TIME_WAIT timer. 
*/ tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - return(0); + break; + case TCP_FIN_WAIT1: /* This case occurs when a simultaneous close * happens, we must ack the received FIN and @@ -1028,15 +1038,13 @@ /* Already in CLOSE. */ break; default: - /* FIXME: Document whats happening in this case. -DaveM */ - tcp_set_state(sk,TCP_LAST_ACK); - - /* Start the timers. */ - tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); - return(0); + /* Only TCP_LISTEN is left, in that case we should never + * reach this piece of code. + */ + printk("tcp_fin: Impossible, sk->state=%d\n", sk->state); + break; }; - - return(0); + return 0; } /* This one checks to see if we can put data from the @@ -1337,8 +1345,6 @@ * We do checksum and copy also but from device to kernel. */ - tp = &(sk->tp_pinfo.af_tcp); - /* * RFC1323: H1. Apply PAWS check first. */ @@ -1373,6 +1379,7 @@ tcp_data_snd_check(sk); } + tcp_statistics.TcpInErrs++; kfree_skb(skb, FREE_READ); return 0; } else if (skb->ack_seq == tp->snd_una) { @@ -1409,6 +1416,7 @@ if(th->syn && skb->seq != sk->syn_seq) { SOCK_DEBUG(sk, "syn in established state\n"); + tcp_statistics.TcpInErrs++; tcp_reset(sk, skb); return 1; } @@ -1430,7 +1438,7 @@ /* step 8: check the FIN bit */ if (th->fin) - tcp_fin(skb, sk, th); + (void) tcp_fin(skb, sk, th); tcp_data_snd_check(sk); tcp_ack_snd_check(sk); @@ -1449,82 +1457,67 @@ /* Shared between IPv4 and IPv6 now. */ struct sock * -tcp_check_req(struct sock *sk, struct sk_buff *skb, void *opt) +tcp_check_req(struct sock *sk, struct sk_buff *skb, struct open_request *req) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - struct open_request *dummy, *req; /* assumption: the socket is not in use. * as we checked the user count on tcp_rcv and we're * running from a soft interrupt. */ - req = tp->af_specific->search_open_req(tp, (void *)skb->nh.raw, skb->h.th, - &dummy); - if (req) { - if (req->sk) { - /* socket already created but not - * yet accepted()... 
- */ - sk = req->sk; - } else { - u32 flg; - /* Check for syn retransmission */ - flg = *(((u32 *)skb->h.th) + 3); + if (req->sk) { + /* socket already created but not + * yet accepted()... + */ + sk = req->sk; + } else { + u32 flg; - flg &= __constant_htonl(0x00170000); - if ((flg == __constant_htonl(0x00020000)) && - (!after(skb->seq, req->rcv_isn))) { + /* Check for syn retransmission */ + flg = *(((u32 *)skb->h.th) + 3); + + flg &= __constant_htonl(0x00170000); + /* Only SYN set? */ + if (flg == __constant_htonl(0x00020000)) { + if (!after(skb->seq, req->rcv_isn)) { /* retransmited syn. */ req->class->rtx_syn_ack(sk, req); return NULL; + } else { + return sk; /* New SYN */ } - - /* In theory the packet could be for a cookie, but - * TIME_WAIT should guard us against this. - * XXX: Nevertheless check for cookies? - */ - if (skb->ack_seq != req->snt_isn+1) { - tp->af_specific->send_reset(skb); - return NULL; - } - - sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); - tcp_dec_slow_timer(TCP_SLT_SYNACK); - if (sk == NULL) - return NULL; + } - req->expires = 0UL; - req->sk = sk; + /* We know it's an ACK here */ + /* In theory the packet could be for a cookie, but + * TIME_WAIT should guard us against this. + * XXX: Nevertheless check for cookies? + * This sequence number check is done again later, + * but we do it here to prevent syn flood attackers + * from creating big SYN_RECV sockets. 
+ */ + if (!between(skb->ack_seq, req->snt_isn, req->snt_isn+1) || + !between(skb->seq, req->rcv_isn, + req->rcv_isn+1+req->rcv_wnd)) { + req->class->send_reset(skb); + return NULL; } - } -#ifdef CONFIG_SYNCOOKIES - else { - sk = tp->af_specific->cookie_check(sk, skb, opt); + + sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); + tcp_dec_slow_timer(TCP_SLT_SYNACK); if (sk == NULL) - return NULL; + return NULL; + + req->expires = 0UL; + req->sk = sk; } -#endif skb_orphan(skb); skb_set_owner_r(skb, sk); return sk; } - -static void tcp_rst_req(struct tcp_opt *tp, struct sk_buff *skb) -{ - struct open_request *req, *prev; - - req = tp->af_specific->search_open_req(tp,skb->nh.iph,skb->h.th,&prev); - if (!req) - return; - /* Sequence number check required by RFC793 */ - if (before(skb->seq, req->snt_isn) || after(skb->seq, req->snt_isn+1)) - return; - tcp_synq_unlink(tp, req, prev); -} - /* * This function implements the receiving procedure of RFC 793. * It's called from both tcp_v4_rcv and tcp_v6_rcv and should be @@ -1540,16 +1533,11 @@ /* state == CLOSED, hash lookup always fails, so no worries. -DaveM */ switch (sk->state) { case TCP_LISTEN: - if (th->rst) { - tcp_rst_req(tp, skb); - goto discard; - } - /* These use the socket TOS.. 
* might want to be the received TOS */ - if(th->ack) - return 1; + if(th->ack) + return 1; if(th->syn) { if(tp->af_specific->conn_request(sk, skb, opt, 0) < 0) @@ -1812,6 +1800,8 @@ tcp_set_state(sk, TCP_FIN_WAIT2); if (!sk->dead) sk->state_change(sk); + else + tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout); } break; @@ -1870,8 +1860,10 @@ } /* step 8: check the FIN bit */ - if (th->fin) - tcp_fin(skb, sk, th); + if (th->fin) { + if(tcp_fin(skb, sk, th) != 0) + goto discard; + } tcp_data_snd_check(sk); tcp_ack_snd_check(sk); diff -u --recursive --new-file v2.1.67/linux/net/ipv4/tcp_ipv4.c linux/net/ipv4/tcp_ipv4.c --- v2.1.67/linux/net/ipv4/tcp_ipv4.c Sun Sep 7 13:10:43 1997 +++ linux/net/ipv4/tcp_ipv4.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.62 1997/09/04 22:34:59 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.74 1997/10/30 23:52:27 davem Exp $ * * IPv4 specific functions * @@ -88,6 +88,13 @@ */ struct sock *tcp_bound_hash[TCP_BHTABLE_SIZE]; +/* + * This array holds the first and last local port number. + * For high-usage systems, use sysctl to change this to + * 32768-61000 + */ +int sysctl_local_port_range[2] = { 1024, 4999 }; + static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport, __u32 faddr, __u16 fport) { @@ -116,6 +123,13 @@ unsigned char state = sk2->state; int sk2_reuse = sk2->reuse; + /* Two sockets can be bound to the same port if they're + * bound to different interfaces. + */ + + if(sk->bound_dev_if != sk2->bound_dev_if) + continue; + if(!sk2->rcv_saddr || !sk->rcv_saddr) { if((!sk2_reuse) || (!sk_reuse) || @@ -161,13 +175,15 @@ */ unsigned short tcp_good_socknum(void) { - static int start = PROT_SOCK; + static int start = 0; static int binding_contour = 0; int best = 0; int size = 32767; /* a big num. 
*/ int retval = 0, i, end, bc; SOCKHASH_LOCK(); + if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0]) + start = sysctl_local_port_range[0]; i = tcp_bhashfn(start); end = i + TCP_BHTABLE_SIZE; bc = binding_contour; @@ -207,8 +223,8 @@ best = retval; /* mark the starting point to avoid infinite loops */ while(tcp_lport_inuse(retval)) { retval = tcp_bhashnext(retval,i); - if (retval > 32767) /* Upper bound */ - retval = tcp_bhashnext(PROT_SOCK,i); + if (retval > sysctl_local_port_range[1]) /* Upper bound */ + retval = tcp_bhashnext(sysctl_local_port_range[0],i); if (retval == best) { /* This hash chain is full. No answer. */ retval = 0; @@ -218,8 +234,6 @@ done: start = (retval + 1); - if (start > 32767 || start < PROT_SOCK) - start = PROT_SOCK; SOCKHASH_UNLOCK(); return retval; @@ -301,20 +315,34 @@ * connection. So always assume those are both wildcarded * during the search since they can never be otherwise. */ -static struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum) +static struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif) { struct sock *sk; struct sock *result = NULL; + int score, hiscore; + hiscore=0; for(sk = tcp_listening_hash[tcp_lhashfn(hnum)]; sk; sk = sk->next) { if(sk->num == hnum) { __u32 rcv_saddr = sk->rcv_saddr; + score = 1; if(rcv_saddr) { - if(rcv_saddr == daddr) - return sk; /* Best possible match. */ - } else if(!result) + if (rcv_saddr != daddr) + continue; + score++; + } + if (sk->bound_dev_if) { + if (sk->bound_dev_if != dif) + continue; + score++; + } + if (score == 3) + return sk; + if (score > hiscore) { + hiscore = score; result = sk; + } } } return result; @@ -324,7 +352,7 @@ * we need not check it for TCP lookups anymore, thanks Alexey. 
-DaveM */ static inline struct sock *__tcp_v4_lookup(struct tcphdr *th, - u32 saddr, u16 sport, u32 daddr, u16 dport) + u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) { unsigned short hnum = ntohs(dport); struct sock *sk; @@ -338,7 +366,8 @@ if(sk->daddr == saddr && /* remote address */ sk->dummy_th.dest == sport && /* remote port */ sk->num == hnum && /* local port */ - sk->rcv_saddr == daddr) /* local address */ + sk->rcv_saddr == daddr && /* local address */ + (!sk->bound_dev_if || sk->bound_dev_if == dif)) goto hit; /* You sunk my battleship! */ /* Must check for a TIME_WAIT'er before going to listener hash. */ @@ -346,17 +375,18 @@ if(sk->daddr == saddr && /* remote address */ sk->dummy_th.dest == sport && /* remote port */ sk->num == hnum && /* local port */ - sk->rcv_saddr == daddr) /* local address */ + sk->rcv_saddr == daddr && /* local address */ + (!sk->bound_dev_if || sk->bound_dev_if == dif)) goto hit; - sk = tcp_v4_lookup_listener(daddr, hnum); + sk = tcp_v4_lookup_listener(daddr, hnum, dif); hit: return sk; } -__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport) +__inline__ struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) { - return __tcp_v4_lookup(0, saddr, sport, daddr, dport); + return __tcp_v4_lookup(0, saddr, sport, daddr, dport, dif); } #ifdef CONFIG_IP_TRANSPARENT_PROXY @@ -374,16 +404,25 @@ #define tcp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \ secondlist((hpnum),(sk)->bind_next,(fpass)) -struct sock *tcp_v4_proxy_lookup(unsigned short num, unsigned long raddr, - unsigned short rnum, unsigned long laddr, - unsigned long paddr, unsigned short pnum) +static struct sock *tcp_v4_proxy_lookup(unsigned short num, unsigned long raddr, + unsigned short rnum, unsigned long laddr, + struct device *dev, unsigned short pnum, + int dif) { struct sock *s, *result = NULL; int badness = -1; + u32 paddr = 0; unsigned short hnum = ntohs(num); unsigned short hpnum = ntohs(pnum); int firstpass = 
1; + if(dev && dev->ip_ptr) { + struct in_device *idev = dev->ip_ptr; + + if(idev->ifa_list) + paddr = idev->ifa_list->ifa_local; + } + /* This code must run only from NET_BH. */ for(s = tcp_v4_proxy_loop_init(hnum, hpnum, s, firstpass); s != NULL; @@ -408,7 +447,12 @@ continue; score++; } - if(score == 3 && s->num == hnum) { + if(s->bound_dev_if) { + if(s->bound_dev_if != dif) + continue; + score++; + } + if(score == 4 && s->num == hnum) { result = s; break; } else if(score > badness && (s->num == hpnum || s->rcv_saddr)) { @@ -486,7 +530,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sk_buff *buff; - struct sk_buff *skb1; int tmp; struct tcphdr *th; struct rtable *rt; @@ -517,11 +560,11 @@ } tmp = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr, - RT_TOS(sk->ip_tos)|(sk->localroute || 0)); + RT_TOS(sk->ip_tos)|(sk->localroute || 0), sk->bound_dev_if); if (tmp < 0) return tmp; - if (rt->rt_flags&(RTF_MULTICAST|RTF_BROADCAST)) { + if (rt->rt_flags&(RTCF_MULTICAST|RTCF_BROADCAST)) { ip_rt_put(rt); return -ENETUNREACH; } @@ -533,13 +576,22 @@ } lock_sock(sk); + + /* Do this early, so there is less state to unwind on failure. */ + buff = sock_wmalloc(sk, MAX_SYN_SIZE, 0, GFP_KERNEL); + if (buff == NULL) { + release_sock(sk); + ip_rt_put(rt); + return(-ENOBUFS); + } + sk->dst_cache = &rt->u.dst; sk->daddr = rt->rt_dst; if (!sk->saddr) sk->saddr = rt->rt_src; sk->rcv_saddr = sk->saddr; - if (sk->priority == SOPRI_NORMAL) + if (sk->priority == 0) sk->priority = rt->u.dst.priority; sk->dummy_th.dest = usin->sin_port; @@ -557,20 +609,23 @@ sk->err = 0; - buff = sock_wmalloc(sk, MAX_SYN_SIZE, 0, GFP_KERNEL); - if (buff == NULL) { - release_sock(sk); - return(-ENOBUFS); - } - /* Put in the IP header and routing stuff. */ tmp = ip_build_header(buff, sk); if (tmp < 0) { + /* Caller has done ip_rt_put(rt) and set sk->dst_cache + * to NULL. 
We must unwind the half built TCP socket + * state so that this failure does not create a "stillborn" + * sock (ie. future re-tries of connect() would fail). + */ + sk->daddr = 0; + sk->saddr = sk->rcv_saddr = 0; kfree_skb(buff, FREE_WRITE); release_sock(sk); return(-ENETUNREACH); } + /* No failure conditions can result past this point. */ + th = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr)); buff->h.th = th; @@ -582,11 +637,10 @@ th->ack = 0; th->syn = 1; - sk->mtu = rt->u.dst.pmtu; if ((sk->ip_pmtudisc == IP_PMTUDISC_DONT || (sk->ip_pmtudisc == IP_PMTUDISC_WANT && - rt->rt_flags&RTF_NOPMTUDISC)) && + rt->rt_flags&RTCF_NOPMTUDISC)) && rt->u.dst.pmtu > 576) sk->mtu = 576; @@ -639,8 +693,7 @@ tp->packets_out++; buff->when = jiffies; - skb1 = skb_clone(buff, GFP_KERNEL); - ip_queue_xmit(skb1); + ip_queue_xmit(skb_clone(buff, GFP_KERNEL)); /* Timer for repeating the SYN until an answer. */ tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto); @@ -691,11 +744,10 @@ * This should be replaced with a global hash table. */ static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, - void *header, - struct tcphdr *th, - struct open_request **prevp) + struct iphdr *iph, + struct tcphdr *th, + struct open_request **prevp) { - struct iphdr *iph = header; struct open_request *req, *prev; __u16 rport = th->source; @@ -750,7 +802,8 @@ * dropped. This is the new "fast" path mtu * discovery. */ - tcp_simple_retransmit(sk); + if (!sk->sock_readers) + tcp_simple_retransmit(sk); } } } @@ -764,7 +817,7 @@ * to find the appropriate port. 
*/ -void tcp_v4_err(struct sk_buff *skb, unsigned char *dp) +void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len) { struct iphdr *iph = (struct iphdr*)dp; struct tcphdr *th; @@ -773,18 +826,16 @@ int code = skb->h.icmph->code; struct sock *sk; __u32 seq; + int opening; -#if 0 - /* check wrong - icmp.c should pass in len */ - if (skb->len < 8+(iph->ihl << 2)+sizeof(struct tcphdr)) { + if (len < (iph->ihl << 2)+sizeof(struct tcphdr)) { icmp_statistics.IcmpInErrors++; return; } -#endif th = (struct tcphdr*)(dp+(iph->ihl<<2)); - sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source); + sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { icmp_statistics.IcmpInErrors++; return; @@ -793,19 +844,38 @@ /* pointless, because we have no way to retry when sk is locked. But the socket should be really locked here for better interaction with the socket layer. This needs to be solved for SMP - (I would prefer an "ICMP backlog"). */ - /* lock_sock(sk); */ - tp = &sk->tp_pinfo.af_tcp; - - seq = ntohl(th->seq); + (I would prefer an "ICMP backlog"). + tcp_v4_err is called only from bh, so that lock_sock is pointless, + even in commented form :-) --ANK + + Note "for SMP" ;) -AK + + Couple of notes about backlogging: + - error_queue could be used for it. + - could, but MUST NOT :-), because: + a) it is not clear, + who will process deferred messages. + b) ICMP is not reliable by design, so that you can safely + drop ICMP messages. Besides that, if ICMP really arrived + it is very unlikely, that socket is locked. --ANK + + I don't think it's unlikely that sk is locked. With the + open_request stuff there is much more stress on the main + LISTEN socket. 
I just want to make sure that all ICMP unreachables + destroy unneeded open_requests as reliable as possible (for + syn flood protection) -AK + */ + tp = &sk->tp_pinfo.af_tcp; #ifdef ICMP_PARANOIA - if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { + seq = ntohl(th->seq); + if (sk->state != TCP_LISTEN && + !between(seq, tp->snd_una, max(tp->snd_una+32768,tp->snd_nxt))) { if (net_ratelimit()) printk(KERN_DEBUG "icmp packet outside the tcp window:" " s:%d %u,%u,%u\n", (int)sk->state, seq, tp->snd_una, tp->snd_nxt); - goto out; + return; } #endif @@ -814,7 +884,7 @@ tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2); tp->snd_cwnd = tp->snd_ssthresh; tp->high_seq = tp->snd_nxt; - goto out; + return; case ICMP_PARAMETERPROB: sk->err=EPROTO; sk->error_report(sk); @@ -822,7 +892,7 @@ case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ do_pmtu_discovery(sk, iph, th); - goto out; + return; } break; } @@ -830,62 +900,62 @@ /* If we've already connected we will keep trying * until we time out, or the user gives up. */ - if (code <= NR_ICMP_UNREACH) { - int fatal = 0; - - if (sk->state == TCP_LISTEN) { - struct open_request *req, *prev; - - /* Prevent race conditions with accept() - * icmp is unreliable. - * This is the easiest solution for now - for - * very big servers it might prove inadequate. - */ - if (sk->sock_readers) { - /* XXX: add a counter here to profile this. - * If too many ICMPs get dropped on busy - * servers this needs to be solved differently. - */ - goto out; - } + if (code > NR_ICMP_UNREACH) + return; - req = tcp_v4_search_req(tp, iph, th, &prev); - if (!req) - goto out; + opening = 0; + switch (sk->state) { + struct open_request *req, *prev; + case TCP_LISTEN: + /* Prevent race conditions with accept() - + * ICMP is unreliable. + */ + if (sk->sock_readers) { + /* XXX: add a counter here to profile this. + * If too many ICMPs get dropped on busy + * servers this needs to be solved differently. 
+ */ + return; + } + + if (!th->syn && !th->ack) + return; + req = tcp_v4_search_req(tp, iph, th, &prev); + if (!req) + return; #ifdef ICMP_PARANOIA - if (seq != req->snt_isn) { - if (net_ratelimit()) - printk(KERN_DEBUG "icmp packet for openreq " - "with wrong seq number:%d:%d\n", - seq, req->snt_isn); - goto out; - } + if (seq != req->snt_isn) { + if (net_ratelimit()) + printk(KERN_DEBUG "icmp packet for openreq " + "with wrong seq number:%d:%d\n", + seq, req->snt_isn); + return; + } #endif - if (req->sk) { /* not yet accept()ed */ - sk = req->sk; - } else { - tcp_synq_unlink(tp, req, prev); - tcp_openreq_free(req); - fatal = 1; - } - } else if (sk->state == TCP_SYN_SENT - || sk->state == TCP_SYN_RECV) - fatal = 1; - - if(icmp_err_convert[code].fatal || fatal) { - sk->err = icmp_err_convert[code].errno; - if (fatal) { - tcp_statistics.TcpAttemptFails++; - if (sk->state != TCP_LISTEN) - tcp_set_state(sk,TCP_CLOSE); - sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ - } - } else /* Only an error on timeout */ - sk->err_soft = icmp_err_convert[code].errno; + if (req->sk) { /* not yet accept()ed */ + sk = req->sk; /* report error in accept */ + } else { + tcp_synq_unlink(tp, req, prev); + req->class->destructor(req); + tcp_openreq_free(req); + } + /* FALL THOUGH */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + opening = 1; + break; } - -out: - /* release_sock(sk); */ + + if(icmp_err_convert[code].fatal || opening) { + sk->err = icmp_err_convert[code].errno; + if (opening) { + tcp_statistics.TcpAttemptFails++; + if (sk->state != TCP_LISTEN) + tcp_set_state(sk,TCP_CLOSE); + sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ + } + } else /* Only an error on timeout */ + sk->err_soft = icmp_err_convert[code].errno; } /* This routine computes an IPv4 TCP checksum. */ @@ -948,6 +1018,7 @@ /* FIXME: should this carry an options packet? 
*/ ip_queue_xmit(skb1); tcp_statistics.TcpOutSegs++; + tcp_statistics.TcpOutRsts++; } #ifdef CONFIG_IP_TRANSPARENT_PROXY @@ -962,7 +1033,7 @@ struct tcphdr *th = (struct tcphdr *)(skb->nh.raw + iph->ihl*4); struct sock *sk; - sk = tcp_v4_lookup(iph->saddr, th->source, iph->daddr, th->dest); + sk = tcp_v4_lookup(iph->saddr, th->source, iph->daddr, th->dest, skb->dev->ifindex); if (!sk) return 0; @@ -992,7 +1063,7 @@ kfree_skb(skb, FREE_WRITE); return; } - + mss = (skb->dst->pmtu - sizeof(struct iphdr) - sizeof(struct tcphdr)); if (sk->user_mss) mss = min(mss, sk->user_mss); @@ -1077,7 +1148,8 @@ struct or_calltable or_ipv4 = { tcp_v4_send_synack, - tcp_v4_or_free + tcp_v4_or_free, + tcp_v4_send_reset }; #ifdef NEW_LISTEN @@ -1304,7 +1376,7 @@ if (ip_route_output(&rt, newsk->opt && newsk->opt->srr ? newsk->opt->faddr : newsk->daddr, - newsk->saddr, newsk->ip_tos, NULL)) { + newsk->saddr, newsk->ip_tos, 0)) { sk_free(newsk); return NULL; } @@ -1359,6 +1431,57 @@ return NULL; } +static void tcp_v4_rst_req(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct open_request *req, *prev; + + req = tcp_v4_search_req(tp,skb->nh.iph, skb->h.th, &prev); + if (!req) + return; + /* Sequence number check required by RFC793 */ + if (before(skb->seq, req->snt_isn) || after(skb->seq, req->snt_isn+1)) + return; + tcp_synq_unlink(tp, req, prev); + req->class->destructor(req); + tcp_openreq_free(req); +} + +/* Check for embryonic sockets (open_requests) We check packets with + * only the SYN bit set against the open_request queue too: This + * increases connection latency a bit, but is required to detect + * retransmitted SYNs. 
+ */ +static inline struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb) +{ + struct tcphdr *th = skb->h.th; + u32 flg = ((u32 *)th)[3]; + + /* Check for RST */ + if (flg & __constant_htonl(0x00040000)) { + tcp_v4_rst_req(sk, skb); + return NULL; + } + + /* Check for SYN|ACK */ + if (flg & __constant_htonl(0x00120000)) { + struct open_request *req, *dummy; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* Find possible connection requests. */ + req = tcp_v4_search_req(tp, skb->nh.iph, th, &dummy); + if (req) { + sk = tcp_check_req(sk, skb, req); + } +#ifdef CONFIG_SYN_COOKIES + else { + sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); + } +#endif + } + return sk; +} + int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { skb_set_owner_r(skb, sk); @@ -1368,49 +1491,42 @@ * is currently called with bh processing disabled. */ lock_sock(sk); - + if (sk->state == TCP_ESTABLISHED) { /* Fast path */ if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) goto reset; - } else { - /* Check for embryonic sockets (open_requests) - * We check packets with only the SYN bit set - * against the open_request queue too: This - * increases connection latency a bit, but is - * required to detect retransmitted SYNs. - */ - /* FIXME: need to check for multicast syns - * here to satisfy RFC1122 4.2.3.10, p. 104: - * discard bcast/mcast SYN. I'm not sure if - * they're filtered out at the IP layer (I - * think not) - */ - if (sk->state == TCP_LISTEN && - ((u32 *)skb->h.th)[3] & __constant_htonl(0x00120000)) { - struct sock *nsk; - - /* Find possible connection requests. 
*/ - nsk = tcp_check_req(sk, skb, &(IPCB(skb)->opt)); - if (nsk == NULL) - goto discard; - - release_sock(sk); - lock_sock(nsk); - sk = nsk; - } + release_sock(sk); + return 0; + } - if (tcp_rcv_state_process(sk, skb, skb->h.th, - &(IPCB(skb)->opt), skb->len)) - goto reset; + + if (sk->state == TCP_LISTEN) { + struct sock *nsk; + + nsk = tcp_v4_hnd_req(sk, skb); + if (!nsk) + goto discard; + lock_sock(nsk); + release_sock(sk); + sk = nsk; } + + if (tcp_rcv_state_process(sk, skb, skb->h.th, + &(IPCB(skb)->opt), skb->len)) + goto reset; release_sock(sk); return 0; reset: tcp_v4_send_reset(skb); discard: - kfree_skb(skb, FREE_READ); - release_sock(sk); + kfree_skb(skb, FREE_READ); + /* Be careful here. If this function gets more complicated and + * gcc suffers from register pressure on the x86, sk (in %ebx) + * might be destroyed here. This current version compiles correctly, + * but you have been warned. + */ + release_sock(sk); return 0; } @@ -1422,42 +1538,43 @@ { struct tcphdr *th; struct sock *sk; - u32 saddr = skb->nh.iph->saddr; - u32 daddr = skb->nh.iph->daddr; - - th = skb->h.th; if (skb->pkt_type!=PACKET_HOST) goto discard_it; + th = skb->h.th; + /* Pull up the IP header. */ - skb_pull(skb, skb->h.raw-skb->data); + __skb_pull(skb, skb->h.raw - skb->data); + + /* Count it even if it's bad */ + tcp_statistics.TcpInSegs++; /* Try to use the device checksum if provided. 
*/ switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = csum_partial((char *)th, len, 0); case CHECKSUM_HW: - if (tcp_v4_check(th,len,saddr,daddr,skb->csum)) { - struct iphdr * iph = skb->nh.iph; + if (tcp_v4_check(th,len,skb->nh.iph->saddr,skb->nh.iph->daddr,skb->csum)) { printk(KERN_DEBUG "TCPv4 bad checksum from %d.%d.%d.%d:%04x to %d.%d.%d.%d:%04x, len=%d/%d/%d\n", - NIPQUAD(saddr), ntohs(th->source), NIPQUAD(daddr), - ntohs(th->dest), len, skb->len, ntohs(iph->tot_len)); - goto discard_it; + NIPQUAD(skb->nh.iph->saddr), ntohs(th->source), NIPQUAD(skb->nh.iph->daddr), + ntohs(th->dest), len, skb->len, ntohs(skb->nh.iph->tot_len)); + tcp_statistics.TcpInErrs++; + goto discard_it; } default: /* CHECKSUM_UNNECESSARY */ } - tcp_statistics.TcpInSegs++; - #ifdef CONFIG_IP_TRANSPARENT_PROXY if (IPCB(skb)->redirport) - sk = tcp_v4_proxy_lookup(th->dest, saddr, th->source, daddr, - skb->dev->pa_addr, IPCB(skb)->redirport); + sk = tcp_v4_proxy_lookup(th->dest, skb->nh.iph->saddr, th->source, + skb->nh.iph->daddr, skb->dev, + IPCB(skb)->redirport, skb->dev->ifindex); else #endif - sk = __tcp_v4_lookup(th, saddr, th->source, daddr, th->dest); + sk = __tcp_v4_lookup(th, skb->nh.iph->saddr, th->source, + skb->nh.iph->daddr, th->dest, skb->dev->ifindex); if (!sk) goto no_tcp_socket; if(!ipsec_sk_policy(sk,skb)) @@ -1501,7 +1618,7 @@ rt = (struct rtable*)skb->dst; if (rt->u.dst.obsolete) { int err; - err = ip_route_output(&rt, rt->rt_dst, rt->rt_src, rt->key.tos, rt->key.dst_dev); + err = ip_route_output(&rt, rt->rt_dst, rt->rt_src, rt->key.tos, rt->key.oif); if (err) { sk->err_soft=-err; sk->error_report(skb->sk); @@ -1524,7 +1641,7 @@ static struct sock * tcp_v4_get_sock(struct sk_buff *skb, struct tcphdr *th) { return tcp_v4_lookup(skb->nh.iph->saddr, th->source, - skb->nh.iph->daddr, th->dest); + skb->nh.iph->daddr, th->dest, skb->dev->ifindex); } static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) @@ -1547,13 +1664,6 @@ ip_setsockopt, 
ip_getsockopt, v4_addr2sockaddr, - tcp_v4_send_reset, - tcp_v4_search_req, -#ifdef CONFIG_SYNCOOKIES - cookie_v4_check, -#else - NULL, -#endif sizeof(struct sockaddr_in) }; @@ -1592,8 +1702,6 @@ sk->priority = 1; sk->state = TCP_CLOSE; - /* This is how many unacked bytes we will accept for this socket. */ - sk->max_unacked = 2048; /* needs to be at most 2 full packets. */ sk->max_ack_backlog = SOMAXCONN; sk->mtu = 576; diff -u --recursive --new-file v2.1.67/linux/net/ipv4/tcp_output.c linux/net/ipv4/tcp_output.c --- v2.1.67/linux/net/ipv4/tcp_output.c Tue Sep 23 16:48:50 1997 +++ linux/net/ipv4/tcp_output.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_output.c,v 1.46 1997/08/24 16:22:28 freitag Exp $ + * Version: $Id: tcp_output.c,v 1.50 1997/10/15 19:13:02 freitag Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -74,9 +74,12 @@ * (part of SWS is done on packetization) * c) We are retransmiting [Nagle] * d) We have too many packets 'in flight' + * + * Don't use the nagle rule for urgent data. */ len = skb->end_seq - skb->seq; - if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out) + if (!sk->nonagle && len < (sk->mss >> 1) && tp->packets_out && + !skb->h.th->urg) nagle_check = 0; return (nagle_check && tp->packets_out < tp->snd_cwnd && @@ -471,8 +474,12 @@ if (tp->window_clamp) { free_space = min(tp->window_clamp, free_space); mss = min(tp->window_clamp, mss); - } else + } +#ifdef NO_ANK_FIX + /* I am tired of this message */ + else printk(KERN_DEBUG "Clamp failure. Water leaking.\n"); +#endif if (mss < 1) { mss = 1; @@ -487,8 +494,11 @@ if (cur_win < 0) { cur_win = 0; +#ifdef NO_ANK_FIX + /* And this too. 
*/ printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n", tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup); +#endif } if (free_space < sk->rcvbuf/4 && free_space < mss/2) @@ -610,9 +620,8 @@ th1->urg = 1; th1->urg_ptr = th2->urg_ptr + size1; } - if (th2->fin) { + if (th2->fin) th1->fin = 1; - } /* ... and off you go. */ kfree_skb(buff, FREE_WRITE); @@ -1007,11 +1016,8 @@ * following states. If any other state is encountered, return. * [listen/close will never occur here anyway] */ - if (sk->state != TCP_ESTABLISHED && - sk->state != TCP_CLOSE_WAIT && - sk->state != TCP_FIN_WAIT1 && - sk->state != TCP_LAST_ACK && - sk->state != TCP_CLOSING) + if ((1 << sk->state) & + ~(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT1|TCPF_LAST_ACK|TCPF_CLOSING)) return; if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) && (skb=tp->send_head)) { diff -u --recursive --new-file v2.1.67/linux/net/ipv4/tcp_timer.c linux/net/ipv4/tcp_timer.c --- v2.1.67/linux/net/ipv4/tcp_timer.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv4/tcp_timer.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: @(#)tcp.c 1.0.16 05/25/93 + * Version: $Id: tcp_timer.c,v 1.31 1997/11/05 08:14:01 freitag Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -212,7 +212,7 @@ tcp_clear_xmit_timers(sk); /* Time wait the socket. */ - if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING) { + if ((1<state) & (TCPF_FIN_WAIT1|TCPF_FIN_WAIT2|TCPF_CLOSING)) { tcp_set_state(sk,TCP_TIME_WAIT); tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); } else { @@ -263,8 +263,7 @@ sk->error_report(sk); /* Time wait the socket. 
*/ - if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 - || sk->state == TCP_CLOSING) { + if ((1<state) & (TCPF_FIN_WAIT1|TCPF_FIN_WAIT2|TCPF_CLOSING)) { tcp_set_state(sk, TCP_TIME_WAIT); tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN); } else { @@ -280,8 +279,7 @@ { int res = 0; - if (sk->state == TCP_ESTABLISHED || sk->state == TCP_CLOSE_WAIT || - sk->state == TCP_FIN_WAIT2) { + if ((1<state) & (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT2)) { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; __u32 elapsed = jiffies - tp->rcv_tstamp; @@ -382,6 +380,11 @@ return; } + if (sk->sock_readers) { + /* Try again in a second. */ + tcp_reset_xmit_timer(sk, TIME_RETRANS, HZ); + return; + } lock_sock(sk); /* Clear delay ack timer. */ diff -u --recursive --new-file v2.1.67/linux/net/ipv4/timer.c linux/net/ipv4/timer.c --- v2.1.67/linux/net/ipv4/timer.c Fri Apr 4 08:52:28 1997 +++ linux/net/ipv4/timer.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * TIMER - implementation of software timers for IP. * - * Version: @(#)timer.c 1.0.7 05/25/93 + * Version: $Id: timer.c,v 1.7 1997/09/17 18:50:26 freitag Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, diff -u --recursive --new-file v2.1.67/linux/net/ipv4/udp.c linux/net/ipv4/udp.c --- v2.1.67/linux/net/ipv4/udp.c Sat May 24 09:10:26 1997 +++ linux/net/ipv4/udp.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * * The User Datagram Protocol (UDP). * - * Version: @(#)udp.c 1.0.13 06/02/93 + * Version: $Id: udp.c,v 1.44 1997/10/15 19:56:35 freitag Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -81,8 +81,7 @@ when application doesn't choose (NOT YET - doesn't seem to be in the BSD API) [Does opening a SOCK_PACKET and snooping your output count 8)] 4.1.3.6 (Invalid Addresses) - MUST discard invalid source addresses (NOT YET -- will be implemented - in IP, so UDP will eventually be OK. Right now it's a violation.) 
+ MUST discard invalid source addresses (OK -- done in the new routing code) MUST only send datagrams with one of our addresses (NOT YET - ought to be OK ) 950728 -- MS */ @@ -133,6 +132,13 @@ unsigned char state = sk2->state; int sk2_reuse = sk2->reuse; + /* Two sockets can be bound to the same port if they're + * bound to different interfaces. + */ + + if(sk2->bound_dev_if != sk->bound_dev_if) + continue; + if(!sk2->rcv_saddr || !sk->rcv_saddr) { if((!sk2_reuse) || (!sk_reuse) || @@ -173,20 +179,24 @@ int i, best, best_size_so_far; SOCKHASH_LOCK(); + if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0]) + start = sysctl_local_port_range[0]; - /* Select initial not-so-random "best" */ - best = PROT_SOCK + 1 + (start & 1023); best_size_so_far = 32767; /* "big" num */ - result = best; - for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { + best = result = start; + + for(i = 0; i < UDP_HTABLE_SIZE; i++, result++) { struct sock *sk; int size; sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - /* No clashes - take it */ - if (!sk) + if(!sk) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); goto out; + } /* Is this one better than our best so far? */ size = 0; @@ -196,12 +206,19 @@ } while((sk = sk->next) != NULL); best_size_so_far = size; best = result; -next: + next: } - while (udp_lport_inuse(best)) - best += UDP_HTABLE_SIZE; result = best; + + for(;; result += UDP_HTABLE_SIZE) { + /* Get into range (but preserve hash bin)... */ + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); + if (!udp_lport_inuse(result)) + break; + } out: start = result; SOCKHASH_UNLOCK(); @@ -277,7 +294,7 @@ /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this here plus the last hit cache. 
-DaveM */ -struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport) +struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) { struct sock *sk, *result = NULL; unsigned short hnum = ntohs(dport); @@ -301,7 +318,12 @@ continue; score++; } - if(score == 3) { + if(sk->bound_dev_if) { + if(sk->bound_dev_if != dif) + continue; + score++; + } + if(score == 4) { result = sk; break; } else if(score > badness) { @@ -313,23 +335,25 @@ return result; } -__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport) +__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) { struct sock *sk; - if(uh_cache_sk && + if(!dif && uh_cache_sk && uh_cache_saddr == saddr && uh_cache_sport == sport && uh_cache_dport == dport && uh_cache_daddr == daddr) return uh_cache_sk; - sk = udp_v4_lookup_longway(saddr, sport, daddr, dport); - uh_cache_sk = sk; - uh_cache_saddr = saddr; - uh_cache_daddr = daddr; - uh_cache_sport = sport; - uh_cache_dport = dport; + sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif); + if(!dif) { + uh_cache_sk = sk; + uh_cache_saddr = saddr; + uh_cache_daddr = daddr; + uh_cache_sport = sport; + uh_cache_dport = dport; + } return sk; } @@ -348,16 +372,25 @@ #define udp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \ secondlist((hpnum),(sk)->next,(fpass)) -struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr, - unsigned short rnum, unsigned long laddr, - unsigned long paddr, unsigned short pnum) +static struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr, + unsigned short rnum, unsigned long laddr, + struct device *dev, unsigned short pnum, + int dif) { struct sock *s, *result = NULL; int badness = -1; + u32 paddr = 0; unsigned short hnum = ntohs(num); unsigned short hpnum = ntohs(pnum); int firstpass = 1; + if(dev && dev->ip_ptr) { + struct in_device *idev = dev->ip_ptr; + + if(idev->ifa_list) + paddr = 
idev->ifa_list->ifa_local; + } + SOCKHASH_LOCK(); for(s = udp_v4_proxy_loop_init(hnum, hpnum, s, firstpass); s != NULL; @@ -382,7 +415,12 @@ continue; score++; } - if(score == 3 && s->num == hnum) { + if(s->bound_dev_if) { + if(s->bound_dev_if != dif) + continue; + score++; + } + if(score == 4 && s->num == hnum) { result = s; break; } else if(score > badness && (s->num == hpnum || s->rcv_saddr)) { @@ -434,7 +472,7 @@ * to find the appropriate port. */ -void udp_err(struct sk_buff *skb, unsigned char *dp) +void udp_err(struct sk_buff *skb, unsigned char *dp, int len) { struct iphdr *iph = (struct iphdr*)dp; struct udphdr *uh = (struct udphdr*)(dp+(iph->ihl<<2)); @@ -442,9 +480,16 @@ int code = skb->h.icmph->code; struct sock *sk; - sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source); - if (sk == NULL) - return; /* No socket for error */ + if (len < (iph->ihl<<2)+sizeof(struct udphdr)) { + icmp_statistics.IcmpInErrors++; + return; + } + + sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); + if (sk == NULL) { + icmp_statistics.IcmpInErrors++; + return; /* No socket for error */ + } if (sk->ip_recverr && !sk->sock_readers) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -519,7 +564,6 @@ u32 daddr; u32 other; struct iovec *iov; - int nriov; u32 wcheck; }; @@ -533,46 +577,23 @@ static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen) { struct udpfakehdr *ufh = (struct udpfakehdr *)p; - struct iovec *iov; - char *src; - char *dst = to; - unsigned int len; - - if (offset == 0) { - fraglen -= sizeof(struct udphdr); - dst += sizeof(struct udphdr); - } - - iov = ufh->iov; - do { - if ((len = iov->iov_len) > fraglen) - len = fraglen; - src = (char *) iov->iov_base + iov->iov_len - len; - ufh->wcheck = csum_partial_copy_fromuser(src, - dst + fraglen - len, len, - ufh->wcheck); - if ((iov->iov_len -= len) == 0) { - if (--(ufh->nriov) < 0) { - printk(KERN_NOTICE "udp_getfrag: nriov = %d\n", 
- ufh->nriov); - return -EINVAL; - } - iov--; - } - fraglen -= len; - } while (fraglen); - ufh->iov = iov; - - if (offset == 0) { + if (offset==0) { + if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset, + fraglen-sizeof(struct udphdr), &ufh->wcheck)) + return -EFAULT; ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr), - ufh->wcheck); + ufh->wcheck); ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr, ntohs(ufh->uh.len), IPPROTO_UDP, ufh->wcheck); if (ufh->uh.check == 0) ufh->uh.check = -1; memcpy(to, ufh, sizeof(struct udphdr)); + return 0; } + if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr), + fraglen, &ufh->wcheck)) + return -EFAULT; return 0; } @@ -586,45 +607,19 @@ static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen) { struct udpfakehdr *ufh = (struct udpfakehdr *)p; - struct iovec *iov; - char *src; - char *dst = to; - int err; - unsigned int len; - if (offset == 0) { - fraglen -= sizeof(struct udphdr); - dst += sizeof(struct udphdr); - } - - iov = ufh->iov; - do { - if ((len = iov->iov_len) > fraglen) - len = fraglen; - src = (char *) iov->iov_base + iov->iov_len - len; - err = copy_from_user(dst + fraglen - len, src, len); - fraglen -= len; - if ((iov->iov_len -= len) == 0) { - if (--(ufh->nriov) < 0) { - printk(KERN_NOTICE "udp_getfrag: nriov = %d\n", - ufh->nriov); - return -EINVAL; - } - iov--; - } - } while (fraglen && err >= 0); - ufh->iov = iov; - - if (offset == 0) + if (offset==0) { memcpy(to, ufh, sizeof(struct udphdr)); - return err; + return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset, + fraglen-sizeof(struct udphdr)); + } + return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr), + fraglen); } - int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len) { int ulen = len + sizeof(struct udphdr); - struct device *dev = NULL; struct ipcm_cookie ipc; struct udpfakehdr ufh; struct rtable *rt; @@ 
-674,8 +669,9 @@ ipc.addr = sk->saddr; ipc.opt = NULL; + ipc.oif = sk->bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(msg, &ipc, &dev); + err = ip_cmsg_send(msg, &ipc); if (err) return err; if (ipc.opt) @@ -695,17 +691,21 @@ tos = RT_TOS(sk->ip_tos) | (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) || (ipc.opt && ipc.opt->is_strictroute)); - if (MULTICAST(daddr) && sk->ip_mc_index && dev == NULL) - err = ip_route_output_dev(&rt, daddr, ufh.saddr, tos, sk->ip_mc_index); - else - err = ip_route_output(&rt, daddr, ufh.saddr, tos, dev); + if (MULTICAST(daddr)) { + if (!ipc.oif) + ipc.oif = sk->ip_mc_index; + if (!ufh.saddr) + ufh.saddr = sk->ip_mc_addr; + } + + err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif); if (err) { if (free) kfree(ipc.opt); return err; } - if (rt->rt_flags&RTF_BROADCAST && !sk->broadcast) { + if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast) { if (free) kfree(ipc.opt); ip_rt_put(rt); return -EACCES; @@ -718,8 +718,7 @@ ufh.uh.len = htons(ulen); ufh.uh.check = 0; ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256; - ufh.iov = msg->msg_iov + msg->msg_iovlen - 1; - ufh.nriov = msg->msg_iovlen; + ufh.iov = msg->msg_iov; ufh.wcheck = 0; /* RFC1122: OK. 
Provides the checksumming facility (MUST) as per */ @@ -907,10 +906,10 @@ return(-EAFNOSUPPORT); err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr, - sk->ip_tos|sk->localroute); + sk->ip_tos|sk->localroute, sk->bound_dev_if); if (err) return err; - if ((rt->rt_flags&RTF_BROADCAST) && !sk->broadcast) { + if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) { ip_rt_put(rt); return -EACCES; } @@ -1024,7 +1023,7 @@ struct udphdr *uh = (struct udphdr *)(skb->nh.raw + iph->ihl*4); struct sock *sk; - sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest); + sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest, skb->dev->ifindex); if (!sk) return 0; @@ -1113,17 +1112,17 @@ skb_trim(skb,len); - if(rt->rt_flags & (RTF_BROADCAST|RTF_MULTICAST)) + if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return udp_v4_mcast_deliver(skb, uh, saddr, daddr); #ifdef CONFIG_IP_TRANSPARENT_PROXY if (IPCB(skb)->redirport) sk = udp_v4_proxy_lookup(uh->dest, saddr, uh->source, - daddr, skb->dev->pa_addr, - IPCB(skb)->redirport); + daddr, skb->dev, IPCB(skb)->redirport, + skb->dev->ifindex); else #endif - sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest); + sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex); if (sk == NULL) { udp_statistics.UdpNoPorts++; diff -u --recursive --new-file v2.1.67/linux/net/ipv4/utils.c linux/net/ipv4/utils.c --- v2.1.67/linux/net/ipv4/utils.c Thu Jun 26 12:33:41 1997 +++ linux/net/ipv4/utils.c Sun Nov 30 14:00:39 1997 @@ -6,7 +6,7 @@ * Various kernel-resident INET utility functions; mainly * for format conversion and debugging output. * - * Version: @(#)utils.c 1.0.7 05/18/93 + * Version: $Id: utils.c,v 1.5 1997/09/17 18:50:31 freitag Exp $ * * Author: Fred N. 
van Kempen, * diff -u --recursive --new-file v2.1.67/linux/net/ipv6/Config.in linux/net/ipv6/Config.in --- v2.1.67/linux/net/ipv6/Config.in Wed Dec 31 16:00:00 1969 +++ linux/net/ipv6/Config.in Sun Nov 30 14:00:39 1997 @@ -0,0 +1,7 @@ +# +# IPv6 configuration +# +bool 'IPv6: enable EUI-64 token format' CONFIG_IPV6_EUI64 +bool 'IPv6: disable provided based addresses' CONFIG_IPV6_NO_PB +#bool 'IPv6: flow policy support' CONFIG_RT6_POLICY +#bool 'IPv6: firewall support' CONFIG_IPV6_FIREWALL diff -u --recursive --new-file v2.1.67/linux/net/ipv6/addrconf.c linux/net/ipv6/addrconf.c --- v2.1.67/linux/net/ipv6/addrconf.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv6/addrconf.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: addrconf.c,v 1.21 1997/08/09 03:44:24 davem Exp $ + * $Id: addrconf.c,v 1.28 1997/11/05 20:20:43 kuznet Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -42,7 +43,8 @@ #include #include #include -#include +#include +#include #include @@ -92,12 +94,11 @@ st = addr->s6_addr32[0]; - /* - * UCast Provider Based Address - * 0x4/3 + /* Consider all addresses with the first three bits different of + 000 and 111 as unicasts. */ - - if ((st & __constant_htonl(0xE0000000)) == __constant_htonl(0x40000000)) + if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) && + (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000)) return IPV6_ADDR_UNICAST; if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000)) { @@ -184,6 +185,8 @@ printk(KERN_DEBUG "joining all-routers\n"); #endif idev->router = 1; + + /* Wrong. It is user level function. 
*/ ipv6_addr_all_routers(&maddr); ipv6_dev_mc_inc(idev->dev, &maddr); } @@ -222,6 +225,7 @@ memcpy(&ifa->addr, addr, sizeof(struct in6_addr)); init_timer(&ifa->timer); + ifa->timer.data = (unsigned long) ifa; ifa->scope = scope; ifa->idev = idev; @@ -361,7 +365,7 @@ } out: - if (ifp == NULL && match) + if (ifp == NULL) ifp = match; atomic_dec(&addr_list_lock); return ifp; @@ -410,6 +414,157 @@ return ifp; } +/* Join to solicited addr multicast group. */ + +static void addrconf_join_solict(struct device *dev, struct in6_addr *addr) +{ + struct in6_addr maddr; + + addrconf_addr_solict_mult(addr, &maddr); + ipv6_dev_mc_inc(dev, &maddr); +} + +#ifdef CONFIG_IPV6_EUI64 +static int ipv6_generate_eui64(u8 *eui, struct device *dev) +{ + switch (dev->type) { + case ARPHRD_ETHER: + if (dev->addr_len != ETH_ALEN) + return -1; + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr+3, 3); + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[0] ^= 2; + return 0; + } + return -1; +} +#endif + +/* + * Add prefix route. + */ + +static void +addrconf_prefix_route(struct in6_addr *pfx, int plen, struct device *dev, + unsigned long info) +{ + struct in6_rtmsg rtmsg; + int err; + + memset(&rtmsg, 0, sizeof(rtmsg)); + memcpy(&rtmsg.rtmsg_dst, pfx, sizeof(struct in6_addr)); + rtmsg.rtmsg_dst_len = plen; + rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + rtmsg.rtmsg_ifindex = dev->ifindex; + rtmsg.rtmsg_info = info; + rtmsg.rtmsg_flags = RTF_UP|RTF_ADDRCONF; + + /* Prevent useless cloning on PtP SIT. + This thing is done here expecting that the whole + class of non-broadcast devices need not cloning. 
+ */ + if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) + rtmsg.rtmsg_flags |= RTF_NONEXTHOP; + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + + ip6_route_add(&rtmsg, &err); + + if (err) + printk(KERN_DEBUG "IPv6: error %d adding prefix route\n", err); +} + +/* Create "default" multicast route to the interface */ + +static void addrconf_add_mroute(struct device *dev) +{ + struct in6_rtmsg rtmsg; + struct rt6_info *rt; + int err; + + memset(&rtmsg, 0, sizeof(rtmsg)); + ipv6_addr_set(&rtmsg.rtmsg_dst, + __constant_htonl(0xFF000000), 0, 0, 0); + rtmsg.rtmsg_dst_len = 8; + rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + rtmsg.rtmsg_ifindex = dev->ifindex; + rtmsg.rtmsg_flags = RTF_UP|RTF_ADDRCONF; + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + + rt = ip6_route_add(&rtmsg, &err); + + /* + * Pedro makes interesting thing here, he attached + * fake nexthop to multicast route. + * It is trick to avoid cloning, ugly, but efficient. --ANK + */ + + if (err) + printk(KERN_DEBUG "IPv6: error %d adding mroute\n", err); + else + rt->rt6i_nexthop = ndisc_get_neigh(dev, &rtmsg.rtmsg_dst); +} + +static void sit_route_add(struct device *dev) +{ + struct in6_rtmsg rtmsg; + struct rt6_info *rt; + int err; + + memset(&rtmsg, 0, sizeof(rtmsg)); + + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + + /* prefix length - 96 bytes "::d.d.d.d" */ + rtmsg.rtmsg_dst_len = 96; + rtmsg.rtmsg_flags = RTF_UP; + rtmsg.rtmsg_ifindex = dev->ifindex; + + rt = ip6_route_add(&rtmsg, &err); + + /* See comment in addrconf_add_mroute. + * It is the same trick, but to avoid cloning for direct + * sit routes i.e. IPv4 comaptible destinations. 
+ */ + if (err) + printk(KERN_DEBUG "sit_route_add: error %d in route_add\n", err); + else + rt->rt6i_nexthop = ndisc_get_neigh(dev, &rtmsg.rtmsg_dst); +} + +static void addrconf_add_lroute(struct device *dev) +{ + struct in6_addr addr; + + ipv6_addr_set(&addr, __constant_htonl(0xFE800000), 0, 0, 0); + addrconf_prefix_route(&addr, 10, dev, 0); +} + +static struct inet6_dev *addrconf_add_dev(struct device *dev) +{ + struct in6_addr maddr; + struct inet6_dev *idev; + + if ((idev = ipv6_get_idev(dev)) == NULL) { + idev = ipv6_add_dev(dev); + if (idev == NULL) + return NULL; + } + + /* Add default multicast route */ + addrconf_add_mroute(dev); + + /* Add link local route */ + addrconf_add_lroute(dev); + + /* Join to all nodes multicast group. */ + ipv6_addr_all_nodes(&maddr); + ipv6_dev_mc_inc(dev, &maddr); + return idev; +} + void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len) { struct prefix_info *pinfo; @@ -432,7 +587,7 @@ addr_type = ipv6_addr_type(&pinfo->prefix); - if (addr_type & IPV6_ADDR_LINKLOCAL) + if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)) return; valid_lft = ntohl(pinfo->valid); @@ -470,23 +625,12 @@ rt->rt6i_expires = rt_expires; } } else if (pinfo->onlink && valid_lft) { - struct in6_rtmsg rtmsg; - int err; - - memset(&rtmsg, 0, sizeof(rtmsg)); - - printk(KERN_DEBUG "adding on link route\n"); - - ipv6_addr_copy(&rtmsg.rtmsg_dst, &pinfo->prefix); - rtmsg.rtmsg_dst_len = pinfo->prefix_len; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - rtmsg.rtmsg_ifindex = dev->ifindex; - rtmsg.rtmsg_flags = RTF_UP | RTF_ADDRCONF; - rtmsg.rtmsg_info = rt_expires; - - ip6_route_add(&rtmsg, &err); + addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, + dev, rt_expires); } + /* Try to figure out our local address for this prefix */ + if (pinfo->autoconf && ipv6_config.autoconf) { struct inet6_ifaddr * ifp; struct in6_addr addr; @@ -494,33 +638,41 @@ plen = pinfo->prefix_len >> 3; - if (plen + dev->addr_len == sizeof(struct in6_addr)) { +#ifdef 
CONFIG_IPV6_EUI64 + if (pinfo->prefix_len == 64) { + memcpy(&addr, &pinfo->prefix, 8); + if (ipv6_generate_eui64(addr.s6_addr + 8, dev)) + return; + goto ok; + } +#endif +#ifndef CONFIG_IPV6_NO_PB + if (pinfo->prefix_len == ((sizeof(struct in6_addr) - dev->addr_len)<<3)) { memcpy(&addr, &pinfo->prefix, plen); memcpy(addr.s6_addr + plen, dev->dev_addr, dev->addr_len); - } else { - ADBG(("addrconf: prefix_len invalid\n")); - return; + goto ok; } +#endif + printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n", pinfo->prefix_len); + return; +ok: ifp = ipv6_chk_addr(&addr); if (ifp == NULL && valid_lft) { struct inet6_dev *in6_dev = ipv6_get_idev(dev); - if (in6_dev == NULL) - ADBG(("addrconf: device not configured\n")); - + if (in6_dev == NULL) { + printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name); + return; + } + ifp = ipv6_add_addr(in6_dev, &addr, addr_type & IPV6_ADDR_SCOPE_MASK); - if (dev->flags & IFF_MULTICAST) { - struct in6_addr maddr; - - /* Join to solicited addr multicast group. 
*/ - addrconf_addr_solict_mult(&addr, &maddr); - ipv6_dev_mc_inc(dev, &maddr); - } + if (ifp == NULL) + return; ifp->prefix_len = pinfo->prefix_len; @@ -564,17 +716,32 @@ } if (dev->type == ARPHRD_SIT) { - struct device *dev; - + struct ifreq ifr; + mm_segment_t oldfs; + struct ip_tunnel_parm p; + if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4)) return -EADDRNOTAVAIL; - - dev = sit_add_tunnel(ireq.ifr6_addr.s6_addr32[3]); - - if (dev == NULL) - err = -ENODEV; - else - err = 0; + + memset(&p, 0, sizeof(p)); + p.iph.daddr = ireq.ifr6_addr.s6_addr32[3]; + p.iph.saddr = 0; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPV6; + p.iph.ttl = 64; + ifr.ifr_ifru.ifru_data = (void*)&p; + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); + set_fs(oldfs); + + if (err == 0) { + err = -ENOBUFS; + if ((dev = dev_get(p.name)) == NULL) + goto err_exit; + err = dev_open(dev); + } } err_exit: @@ -595,38 +762,27 @@ if (!suser()) return -EPERM; - if(copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) + if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) return -EFAULT; - if((dev = dev_get_by_index(ireq.ifr6_ifindex)) == NULL) - return -EINVAL; + if ((dev = dev_get_by_index(ireq.ifr6_ifindex)) == NULL) + return -ENODEV; + + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; - if ((idev = ipv6_get_idev(dev)) == NULL) - return -EINVAL; + if ((idev = addrconf_add_dev(dev)) == NULL) + return -ENOBUFS; scope = ipv6_addr_scope(&ireq.ifr6_addr); if((ifp = ipv6_add_addr(idev, &ireq.ifr6_addr, scope)) == NULL) return -ENOMEM; - ifp->prefix_len = 128; - - if (dev->flags & IFF_MULTICAST) { - struct in6_addr maddr; - - /* Join to solicited addr multicast group. 
*/ - addrconf_addr_solict_mult(&ireq.ifr6_addr, &maddr); - ipv6_dev_mc_inc(dev, &maddr); - } - ifp->prefix_len = ireq.ifr6_prefixlen; ifp->flags |= ADDR_PERMANENT; - if (!(dev->flags & (IFF_NOARP|IFF_LOOPBACK))) - addrconf_dad_start(ifp); - else - ip6_rt_addr_add(&ifp->addr, dev); - + addrconf_dad_start(ifp); return 0; } @@ -645,90 +801,22 @@ return -EFAULT; if ((dev = dev_get_by_index(ireq.ifr6_ifindex)) == NULL) - return -EINVAL; + return -ENODEV; if ((idev = ipv6_get_idev(dev)) == NULL) - return -EINVAL; + return -ENXIO; scope = ipv6_addr_scope(&ireq.ifr6_addr); for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == scope && - (!memcmp(&ireq.ifr6_addr, &ifp->addr, sizeof(struct in6_addr)))) { - ipv6_del_addr(ifp); - break; - } - } - - return 0; -} - -static void sit_route_add(struct device *dev) -{ - struct in6_rtmsg rtmsg; - struct rt6_info *rt; - int err; - - ADBG(("sit_route_add(%s): ", dev->name)); - memset(&rtmsg, 0, sizeof(rtmsg)); - - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - - if (dev->pa_dstaddr == 0) { - ADBG(("pa_dstaddr=0, ")); - /* prefix length - 96 bytes "::d.d.d.d" */ - rtmsg.rtmsg_dst_len = 96; - rtmsg.rtmsg_flags = RTF_NONEXTHOP|RTF_UP; - } else { - ADBG(("pa_dstaddr=%08x, ", dev->pa_dstaddr)); - rtmsg.rtmsg_dst_len = 10; - rtmsg.rtmsg_dst.s6_addr32[0] = __constant_htonl(0xfe800000); - rtmsg.rtmsg_dst.s6_addr32[3] = dev->pa_dstaddr; - rtmsg.rtmsg_gateway.s6_addr32[3]= dev->pa_dstaddr; - rtmsg.rtmsg_flags = RTF_UP; - } - - rtmsg.rtmsg_ifindex = dev->ifindex; - ADBG(("doing ip6_route_add()\n")); - rt = ip6_route_add(&rtmsg, &err); - - if (err) { -#if ACONF_DEBUG >= 1 - printk(KERN_DEBUG "sit_route_add: error %d in route_add\n", err); -#endif - } - - ADBG(("sit_route_add(cont): ")); - if (dev->pa_dstaddr) { - struct rt6_info *mrt; - - ADBG(("pa_dstaddr != 0, ")); - rt->rt6i_nexthop = ndisc_get_neigh(dev, &rtmsg.rtmsg_gateway); - if (rt->rt6i_nexthop == NULL) { - ADBG(("can't get 
neighbour\n")); - printk(KERN_DEBUG "sit_route: get_neigh failed\n"); + if (ifp->scope == scope && + (!memcmp(&ireq.ifr6_addr, &ifp->addr, sizeof(struct in6_addr)))) { + ipv6_del_addr(ifp); + break; } - - /* - * Add multicast route. - */ - ADBG(("add MULT, ")); - ipv6_addr_set(&rtmsg.rtmsg_dst, __constant_htonl(0xFF000000), 0, 0, 0); - - rtmsg.rtmsg_dst_len = 8; - rtmsg.rtmsg_flags = RTF_UP; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - - memset(&rtmsg.rtmsg_gateway, 0, sizeof(struct in6_addr)); - ADBG(("doing ip6_route_add()\n")); - mrt = ip6_route_add(&rtmsg, &err); - - if (mrt) - mrt->rt6i_nexthop = ndisc_get_neigh(dev, &rtmsg.rtmsg_dst); - } else { - ADBG(("pa_dstaddr==0\n")); } + + return 0; } static void sit_add_v4_addrs(struct inet6_dev *idev) @@ -739,34 +827,55 @@ int scope; memset(&addr, 0, sizeof(struct in6_addr)); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); - if (idev->dev->pa_dstaddr) { + if (idev->dev->flags&IFF_POINTOPOINT) { addr.s6_addr32[0] = __constant_htonl(0xfe800000); scope = IFA_LINK; } else { scope = IPV6_ADDR_COMPATv4; } + if (addr.s6_addr32[3]) { + ifp = ipv6_add_addr(idev, &addr, scope); + if (ifp) { + ifp->flags |= ADDR_PERMANENT; + ifp->prefix_len = 128; + ip6_rt_addr_add(&ifp->addr, idev->dev); + } + return; + } + for (dev = dev_base; dev != NULL; dev = dev->next) { - if (dev->family == AF_INET && (dev->flags & IFF_UP)) { + if (dev->ip_ptr && (dev->flags & IFF_UP)) { + struct in_device * in_dev = dev->ip_ptr; + struct in_ifaddr * ifa; + int flag = scope; - - addr.s6_addr32[3] = dev->pa_addr; - if (dev->flags & IFF_LOOPBACK) { - if (idev->dev->pa_dstaddr) - continue; + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + addr.s6_addr32[3] = ifa->ifa_local; - flag |= IFA_HOST; - } - - ifp = ipv6_add_addr(idev, &addr, flag); + if (ifa->ifa_scope == RT_SCOPE_LINK) + continue; + if (ifa->ifa_scope >= RT_SCOPE_HOST) { + if (idev->dev->flags&IFF_POINTOPOINT) + continue; + flag |= IFA_HOST; + } - if (ifp == NULL) - continue; + 
ifp = ipv6_add_addr(idev, &addr, flag); + + if (ifp == NULL) + continue; - ifp->flags |= ADDR_PERMANENT; - ip6_rt_addr_add(&ifp->addr, dev); + if (idev->dev->flags&IFF_POINTOPOINT) + ifp->prefix_len = 10; + else + ifp->prefix_len = 96; + ifp->flags |= ADDR_PERMANENT; + ip6_rt_addr_add(&ifp->addr, dev); + } } } } @@ -804,56 +913,98 @@ printk(KERN_DEBUG "init_loopback: error in route_add\n"); } -static void addrconf_eth_config(struct device *dev) +static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) +{ + struct inet6_ifaddr * ifp; + + ifp = ipv6_add_addr(idev, addr, IFA_LINK); + if (ifp == NULL) + return; + + ifp->flags = ADDR_PERMANENT; + ifp->prefix_len = 10; + + addrconf_dad_start(ifp); +} + +static void addrconf_dev_config(struct device *dev) { struct in6_addr addr; struct in6_addr maddr; - struct inet6_ifaddr * ifp; struct inet6_dev * idev; + if (dev->type != ARPHRD_ETHER) { + /* Alas, we support only ethernet autoconfiguration. */ + return; + } + + idev = addrconf_add_dev(dev); + if (idev == NULL) + return; + +#ifdef CONFIG_IPV6_EUI64 memset(&addr, 0, sizeof(struct in6_addr)); - /* Generate link local address. */ addr.s6_addr[0] = 0xFE; addr.s6_addr[1] = 0x80; - memcpy(addr.s6_addr + (sizeof(struct in6_addr) - dev->addr_len), - dev->dev_addr, dev->addr_len); + if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0) + addrconf_add_linklocal(idev, &addr); +#endif - idev = ipv6_add_dev(dev); - if (idev == NULL) - return; - - ifp = ipv6_add_addr(idev, &addr, IFA_LINK); - if (ifp == NULL) - return; +#ifndef CONFIG_IPV6_NO_PB + memset(&addr, 0, sizeof(struct in6_addr)); - ifp->flags = ADDR_PERMANENT; - ifp->prefix_len = 10; + addr.s6_addr[0] = 0xFE; + addr.s6_addr[1] = 0x80; - /* Join to all nodes multicast group. 
*/ - ipv6_addr_all_nodes(&maddr); - ipv6_dev_mc_inc(dev, &maddr); + memcpy(addr.s6_addr + (sizeof(struct in6_addr) - dev->addr_len), + dev->dev_addr, dev->addr_len); + addrconf_add_linklocal(idev, &addr); +#endif if (ipv6_config.forwarding) { idev->router = 1; + + /* It is wrong. + It is routing daemon or radvd that must make it, + rather than kernel. + */ ipv6_addr_all_routers(&maddr); ipv6_dev_mc_inc(dev, &maddr); } +} - /* Join to solicited addr multicast group. */ - addrconf_addr_solict_mult(&addr, &maddr); - ipv6_dev_mc_inc(dev, &maddr); +static void addrconf_sit_config(struct device *dev) +{ + struct inet6_dev *idev; - /* Start duplicate address detection. */ - addrconf_dad_start(ifp); + /* + * Configure the tunnel with one of our IPv4 + * addresses... we should configure all of + * our v4 addrs in the tunnel + */ + + idev = ipv6_add_dev(dev); + if (idev == NULL) { + printk(KERN_DEBUG "init sit: add_dev failed\n"); + return; + } + + sit_add_v4_addrs(idev); + + if (dev->flags&IFF_POINTOPOINT) { + addrconf_add_mroute(dev); + addrconf_add_lroute(dev); + } else + sit_route_add(dev); } + int addrconf_notify(struct notifier_block *this, unsigned long event, void * data) { struct device *dev; - struct inet6_dev * idev; dev = (struct device *) data; @@ -861,34 +1012,15 @@ case NETDEV_UP: switch(dev->type) { case ARPHRD_SIT: - - printk(KERN_DEBUG "sit device up: %s\n", dev->name); - - /* - * Configure the tunnel with one of our IPv4 - * addresses... we should configure all of - * our v4 addrs in the tunnel - */ - - idev = ipv6_add_dev(dev); - - sit_add_v4_addrs(idev); - - /* - * we do an hack for now to configure the tunnel - * route. 
- */ - - sit_route_add(dev); + addrconf_sit_config(dev); break; case ARPHRD_LOOPBACK: init_loopback(dev); break; - case ARPHRD_ETHER: - printk(KERN_DEBUG "Configuring eth interface\n"); - addrconf_eth_config(dev); + default: + addrconf_dev_config(dev); break; }; @@ -934,7 +1066,6 @@ } if (idev == NULL) { - printk(KERN_DEBUG "addrconf_ifdown: device not found\n"); end_bh_atomic(); return -ENODEV; } @@ -958,8 +1089,8 @@ ifa = *bifa; continue; } - ifa = ifa->lst_next; bifa = &ifa->lst_next; + ifa = *bifa; } } @@ -968,6 +1099,7 @@ return 0; } + static void addrconf_rs_timer(unsigned long data) { struct inet6_ifaddr *ifp; @@ -1003,10 +1135,8 @@ struct in6_rtmsg rtmsg; int err; -#if ACONF_DEBUG >= 2 printk(KERN_DEBUG "%s: no IPv6 routers present\n", ifp->idev->dev->name); -#endif memset(&rtmsg, 0, sizeof(struct in6_rtmsg)); rtmsg.rtmsg_type = RTMSG_NEWROUTE; @@ -1031,27 +1161,17 @@ dev = ifp->idev->dev; - if (dev->flags & IFF_MULTICAST) { - struct in6_rtmsg rtmsg; - struct rt6_info *mrt; - int err; - - memset(&rtmsg, 0, sizeof(rtmsg)); - ipv6_addr_set(&rtmsg.rtmsg_dst, - __constant_htonl(0xFF000000), 0, 0, 0); - - rtmsg.rtmsg_dst_len = 8; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - rtmsg.rtmsg_ifindex = dev->ifindex; - - rtmsg.rtmsg_flags = RTF_UP; + addrconf_join_solict(dev, &ifp->addr); - mrt = ip6_route_add(&rtmsg, &err); + if (ifp->prefix_len != 128) + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0); - if (err) - printk(KERN_DEBUG "dad_start: mcast route add failed\n"); - else - mrt->rt6i_nexthop = ndisc_get_neigh(dev, &rtmsg.rtmsg_dst); + if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { + start_bh_atomic(); + ifp->flags &= ~DAD_INCOMPLETE; + addrconf_dad_completed(ifp); + end_bh_atomic(); + return; } if (rand_seed) { @@ -1059,15 +1179,12 @@ nd_rand_seed = ifp->addr.s6_addr32[3]; } - init_timer(&ifp->timer); - ifp->probes = ipv6_config.dad_transmits; ifp->flags |= DAD_INCOMPLETE; rand_num = ipv6_random() % ipv6_config.rtr_solicit_delay; ifp->timer.function = 
addrconf_dad_timer; - ifp->timer.data = (unsigned long) ifp; ifp->timer.expires = jiffies + rand_num; add_timer(&ifp->timer); @@ -1105,62 +1222,41 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { - struct device *dev; - int err; - - dev = ifp->idev->dev; + struct device * dev = ifp->idev->dev; - if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) { - struct in6_rtmsg rtmsg; - struct in6_addr all_routers; + /* + * Configure the address for reception. Now it is valid. + */ - /* - * 1) configure a link route for this interface - * 2) send a (delayed) router solicitation - */ + ip6_rt_addr_add(&ifp->addr, dev); - memset(&rtmsg, 0, sizeof(rtmsg)); - - memcpy(&rtmsg.rtmsg_dst, &ifp->addr, sizeof(struct in6_addr)); + /* If added prefix is link local and forwarding is off, + start sending router solicitations. + */ - rtmsg.rtmsg_dst_len = ifp->prefix_len; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - rtmsg.rtmsg_ifindex = dev->ifindex; + if (ipv6_config.forwarding == 0 && + (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) == 0 && + (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { + struct in6_addr all_routers; - rtmsg.rtmsg_flags = RTF_UP; + ipv6_addr_set(&all_routers, + __constant_htonl(0xff020000U), 0, 0, + __constant_htonl(0x2U)); - ip6_route_add(&rtmsg, &err); - - if (err) - printk(KERN_DEBUG "dad_complete: error in route_add\n"); + /* + * If a host as already performed a random delay + * [...] as part of DAD [...] there is no need + * to delay again before sending the first RS + */ + ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); - if (ipv6_config.forwarding == 0) { - ipv6_addr_set(&all_routers, - __constant_htonl(0xff020000U), 0, 0, - __constant_htonl(0x2U)); - - /* - * If a host as already performed a random delay - * [...] as part of DAD [...] 
there is no need - * to delay again before sending the first RS - */ - ndisc_send_rs(ifp->idev->dev, &ifp->addr, - &all_routers); - - ifp->probes = 1; - ifp->timer.function = addrconf_rs_timer; - ifp->timer.expires = (jiffies + - ipv6_config.rtr_solicit_interval); - ifp->idev->if_flags |= IF_RS_SENT; - add_timer(&ifp->timer); - } + ifp->probes = 1; + ifp->timer.function = addrconf_rs_timer; + ifp->timer.expires = (jiffies + + ipv6_config.rtr_solicit_interval); + ifp->idev->if_flags |= IF_RS_SENT; + add_timer(&ifp->timer); } - - /* - * configure the address for reception - */ - - ip6_rt_addr_add(&ifp->addr, dev); } #ifdef CONFIG_PROC_FS @@ -1251,7 +1347,9 @@ __initfunc(void addrconf_init(void)) { +#ifdef MODULE struct device *dev; +#endif /* * init address and device hash lists @@ -1263,24 +1361,25 @@ memset(inet6_dev_lst, 0, IN6_ADDR_HSIZE * sizeof(struct inet6_dev *)); - /* - * Init loopback device - */ - - dev = dev_get("lo"); - - if (dev && (dev->flags & IFF_UP)) - init_loopback(dev); +#ifdef MODULE + /* This takes sense only during module load. 
*/ - /* - * and maybe: - * search availiable AF_INET devs and try to configure them - */ - - dev = dev_get("eth0"); + for (dev = dev_base; dev; dev = dev->next) { + if (!(dev->flags&IFF_UP)) + continue; - if (dev && (dev->flags & IFF_UP)) - addrconf_eth_config(dev); + switch (dev->type) { + case ARPHRD_LOOPBACK: + init_loopback(dev); + break; + case ARPHRD_ETHER: + addrconf_dev_config(dev); + break; + default: + /* Ignore all other */ + } + } +#endif #ifdef CONFIG_PROC_FS proc_net_register(&iface_proc_entry); diff -u --recursive --new-file v2.1.67/linux/net/ipv6/af_inet6.c linux/net/ipv6/af_inet6.c --- v2.1.67/linux/net/ipv6/af_inet6.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv6/af_inet6.c Sun Nov 30 14:00:39 1997 @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * $Id: af_inet6.c,v 1.21 1997/08/20 11:25:00 alan Exp $ + * $Id: af_inet6.c,v 1.23 1997/10/29 20:27:52 kuznet Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include @@ -200,7 +200,7 @@ /* Check if the address belongs to the host. 
*/ if (addr_type == IPV6_ADDR_MAPPED) { v4addr = addr->sin6_addr.s6_addr32[3]; - if (__ip_chk_addr(v4addr) != IS_MYADDR) + if (inet_addr_type(v4addr) != RTN_LOCAL) return(-EADDRNOTAVAIL); } else { if (addr_type != IPV6_ADDR_ANY) { @@ -354,8 +354,8 @@ case SIOCGIFMAP: case SIOCSIFSLAVE: case SIOCGIFSLAVE: - case SIOGIFINDEX: - case SIOGIFNAME: + case SIOCGIFINDEX: + case SIOCGIFNAME: case SIOCGIFCOUNT: return(dev_ioctl(cmd,(void *) arg)); diff -u --recursive --new-file v2.1.67/linux/net/ipv6/icmp.c linux/net/ipv6/icmp.c --- v2.1.67/linux/net/ipv6/icmp.c Tue Sep 23 16:48:50 1997 +++ linux/net/ipv6/icmp.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: icmp.c,v 1.10 1997/06/05 11:07:20 schenk Exp $ + * $Id: icmp.c,v 1.11 1997/09/20 20:48:26 davem Exp $ * * Based on net/ipv4/icmp.c * diff -u --recursive --new-file v2.1.67/linux/net/ipv6/ip6_fib.c linux/net/ipv6/ip6_fib.c --- v2.1.67/linux/net/ipv6/ip6_fib.c Tue Sep 23 16:48:50 1997 +++ linux/net/ipv6/ip6_fib.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: ip6_fib.c,v 1.7 1997/04/12 04:32:46 davem Exp $ + * $Id: ip6_fib.c,v 1.9 1997/09/20 20:48:27 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff -u --recursive --new-file v2.1.67/linux/net/ipv6/ip6_fw.c linux/net/ipv6/ip6_fw.c --- v2.1.67/linux/net/ipv6/ip6_fw.c Tue May 13 22:41:24 1997 +++ linux/net/ipv6/ip6_fw.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: ip6_fw.c,v 1.5 1997/04/29 09:38:44 mj Exp $ + * $Id: ip6_fw.c,v 1.7 1997/10/06 23:09:54 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -13,6 +13,7 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include #include #include #include @@ -368,12 +369,16 @@ __initfunc(void ip6_fw_init(void)) { +#ifdef CONFIG_NETLINK netlink_attach(NETLINK_IP6_FW, ip6_fw_msgrcv); +#endif } #ifdef MODULE void module_cleanup(void) { +#ifdef CONFIG_NETLINK netlink_detach(NETLINK_IP6_FW); +#endif } #endif diff -u --recursive --new-file v2.1.67/linux/net/ipv6/ip6_input.c linux/net/ipv6/ip6_input.c --- v2.1.67/linux/net/ipv6/ip6_input.c Tue Sep 23 16:48:50 1997 +++ linux/net/ipv6/ip6_input.c Sun Nov 30 14:00:39 1997 @@ -6,7 +6,7 @@ * Pedro Roque * Ian P. Morris * - * $Id: ip6_input.c,v 1.6 1997/05/11 16:06:52 davem Exp $ + * $Id: ip6_input.c,v 1.7 1997/09/20 20:48:27 davem Exp $ * * Based in linux/net/ipv4/ip_input.c * diff -u --recursive --new-file v2.1.67/linux/net/ipv6/ip6_output.c linux/net/ipv6/ip6_output.c --- v2.1.67/linux/net/ipv6/ip6_output.c Tue Sep 23 16:48:50 1997 +++ linux/net/ipv6/ip6_output.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: ip6_output.c,v 1.3 1997/03/18 18:24:37 davem Exp $ + * $Id: ip6_output.c,v 1.5 1997/09/21 18:33:14 kuznet Exp $ * * Based on linux/net/ipv4/ip_output.c * @@ -540,6 +540,11 @@ struct ipv6hdr *hdr = skb->nh.ipv6h; int size; + if (ipv6_config.forwarding == 0) { + kfree_skb(skb, FREE_READ); + return -EINVAL; + } + /* * check hop-by-hop options present */ diff -u --recursive --new-file v2.1.67/linux/net/ipv6/ipv6_sockglue.c linux/net/ipv6/ipv6_sockglue.c --- v2.1.67/linux/net/ipv6/ipv6_sockglue.c Thu May 15 16:48:06 1997 +++ linux/net/ipv6/ipv6_sockglue.c Sun Nov 30 14:00:39 1997 @@ -7,7 +7,7 @@ * * Based on linux/net/ipv4/ip_sockglue.c * - * $Id: ipv6_sockglue.c,v 1.13 1997/05/15 18:55:10 davem Exp $ + * $Id: ipv6_sockglue.c,v 1.15 1997/10/29 20:27:54 kuznet Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -43,7 +43,6 @@ #include #include #include -#include #include #include @@ -111,6 +110,7 @@ sk->prot = &tcp_prot; 
tp->af_specific = &ipv4_specific; sk->socket->ops = &inet_stream_ops; + sk->family = AF_INET; } else { sk->prot = &udp_prot; sk->socket->ops = &inet_dgram_ops; diff -u --recursive --new-file v2.1.67/linux/net/ipv6/mcast.c linux/net/ipv6/mcast.c --- v2.1.67/linux/net/ipv6/mcast.c Thu May 15 16:48:06 1997 +++ linux/net/ipv6/mcast.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: mcast.c,v 1.10 1997/05/07 09:40:22 davem Exp $ + * $Id: mcast.c,v 1.11 1997/10/29 20:27:50 kuznet Exp $ * * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c * @@ -417,7 +417,10 @@ skb_reserve(skb, (dev->hard_header_len + 15) & ~15); if (dev->hard_header) { unsigned char ha[MAX_ADDR_LEN]; - ipv6_mc_map(addr, ha); + if (dev->type == ARPHRD_ETHER) + ipv6_mc_map(addr, ha); + else + memcpy(ha, dev->broadcast, dev->addr_len); dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, plen); skb->arp = 1; } diff -u --recursive --new-file v2.1.67/linux/net/ipv6/ndisc.c linux/net/ipv6/ndisc.c --- v2.1.67/linux/net/ipv6/ndisc.c Tue May 13 22:41:24 1997 +++ linux/net/ipv6/ndisc.c Sun Nov 30 14:00:39 1997 @@ -6,8 +6,6 @@ * Pedro Roque * Mike Shaver * - * $Id: ndisc.c,v 1.15 1997/04/29 09:38:48 mj Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -24,7 +22,7 @@ */ /* Set to 3 to get tracing... 
*/ -#define ND_DEBUG 2 +#define ND_DEBUG 1 #if ND_DEBUG >= 3 #define NDBG(x) printk x @@ -396,7 +394,10 @@ struct in6_addr *daddr; daddr = &skb->nh.ipv6h->daddr; - ipv6_mc_map(daddr, h_dest); + if (skb->dev->type == ARPHRD_ETHER) + ipv6_mc_map(daddr, h_dest); + else + memcpy(h_dest, skb->dev->broadcast, skb->dev->addr_len); return 0; } @@ -434,6 +435,54 @@ return 1; } +static int +ndisc_build_ll_hdr(struct sk_buff *skb, struct device *dev, + struct in6_addr *daddr, struct neighbour *neigh, int len) +{ + unsigned char ha[MAX_ADDR_LEN]; + unsigned char *h_dest = NULL; + + skb->arp = 1; + if (dev->hard_header_len) { + skb_reserve(skb, (dev->hard_header_len + 15) & ~15); + + if (dev->hard_header) { + if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST) { + nd_stats.snt_probes_mcast++; + if (dev->type == ARPHRD_ETHER) + ipv6_mc_map(daddr, ha); + else + memcpy(ha, dev->broadcast, dev->addr_len); + h_dest = ha; + } else if (neigh) { + h_dest = neigh->ha; + nd_stats.snt_probes_ucast++; + } else { + struct nd_neigh *ndn; + + neigh_table_lock(&nd_tbl); + + neigh = neigh_lookup(&nd_tbl, (void *) daddr, + sizeof(struct in6_addr), dev); + if (neigh) { + ndn = (struct nd_neigh*)neigh; + if (ndn->ndn_flags&NTF_COMPLETE) { + memcpy(ha, ndn->ndn_ha, dev->addr_len); + h_dest = ha; + } + } + neigh_table_unlock(&nd_tbl); + } + + if (dev->hard_header(skb, dev, ETH_P_IPV6, h_dest, NULL, len) < 0) + skb->arp = 0; + } + } + + return skb->arp; +} + + /* * Send a Neighbour Advertisement */ @@ -486,17 +535,10 @@ printk(KERN_DEBUG "send_na: alloc skb failed\n"); return; } - /* - * build the MAC header - */ - - if (dev->hard_header_len) { - skb_reserve(skb, (dev->hard_header_len + 15) & ~15); - if (dev->hard_header) { - dev->hard_header(skb, dev, ETH_P_IPV6, ndn->ndn_ha, - NULL, len); - skb->arp = 1; - } + + if (ndisc_build_ll_hdr(skb, dev, daddr, (struct neighbour*)ndn, len) == 0) { + kfree_skb(skb, FREE_WRITE); + return; } ip6_nd_hdr(sk, skb, dev, solicited_addr, daddr, IPPROTO_ICMPV6, len); @@ 
-540,12 +582,10 @@ struct in6_addr *solicit, struct in6_addr *daddr, struct in6_addr *saddr) { - unsigned char ha[MAX_ADDR_LEN]; struct sock *sk = ndisc_socket->sk; struct sk_buff *skb; struct nd_msg *msg; int len, opt_len; - void *h_dest; int err; NDBG(("ndisc_send_ns(%s,%p): ", (dev ? dev->name : "[NULL]"), neigh)); @@ -581,7 +621,11 @@ return; } +#if 0 + /* Why Pedro did it? Is it remnant of early + attempts to avoid looping back? I have no idea. --ANK */ skb->pkt_type = PACKET_NDISC; +#endif if (saddr == NULL) { struct inet6_ifaddr *ifa; @@ -593,29 +637,9 @@ saddr = &ifa->addr; } - if ((ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST)) { - nd_stats.snt_probes_mcast++; - ipv6_mc_map(daddr, ha); - h_dest = ha; - } else { - if (neigh == NULL) { -#if ND_DEBUG >= 1 - printk(KERN_DEBUG "send_ns: ucast destination " - "with null neighbour\n"); -#endif - return; - } - h_dest = neigh->ha; - nd_stats.snt_probes_ucast++; - } - - if (dev->hard_header_len) { - skb_reserve(skb, (dev->hard_header_len + 15) & ~15); - if (dev->hard_header) { - dev->hard_header(skb, dev, ETH_P_IPV6, h_dest, NULL, - len); - skb->arp = 1; - } + if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) { + kfree_skb(skb, FREE_WRITE); + return; } ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); @@ -684,15 +708,9 @@ return; } - if (dev->hard_header_len) { - skb_reserve(skb, (dev->hard_header_len + 15) & ~15); - if (dev->hard_header) { - unsigned char ha[MAX_ADDR_LEN]; - - ipv6_mc_map(daddr, ha); - dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, len); - skb->arp = 1; - } + if (ndisc_build_ll_hdr(skb, dev, daddr, NULL, len) == 0) { + kfree_skb(skb, FREE_WRITE); + return; } ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len); @@ -783,15 +801,19 @@ ntimer = min(ntimer, time); } ndn = (struct nd_neigh *) ndn->neigh.next; - } while (ndn != head); } if (ntimer != (~0UL)) { - ndisc_timer.expires = now + ntimer; + unsigned long tval = jiffies + ntimer; + if (del_timer(&ndisc_timer)) { + if 
(ndisc_timer.expires - tval < 0) + tval = ndisc_timer.expires; + } + ndisc_timer.expires = tval; add_timer(&ndisc_timer); } - + neigh_table_unlock(&nd_tbl); } @@ -1238,14 +1260,12 @@ NDBG(("ndisc_redirect_rcv(%p)\n", skb)); if (skb->nh.ipv6h->hop_limit != 255) { - printk(KERN_WARNING - "NDISC: fake ICMP redirect received\n"); + printk(KERN_WARNING "NDISC: fake ICMP redirect received\n"); return; } if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) { - printk(KERN_WARNING - "ICMP redirect: source address is not linklocal\n"); + printk(KERN_WARNING "ICMP redirect: source address is not linklocal\n"); return; } @@ -1269,19 +1289,15 @@ if (ipv6_addr_cmp(dest, target) == 0) { on_link = 1; } else if (!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) { - printk(KERN_WARNING - "ICMP redirect: target address is not linklocal\n"); + printk(KERN_WARNING "ICMP redirect: target address is not linklocal\n"); return; } /* passed validation tests */ - rt = rt6_redirect(dest, &skb->nh.ipv6h->saddr, target, skb->dev, - on_link); + rt = rt6_redirect(dest, &skb->nh.ipv6h->saddr, target, skb->dev, on_link); - if (rt == NULL) { - printk(KERN_WARNING "ICMP redirect: no route to host\n"); + if (rt == NULL) return; - } ndn = (struct nd_neigh *) rt->rt6i_nexthop; @@ -1365,13 +1381,9 @@ hlen = 0; - if (dev->hard_header_len) { - skb_reserve(buff, (dev->hard_header_len + 15) & ~15); - if (dev->hard_header) { - dev->hard_header(buff, dev, ETH_P_IPV6, ndn->ndn_ha, - NULL, len); - buff->arp = 1; - } + if (ndisc_build_ll_hdr(buff, dev, &skb->nh.ipv6h->saddr, NULL, len) == 0) { + kfree_skb(buff, FREE_WRITE); + return; } ip6_nd_hdr(sk, buff, dev, &ifp->addr, &skb->nh.ipv6h->saddr, @@ -1471,25 +1483,32 @@ switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: NDBG(("NS ")); - if ((ifp = ipv6_chk_addr(&msg->target))) { - int addr_type; + if ((ifp = ipv6_chk_addr(&msg->target)) != NULL) { + int addr_type = ipv6_addr_type(saddr); if (ifp->flags & DAD_INCOMPLETE) { - /* - * 
DAD failed + /* Address is tentative. If the source + is unspecified address, it is someone + does DAD, otherwise we ignore solicitations + until DAD timer expires. */ + if (addr_type == IPV6_ADDR_ANY) { + printk(KERN_INFO "%s: duplicate address detected!\n", + ifp->idev->dev->name); + del_timer(&ifp->timer); + } + return 0; + } - /* XXX Check if this came in over same interface - * XXX we just sent an NS from! That is valid! -DaveM - */ + if (addr_type == IPV6_ADDR_ANY) { + struct in6_addr maddr; - printk(KERN_DEBUG "%s: duplicate address\n", - ifp->idev->dev->name); - del_timer(&ifp->timer); + ipv6_addr_all_nodes(&maddr); + ndisc_send_na(dev, NULL, &maddr, &ifp->addr, + ifp->idev->router, 0, 1, 1); return 0; } - addr_type = ipv6_addr_type(saddr); if (addr_type & IPV6_ADDR_UNICAST) { int inc; @@ -1512,7 +1531,6 @@ ifp->idev->router, 1, inc, inc); } else { #if ND_DEBUG >= 1 - /* FIXME */ printk(KERN_DEBUG "ns: non unicast saddr\n"); #endif } @@ -1521,6 +1539,28 @@ case NDISC_NEIGHBOUR_ADVERTISEMENT: NDBG(("NA ")); + if ((ipv6_addr_type(saddr)&IPV6_ADDR_MULTICAST) && + msg->icmph.icmp6_solicited) { + printk(KERN_DEBUG "NDISC: solicited NA is multicasted\n"); + return 0; + } + if ((ifp = ipv6_chk_addr(&msg->target))) { + if (ifp->flags & DAD_INCOMPLETE) { + /* Address is duplicate. */ + printk(KERN_INFO "%s: duplicate address detected!\n", + ifp->idev->dev->name); + del_timer(&ifp->timer); + return 0; + } + /* What should we make now? The advertisement + is invalid, but ndisc specs say nothing + about it. 
It could be misconfiguration, or + an smart proxy agent tries to help us :-) + */ + printk(KERN_DEBUG "%s: someone avertise our address!\n", + ifp->idev->dev->name); + return 0; + } neigh_table_lock(&nd_tbl); ndn = (struct nd_neigh *) neigh_lookup(&nd_tbl, (void *) &msg->target, diff -u --recursive --new-file v2.1.67/linux/net/ipv6/raw.c linux/net/ipv6/raw.c --- v2.1.67/linux/net/ipv6/raw.c Mon Apr 14 16:28:28 1997 +++ linux/net/ipv6/raw.c Sun Nov 30 14:00:39 1997 @@ -7,7 +7,7 @@ * * Adapted from linux/net/ipv4/raw.c * - * $Id: raw.c,v 1.12 1997/04/01 02:23:34 davem Exp $ + * $Id: raw.c,v 1.13 1997/09/14 08:32:14 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -156,7 +156,7 @@ /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { v4addr = addr->sin6_addr.s6_addr32[3]; - if (__ip_chk_addr(v4addr) != IS_MYADDR) + if (inet_addr_type(v4addr) != RTN_LOCAL) return(-EADDRNOTAVAIL); } else { if (addr_type != IPV6_ADDR_ANY) { @@ -307,8 +307,9 @@ { struct rawv6_fakehdr *hdr = (struct rawv6_fakehdr *) data; - hdr->cksum = csum_partial_copy_fromiovecend(buff, hdr->iov, offset, - len, hdr->cksum); + if (csum_partial_copy_fromiovecend(buff, hdr->iov, offset, + len, &hdr->cksum)) + return -EFAULT; if (offset == 0) { struct sock *sk; @@ -461,28 +462,49 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, char *optval, int optlen) { - struct raw6_opt *opt = &sk->tp_pinfo.tp_raw; - int err = 0; + switch (optname) { + case ICMPV6_FILTER: + if (optlen > sizeof(struct icmp6_filter)) + optlen = sizeof(struct icmp6_filter); + if (copy_from_user(&sk->tp_pinfo.tp_raw.filter, optval, optlen)) + return -EFAULT; + return 0; + default: + return -ENOPROTOOPT; + }; + + return 0; +} + +static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, + char *optval, int *optlen) +{ + int len; switch (optname) { - case ICMPV6_FILTER: - err = 
copy_from_user(&opt->filter, optval, - sizeof(struct icmp6_filter)); - if (err) - err = -EFAULT; - break; - default: - err = -ENOPROTOOPT; + case ICMPV6_FILTER: + if (get_user(len, optlen)) + return -EFAULT; + if (len > sizeof(struct icmp6_filter)) + len = sizeof(struct icmp6_filter); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &sk->tp_pinfo.tp_raw.filter, len)) + return -EFAULT; + return 0; + default: + return -ENOPROTOOPT; }; - return err; + return 0; } + static int rawv6_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen) { struct raw6_opt *opt = &sk->tp_pinfo.tp_raw; - int val, err; + int val; switch(level) { case SOL_RAW: @@ -501,12 +523,8 @@ optlen); }; - if (optval == NULL) - return(-EINVAL); - - err = get_user(val, (int *)optval); - if(err) - return err; + if (get_user(val, (int *)optval)) + return -EFAULT; switch (optname) { case IPV6_CHECKSUM: @@ -525,6 +543,53 @@ } } +static int rawv6_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen) +{ + struct raw6_opt *opt = &sk->tp_pinfo.tp_raw; + int val, len; + + switch(level) { + case SOL_RAW: + break; + + case SOL_ICMPV6: + if (sk->num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + return rawv6_geticmpfilter(sk, level, optname, optval, + optlen); + case SOL_IPV6: + if (optname == IPV6_CHECKSUM) + break; + default: + return ipv6_getsockopt(sk, level, optname, optval, + optlen); + }; + + if (get_user(len,optlen)) + return -EFAULT; + + switch (optname) { + case IPV6_CHECKSUM: + if (opt->checksum == 0) + val = -1; + else + val = opt->offset; + + default: + return -ENOPROTOOPT; + } + + len=min(sizeof(int),len); + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval,&val,len)) + return -EFAULT; + return 0; +} + + static void rawv6_close(struct sock *sk, unsigned long timeout) { struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6; @@ -558,7 +623,7 @@ NULL, /* destroy */ NULL, /* shutdown */ rawv6_setsockopt, /* setsockopt 
*/ - ipv6_getsockopt, /* getsockopt - FIXME */ + rawv6_getsockopt, /* getsockopt */ rawv6_sendmsg, /* sendmsg */ rawv6_recvmsg, /* recvmsg */ rawv6_bind, /* bind */ diff -u --recursive --new-file v2.1.67/linux/net/ipv6/route.c linux/net/ipv6/route.c --- v2.1.67/linux/net/ipv6/route.c Tue Sep 23 16:48:50 1997 +++ linux/net/ipv6/route.c Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: route.c,v 1.13 1997/07/19 11:11:35 davem Exp $ + * $Id: route.c,v 1.18 1997/10/17 00:15:05 freitag Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -23,6 +23,8 @@ #include #include #include +#include +#include #ifdef CONFIG_PROC_FS #include @@ -34,7 +36,7 @@ #include #include #include -#include +#include #include @@ -64,7 +66,7 @@ struct rt6_info ip6_null_entry = { {{NULL, ATOMIC_INIT(0), ATOMIC_INIT(0), NULL, - 0, 0, 0, 0, 0, 0, 0, 0, -ENETUNREACH, NULL, NULL, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -ENETUNREACH, NULL, NULL, ip6_pkt_discard, ip6_pkt_discard, &ip6_dst_ops}}, NULL, {{{0}}}, 256, RTF_REJECT|RTF_NONEXTHOP, ~0UL, 0, {NULL}, {{{{0}}}, 128}, {{{{0}}}, 128} @@ -297,7 +299,7 @@ rt6_lock(); fn = fib6_lookup(&ip6_routing_table, daddr, saddr); - rt = rt6_device_match(fn->leaf, dev, 0); + rt = rt6_device_match(fn->leaf, dev, flags&RTF_LINKRT); rt6_unlock(); return rt; } @@ -314,6 +316,9 @@ if (rt) { ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); + if (!(rt->rt6i_flags&RTF_GATEWAY)) + ipv6_addr_copy(&rt->rt6i_gateway, daddr); + rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; @@ -322,7 +327,7 @@ rt->rt6i_src.plen = 128; } - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, daddr); + rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); rtreq_add(rt, RT_OPER_ADD); } else { @@ -556,6 +561,23 @@ return NULL; } +/* Clean host part of a prefix. Not necessary in radix tree, + but results in cleaner routing tables. + + Remove it only when all the things will work! 
+ */ + +static void ipv6_wash_prefix(struct in6_addr *pfx, int plen) +{ + int b = plen&0x7; + int o = (plen + 7)>>3; + + if (o < 16) + memset(pfx->s6_addr + o, 0, 16 - o); + if (b != 0) + pfx->s6_addr[plen>>3] &= (0xFF<<(8-b)); +} + /* * */ @@ -566,7 +588,11 @@ struct device *dev = NULL; int addr_type; - RDBG(("ip6_route_add(%p)[%p] ", rtmsg, __builtin_return_address(0))); + if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) { + *err = -EINVAL; + return NULL; + } + *err = 0; rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops); @@ -577,29 +603,6 @@ goto out; } - /* - * default... this should be chosen according to route flags - */ - -#if RT6_DEBUG >= 3 - { - struct in6_addr *addr = &rtmsg->rtmsg_dst; - int i; - - RDBG(("daddr[")); - for(i = 0; i < 8; i++) { - RDBG(("%04x%c", addr->s6_addr16[i], - i == 7 ? ']' : ':')); - } - addr = &rtmsg->rtmsg_src; - RDBG(("saddr[")); - for(i = 0; i < 8; i++) { - RDBG(("%04x%c", addr->s6_addr16[i], - i == 7 ? ']' : ':')); - } - } -#endif - addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst); if (addr_type & IPV6_ADDR_MULTICAST) { @@ -609,71 +612,58 @@ RDBG(("!MCAST ")); rt->u.dst.input = ip6_forward; } - + rt->u.dst.output = dev_queue_xmit; - - if (rtmsg->rtmsg_ifindex) + + if (rtmsg->rtmsg_ifindex) { dev = dev_get_by_index(rtmsg->rtmsg_ifindex); - if(dev) - RDBG(("d[%s] ", dev->name)); + if (dev == NULL) { + *err = -ENODEV; + goto out; + } + } ipv6_addr_copy(&rt->rt6i_dst.addr, &rtmsg->rtmsg_dst); rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len; + ipv6_wash_prefix(&rt->rt6i_dst.addr, rt->rt6i_dst.plen); - /* XXX Figure out what really is supposed to be happening here -DaveM */ ipv6_addr_copy(&rt->rt6i_src.addr, &rtmsg->rtmsg_src); rt->rt6i_src.plen = rtmsg->rtmsg_src_len; - - if ((rt->rt6i_src.plen = rtmsg->rtmsg_src_len)) { - RDBG(("splen, ")); - ipv6_addr_copy(&rt->rt6i_src.addr, &rtmsg->rtmsg_src); - } else { - RDBG(("!splen, ")); - } - /* XXX */ + ipv6_wash_prefix(&rt->rt6i_src.addr, rt->rt6i_src.plen); - if 
(rtmsg->rtmsg_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - struct rt6_info *grt; + if (rtmsg->rtmsg_flags & RTF_GATEWAY) { struct in6_addr *gw_addr; - u32 flags = 0; - - RDBG(("RTF_GATEWAY, ")); - /* - * 1. gateway route lookup - * 2. ndisc_get_neigh - */ + int gwa_type; gw_addr = &rtmsg->rtmsg_gateway; + ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway); + gwa_type = ipv6_addr_type(gw_addr); -#if RT6_DEBUG >= 3 - { - struct in6_addr *addr = gw_addr; - int i; - - RDBG(("gwaddr[")); - for(i = 0; i < 8; i++) { - RDBG(("%04x%c", addr->s6_addr16[i], - i == 7 ? ']' : ':')); + if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { + struct rt6_info *grt; + + /* IPv6 strictly inhibits using not link-local + addresses as nexthop address. + It is very good, but in some (rare!) curcumstances + (SIT, NBMA NOARP links) it is handy to allow + some exceptions. + */ + if (!(gwa_type&IPV6_ADDR_UNICAST)) { + *err = -EINVAL; + goto out; } - } -#endif - if ((rtmsg->rtmsg_flags & RTF_GATEWAY) && - (rtmsg->rtmsg_flags & RTF_ADDRCONF) == 0) { - RDBG(("RTF_GATEWAY && !RTF_ADDRCONF, ")); - if (dev) - flags |= RTF_LINKRT; - - grt = rt6_lookup(gw_addr, NULL, dev, flags); - - if (grt == NULL) - { - RDBG(("!grt, ")); + grt = rt6_lookup(gw_addr, NULL, dev, RTF_LINKRT); + + if (grt == NULL || (grt->rt6i_flags&RTF_GATEWAY)) { *err = -EHOSTUNREACH; goto out; } dev = grt->rt6i_dev; - RDBG(("grt(d=%s), ", dev ? dev->name : "NULL")); + } + if (dev == NULL) { + *err = -EINVAL; + goto out; } rt->rt6i_nexthop = ndisc_get_neigh(dev, gw_addr); @@ -739,20 +729,26 @@ /* * Find device */ - if(rtmsg->rtmsg_ifindex) + if(rtmsg->rtmsg_ifindex) { dev=dev_get_by_index(rtmsg->rtmsg_ifindex); + if (dev == NULL) + return -ENODEV; + } /* * Find route */ - rt=rt6_lookup(&rtmsg->rtmsg_dst, &rtmsg->rtmsg_src, dev, rtmsg->rtmsg_flags); - + rt=rt6_lookup(&rtmsg->rtmsg_dst, &rtmsg->rtmsg_src, dev, dev ? 
RTF_LINKRT : 0); + /* * Blow it away */ - if(rt) + if(rt && rt->rt6i_dst.plen == rtmsg->rtmsg_dst_len && + rt->rt6i_src.plen == rtmsg->rtmsg_src_len) { ip6_del_rt(rt); + return 0; + } - return 0; + return -ESRCH; } @@ -777,6 +773,7 @@ rt6_bh_mask = 0; } +#ifdef CONFIG_NETLINK /* * NETLINK interface * routing socket moral equivalent @@ -815,6 +812,7 @@ kfree_skb(skb, FREE_READ); return count; } +#endif /* CONFIG_NETLINK */ static void rt6_sndrtmsg(struct in6_rtmsg *rtmsg) { @@ -827,7 +825,9 @@ memcpy(skb_put(skb, sizeof(struct in6_rtmsg)), &rtmsg, sizeof(struct in6_rtmsg)); +#ifdef CONFIG_NETLINK if (netlink_post(NETLINK_ROUTE6, skb)) +#endif kfree_skb(skb, FREE_WRITE); } @@ -867,7 +867,9 @@ msg->rtmsg_flags = flags; +#ifdef CONFIG_NETLINK if (netlink_post(NETLINK_ROUTE6, skb)) +#endif kfree_skb(skb, FREE_WRITE); } @@ -878,54 +880,28 @@ struct in6_addr *target, struct device *dev, int on_link) { - struct rt6_info *rt, *tgtr, *nrt; + struct rt6_info *rt, *nrt; - RDBG(("rt6_redirect(%s)[%p]: ", - dev ? dev->name : "NULL", - __builtin_return_address(0))); + /* Locate old route to this destination. */ rt = rt6_lookup(dest, NULL, dev, 0); - if (rt == NULL || rt->u.dst.error) { - RDBG(("!rt\n")); - printk(KERN_DEBUG "rt6_redirect: no route to destination\n"); + if (rt == NULL || rt->u.dst.error) return NULL; - } - if (rt->rt6i_flags & RTF_GATEWAY) { - /* - * This can happen due to misconfiguration - * if we are dealing with an "on link" redirect. - */ - RDBG(("RTF_GATEWAY\n")); - printk(KERN_DEBUG "rt6_redirect: destination not directly " - "connected\n"); - return NULL; - } - RDBG(("tgt_lkup, ")); - tgtr = rt6_lookup(target, NULL, dev, 0); - - if (tgtr == NULL || tgtr->u.dst.error) { - /* - * duh?! no route to redirect target. - * How where we talking to it in the first place ? - */ - RDBG(("!tgtr||dsterr\n")); - printk(KERN_DEBUG "rt6_redirect: no route to target\n"); + /* Duplicate redirect: silently ignore. 
*/ + if (ipv6_addr_cmp(target, &rt->rt6i_gateway) == 0) return NULL; - } - if ((tgtr->rt6i_flags & RTF_GATEWAY) && - ipv6_addr_cmp(dest, &tgtr->rt6i_gateway) == 0) { - RDBG(("tgt RTF_GATEWAY && dstmatch, dup\n")); - /* - * Check if we already have the right route. - */ -#if RT6_DEBUG >= 1 - printk(KERN_DEBUG "rt6_redirect: duplicate\n"); -#endif + /* Current route is on-link; redirect is always invalid. */ + if (!(rt->rt6i_flags&RTF_GATEWAY)) return NULL; - } +#if !defined(CONFIG_IPV6_EUI64) || defined(CONFIG_IPV6_NO_PB) + /* + * During transition gateways have more than + * one link local address. Certainly, it is violation + * of basic principles, but it is temparary. + */ /* * RFC 1970 specifies that redirects should only be * accepted if they come from the nexthop to the target. @@ -934,62 +910,57 @@ * routers. */ - if (ipv6_addr_cmp(saddr, &tgtr->rt6i_gateway)) { - RDBG(("saddr/tgt->gway match, ")); - if (tgtr->rt6i_flags & RTF_DEFAULT) { - tgtr = ip6_routing_table.leaf; - - for (; tgtr; tgtr = tgtr->u.next) { - if (!ipv6_addr_cmp(saddr, &tgtr->rt6i_gateway)) { - RDBG(("found srcok, ")); + if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) { + if (rt->rt6i_flags & RTF_DEFAULT) { + rt = ip6_routing_table.leaf; + + for (; rt; rt = rt->u.next) { + if (!ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) goto source_ok; - } } } - RDBG(("!dflt||!srcok, ")); printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " - "for redirect target\n"); + "for redirect target\n"); + return NULL; } source_ok: +#endif /* * We have finally decided to accept it. */ - RDBG(("srcok: ")); - if ((tgtr->rt6i_flags & RTF_HOST)) { + if (rt->rt6i_dst.plen == 128) { /* * Already a host route. 
* */ - RDBG(("hralready, ")); - if (tgtr->rt6i_nexthop) { - RDBG(("nrel(nxthop) ")); - neigh_release(tgtr->rt6i_nexthop); - } + if (rt->rt6i_nexthop) + neigh_release(rt->rt6i_nexthop); /* * purge hh_cache */ - tgtr->rt6i_flags |= RTF_MODIFIED | RTF_CACHE; - ipv6_addr_copy(&tgtr->rt6i_gateway, dest); - tgtr->rt6i_nexthop = ndisc_get_neigh(tgtr->rt6i_dev, dest); - RDBG(("hhpurge, getnewneigh, ret(%p)\n", tgtr)); - return tgtr; + rt->rt6i_flags |= RTF_MODIFIED | RTF_CACHE; + if (on_link) + rt->rt6i_flags &= ~RTF_GATEWAY; + ipv6_addr_copy(&rt->rt6i_gateway, target); + rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, target); + return rt; } - nrt = ip6_rt_copy(tgtr); - nrt->rt6i_flags = RTF_GATEWAY|RTF_HOST|RTF_UP|RTF_DYNAMIC|RTF_CACHE; + nrt = ip6_rt_copy(rt); + nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; + if (on_link) + nrt->rt6i_flags &= ~RTF_GATEWAY; - ipv6_addr_copy(&nrt->rt6i_dst.addr, target); + ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); nrt->rt6i_dst.plen = 128; - ipv6_addr_copy(&nrt->rt6i_gateway, dest); - nrt->rt6i_nexthop = ndisc_get_neigh(nrt->rt6i_dev, dest); + ipv6_addr_copy(&nrt->rt6i_gateway, target); + nrt->rt6i_nexthop = ndisc_get_neigh(nrt->rt6i_dev, target); nrt->rt6i_dev = dev; nrt->u.dst.pmtu = dev->mtu; - RDBG(("rt6_ins(%p)\n", nrt)); - rt6_lock(); rt6_ins(nrt); rt6_unlock(); @@ -1023,7 +994,15 @@ return; } - if (rt->rt6i_flags & RTF_HOST) { + /* It is wrong, but I plugged the hole here. 
+ On-link routes are cloned differently, + look at rt6_redirect --ANK + */ + if (!(rt->rt6i_flags&RTF_GATEWAY)) { + return; + } + + if (rt->rt6i_dst.plen == 128) { /* * host route */ @@ -1037,7 +1016,7 @@ ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; - rt->rt6i_flags |= (RTF_HOST | RTF_DYNAMIC | RTF_CACHE); + rt->rt6i_flags |= (RTF_DYNAMIC | RTF_CACHE); rt6_lock(); rt6_ins(rt); @@ -1065,7 +1044,7 @@ rt->rt6i_keylen = ort->rt6i_keylen; rt->rt6i_flags = ort->rt6i_flags; rt->rt6i_metric = ort->rt6i_metric; - + memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); } @@ -1257,7 +1236,7 @@ rt->rt6i_dev = dev_get("lo"); rt->u.dst.pmtu = rt->rt6i_dev->mtu; - rt->rt6i_flags = RTF_HOST | RTF_LOCAL | RTF_UP | RTF_NONEXTHOP; + rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; @@ -1600,7 +1579,9 @@ proc_net_register(&proc_rt6_stats); proc_net_register(&proc_rt6_tree); #endif +#ifdef CONFIG_NETLINK netlink_attach(NETLINK_ROUTE6, rt6_msgrcv); +#endif } #ifdef MODULE @@ -1611,7 +1592,9 @@ proc_net_unregister(PROC_NET_RT6_TREE); proc_net_unregister(PROC_NET_RT6_STATS); #endif +#ifdef CONFIG_NETLINK netlink_detach(NETLINK_ROUTE6); +#endif #if 0 fib6_flush(); #endif diff -u --recursive --new-file v2.1.67/linux/net/ipv6/sit.c linux/net/ipv6/sit.c --- v2.1.67/linux/net/ipv6/sit.c Tue May 13 22:41:24 1997 +++ linux/net/ipv6/sit.c Sun Nov 30 14:00:39 1997 @@ -4,8 +4,9 @@ * * Authors: * Pedro Roque + * Alexey Kuznetsov * - * $Id: sit.c,v 1.14 1997/04/29 09:38:52 mj Exp $ + * $Id: sit.c,v 1.23 1997/11/08 18:15:49 kuznet Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -13,6 +14,9 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include +#define __NO_VERSION__ +#include #include #include #include @@ -23,6 +27,7 @@ #include #include #include +#include #include #include @@ -31,385 +36,363 @@ #include #include #include +#include +#include #include #include #include #include -#include +#include +#include +/* + This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c -static int sit_init_dev(struct device *dev); - -static struct device sit_device = { - "sit0", - 0, 0, 0, 0, - 0x0, 0, - 0, 0, 0, NULL, sit_init_dev -}; - -static unsigned long sit_gc_last_run; -static void sit_mtu_cache_gc(void); - -static int sit_xmit(struct sk_buff *skb, - struct device *dev); -static int sit_rcv(struct sk_buff *skb, unsigned short len); -static void sit_err(struct sk_buff *skb, unsigned char *dp); - -static int sit_open(struct device *dev); -static int sit_close(struct device *dev); + For comments look at net/ipv4/ip_gre.c --ANK + */ -static struct net_device_stats *sit_get_stats(struct device *dev); +#define HASH_SIZE 16 +#define HASH(addr) ((addr^(addr>>4))&0xF) -extern void udp_err(struct sk_buff *, unsigned char *); +static int ipip6_fb_tunnel_init(struct device *dev); +static int ipip6_tunnel_init(struct device *dev); -static struct inet_protocol sit_protocol = { - sit_rcv, - sit_err, - 0, - IPPROTO_IPV6, - 0, - NULL, - "IPv6" +static struct device ipip6_fb_tunnel_dev = { + NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipip6_fb_tunnel_init, }; -#define SIT_NUM_BUCKETS 16 - -struct sit_mtu_info *sit_mtu_cache[SIT_NUM_BUCKETS]; - -static int vif_num = 0; -static struct sit_vif *vif_list = NULL; - -static __inline__ __u32 sit_addr_hash(__u32 addr) -{ - - __u32 hash_val; - - hash_val = addr; - - hash_val ^= hash_val >> 16; - hash_val ^= hash_val >> 8; - - return (hash_val & (SIT_NUM_BUCKETS - 1)); -} - -static void sit_cache_insert(__u32 addr, int mtu) -{ - struct sit_mtu_info *minfo; - int hash; - - minfo = kmalloc(sizeof(struct sit_mtu_info), GFP_ATOMIC); - - if (minfo == NULL) - return; - - 
minfo->addr = addr; - minfo->tstamp = jiffies; - minfo->mtu = mtu; - - hash = sit_addr_hash(addr); +static struct ip_tunnel ipip6_fb_tunnel = { + NULL, &ipip6_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"sit0", } +}; - minfo->next = sit_mtu_cache[hash]; - sit_mtu_cache[hash] = minfo; +static struct ip_tunnel *tunnels_r_l[HASH_SIZE]; +static struct ip_tunnel *tunnels_r[HASH_SIZE]; +static struct ip_tunnel *tunnels_l[HASH_SIZE]; +static struct ip_tunnel *tunnels_wc[1]; +static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l }; + +static struct ip_tunnel * ipip6_tunnel_lookup(u32 remote, u32 local) +{ + unsigned h0 = HASH(remote); + unsigned h1 = HASH(local); + struct ip_tunnel *t; + + for (t = tunnels_r_l[h0^h1]; t; t = t->next) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) + return t; + } + for (t = tunnels_r[h0]; t; t = t->next) { + if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) + return t; + } + for (t = tunnels_l[h1]; t; t = t->next) { + if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) + return t; + } + if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) + return t; + return NULL; } -static struct sit_mtu_info * sit_mtu_lookup(__u32 addr) +struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int create) { - struct sit_mtu_info *iter; - int hash; + u32 remote = parms->iph.daddr; + u32 local = parms->iph.saddr; + struct ip_tunnel *t, **tp, *nt; + struct device *dev; + unsigned h = 0; + int prio = 0; - hash = sit_addr_hash(addr); + if (remote) { + prio |= 2; + h ^= HASH(remote); + } + if (local) { + prio |= 1; + h ^= HASH(local); + } + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) + return t; + } + if (!create) + return NULL; - for(iter = sit_mtu_cache[hash]; iter; iter=iter->next) { - if (iter->addr == addr) { - iter->tstamp = jiffies; - break; + 
MOD_INC_USE_COUNT; + dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL); + if (dev == NULL) { + MOD_DEC_USE_COUNT; + return NULL; + } + memset(dev, 0, sizeof(*dev) + sizeof(*t)); + dev->priv = (void*)(dev+1); + nt = (struct ip_tunnel*)dev->priv; + nt->dev = dev; + dev->name = nt->parms.name; + dev->init = ipip6_tunnel_init; + memcpy(&nt->parms, parms, sizeof(*parms)); + if (dev->name[0] == 0) { + int i; + for (i=1; i<100; i++) { + sprintf(dev->name, "sit%d", i); + if (dev_get(dev->name) == NULL) + break; } + if (i==100) + goto failed; + memcpy(parms->name, dev->name, IFNAMSIZ); } + if (register_netdevice(dev) < 0) + goto failed; - /* - * run garbage collector - */ + start_bh_atomic(); + nt->next = t; + *tp = nt; + end_bh_atomic(); + /* Do not decrement MOD_USE_COUNT here. */ + return nt; + +failed: + kfree(dev); + MOD_DEC_USE_COUNT; + return NULL; +} + +static void ipip6_tunnel_destroy(struct device *dev) +{ + struct ip_tunnel *t, **tp; + struct ip_tunnel *t0 = (struct ip_tunnel*)dev->priv; + u32 remote = t0->parms.iph.daddr; + u32 local = t0->parms.iph.saddr; + unsigned h = 0; + int prio = 0; - if (jiffies - sit_gc_last_run > SIT_GC_FREQUENCY) { - sit_mtu_cache_gc(); - sit_gc_last_run = jiffies; + if (dev == &ipip6_fb_tunnel_dev) { + tunnels_wc[0] = NULL; + return; } - return iter; -} - -static void sit_mtu_cache_gc(void) -{ - struct sit_mtu_info *iter, *back; - unsigned long now = jiffies; - int i; - - for (i=0; i < SIT_NUM_BUCKETS; i++) { - back = NULL; - for (iter = sit_mtu_cache[i]; iter;) { - if (now - iter->tstamp > SIT_GC_TIMEOUT) { - struct sit_mtu_info *old; - - old = iter; - iter = iter->next; - - if (back) - back->next = iter; - else - sit_mtu_cache[i] = iter; - - kfree(old); - continue; - } - back = iter; - iter = iter->next; + if (remote) { + prio |= 2; + h ^= HASH(remote); + } + if (local) { + prio |= 1; + h ^= HASH(local); + } + for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { + if (t == t0) { + *tp = t->next; + kfree(dev); + 
MOD_DEC_USE_COUNT; + break; } } } -static int sit_init_dev(struct device *dev) -{ - int i; - - dev->open = sit_open; - dev->stop = sit_close; - - dev->hard_start_xmit = sit_xmit; - dev->get_stats = sit_get_stats; - - dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); - - if (dev->priv == NULL) - return -ENOMEM; - - memset(dev->priv, 0, sizeof(struct net_device_stats)); - - - for (i = 0; i < DEV_NUMBUFFS; i++) - skb_queue_head_init(&dev->buffs[i]); - - dev->hard_header = NULL; - dev->rebuild_header = NULL; - dev->set_mac_address = NULL; - dev->hard_header_cache = NULL; - dev->header_cache_update= NULL; - - dev->type = ARPHRD_SIT; - - dev->hard_header_len = MAX_HEADER; - dev->mtu = 1500 - sizeof(struct iphdr); - dev->addr_len = 0; - dev->tx_queue_len = 0; - - memset(dev->broadcast, 0, MAX_ADDR_LEN); - memset(dev->dev_addr, 0, MAX_ADDR_LEN); - - dev->flags = IFF_NOARP; - - dev->family = AF_INET6; - dev->pa_addr = 0; - dev->pa_brdaddr = 0; - dev->pa_dstaddr = 0; - dev->pa_mask = 0; - dev->pa_alen = 4; - return 0; -} - -static int sit_init_vif(struct device *dev) +void ipip6_err(struct sk_buff *skb, unsigned char *dp, int len) { - int i; - - dev->flags = IFF_NOARP|IFF_POINTOPOINT|IFF_MULTICAST; - dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); - - if (dev->priv == NULL) - return -ENOMEM; - - memset(dev->priv, 0, sizeof(struct net_device_stats)); - - for (i = 0; i < DEV_NUMBUFFS; i++) - skb_queue_head_init(&dev->buffs[i]); - - return 0; -} - -static int sit_open(struct device *dev) -{ - return 0; -} - -static int sit_close(struct device *dev) -{ - return 0; -} +#ifndef I_WISH_WORLD_WERE_PERFECT -__initfunc(int sit_init(void)) -{ - int i; - - /* register device */ - - if (register_netdev(&sit_device) != 0) - return -EIO; - - inet_add_protocol(&sit_protocol); - - for (i=0; i < SIT_NUM_BUCKETS; i++) - sit_mtu_cache[i] = NULL; - - sit_gc_last_run = jiffies; - - return 0; -} - -struct device *sit_add_tunnel(__u32 dstaddr) -{ - struct sit_vif 
*vif; - struct device *dev; - - if ((sit_device.flags & IFF_UP) == 0) - return NULL; - - vif = kmalloc(sizeof(struct sit_vif), GFP_KERNEL); - if (vif == NULL) - return NULL; - - /* - * Create PtoP configured tunnel - */ - - dev = kmalloc(sizeof(struct device), GFP_KERNEL); - if (dev == NULL) - return NULL; - - memcpy(dev, &sit_device, sizeof(struct device)); - dev->init = sit_init_vif; - dev->pa_dstaddr = dstaddr; - - dev->name = vif->name; - sprintf(vif->name, "sit%d", ++vif_num); - - register_netdev(dev); - - vif->dev = dev; - vif->next = vif_list; - vif_list = vif; - - return dev; -} +/* It is not :-( All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. + */ + struct iphdr *iph = (struct iphdr*)dp; + int type = skb->h.icmph->type; + int code = skb->h.icmph->code; + struct ip_tunnel *t; -void sit_cleanup(void) -{ - struct sit_vif *vif; + if (len < sizeof(struct iphdr)) + return; - for (vif = vif_list; vif;) { - struct device *dev = vif->dev; - struct sit_vif *cur; + switch (type) { + default: + case ICMP_PARAMETERPROB: + return; - unregister_netdev(dev); - kfree(dev->priv); - kfree(dev); - - cur = vif; - vif = vif->next; + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* Soft state for pmtu is maintained by IP core. */ + return; + default: + /* All others are translated to HOST_UNREACH. + rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe they are just ether pollution. 
--ANK + */ + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + break; } - vif_list = NULL; - - unregister_netdev(&sit_device); - inet_del_protocol(&sit_protocol); - -} - -/* - * receive IPv4 ICMP messages - */ + t = ipip6_tunnel_lookup(iph->daddr, iph->saddr); + if (t == NULL || t->parms.iph.daddr == 0) + return; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) + return; -static void sit_err(struct sk_buff *skb, unsigned char *dp) -{ + if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; + return; +#else struct iphdr *iph = (struct iphdr*)dp; + int hlen = iph->ihl<<2; + struct ipv6hdr *iph6; int type = skb->h.icmph->type; int code = skb->h.icmph->code; + int rel_type = 0; + int rel_code = 0; + int rel_info = 0; + struct sk_buff *skb2; + struct rt6_info *rt6i; - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - struct sit_mtu_info *minfo; - unsigned short info = skb->h.icmph->un.frag.mtu - sizeof(struct iphdr); - - minfo = sit_mtu_lookup(iph->daddr); - - printk(KERN_DEBUG "sit: %08lx pmtu = %ul\n", ntohl(iph->saddr), - info); - - if (minfo == NULL) { - minfo = kmalloc(sizeof(struct sit_mtu_info), - GFP_ATOMIC); + if (len < hlen + sizeof(struct ipv6hdr)) + return; + iph6 = (struct ipv6hdr*)(dp + hlen); - if (minfo == NULL) - return; + switch (type) { + default: + return; + case ICMP_PARAMETERPROB: + if (skb->h.icmph->un.gateway < hlen) + return; + + /* So... This guy found something strange INSIDE encapsulated + packet. Well, he is fool, but what can we do ? + */ + rel_type = ICMPV6_PARAMPROB; + rel_info = skb->h.icmph->un.gateway - hlen; + break; + + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + case ICMP_PORT_UNREACH: + /* Impossible event. */ + return; + case ICMP_FRAG_NEEDED: + /* Too complicated case ... */ + return; + default: + /* All others are translated to HOST_UNREACH. 
+ rfc2003 contains "deep thoughts" about NET_UNREACH, + I believe, it is just ether pollution. --ANK + */ + rel_type = ICMPV6_DEST_UNREACH; + rel_code = ICMPV6_ADDR_UNREACH; + break; + } + break; + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + return; + rel_type = ICMPV6_TIME_EXCEED; + rel_code = ICMPV6_EXC_HOPLIMIT; + break; + } - start_bh_atomic(); - sit_cache_insert(iph->daddr, info); - end_bh_atomic(); - } else { - minfo->mtu = info; + /* Prepare fake skb to feed it to icmpv6_send */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 == NULL) + return; + dst_release(skb2->dst); + skb2->dst = NULL; + skb_pull(skb2, skb->data - (u8*)iph6); + skb2->nh.raw = skb2->data; + + /* Try to guess incoming interface */ + rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0); + if (rt6i && rt6i->rt6i_dev) { + skb2->dev = rt6i->rt6i_dev; + + rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0); + + if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { + struct ip_tunnel * t = (struct ip_tunnel*)rt6i->rt6i_dev->priv; + if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) { + rel_type = ICMPV6_DEST_UNREACH; + rel_code = ICMPV6_ADDR_UNREACH; + } + icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev); } } + kfree_skb(skb2, FREE_WRITE); + return; +#endif } -static int sit_rcv(struct sk_buff *skb, unsigned short len) +int ipip6_rcv(struct sk_buff *skb, unsigned short len) { - struct net_device_stats *stats; - struct device *dev = NULL; - struct sit_vif *vif; - __u32 saddr = skb->nh.iph->saddr; - - skb->h.raw = skb->nh.raw = skb_pull(skb, skb->h.raw - skb->data); + struct iphdr *iph; + struct ip_tunnel *tunnel; - skb->protocol = __constant_htons(ETH_P_IPV6); + iph = skb->nh.iph; - for (vif = vif_list; vif; vif = vif->next) { - if (saddr == vif->dev->pa_dstaddr) { - dev = vif->dev; - break; - } + if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { + skb->mac.raw = skb->nh.raw; + skb->nh.raw = skb_pull(skb, skb->h.raw - skb->data); + 
memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + skb->protocol = __constant_htons(ETH_P_IPV6); + skb->ip_summed = 0; + skb->pkt_type = PACKET_HOST; + tunnel->stat.rx_packets++; + tunnel->stat.rx_bytes += skb->len; + skb->dev = tunnel->dev; + dst_release(skb->dst); + skb->dst = NULL; + netif_rx(skb); + return 0; } - if (dev == NULL) - dev = &sit_device; - - skb->dev = dev; - skb->ip_summed = CHECKSUM_NONE; - - stats = (struct net_device_stats *)dev->priv; - stats->rx_bytes += len; - stats->rx_packets++; - - ipv6_rcv(skb, dev, NULL); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); + kfree_skb(skb, FREE_READ); return 0; } -static int sit_xmit(struct sk_buff *skb, struct device *dev) +/* + * This function assumes it is being called from dev_queue_xmit() + * and that skb is filled properly by that function. + */ + +static int ipip6_tunnel_xmit(struct sk_buff *skb, struct device *dev) { - struct net_device_stats *stats; - struct sit_mtu_info *minfo; + struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct net_device_stats *stats = &tunnel->stat; + struct iphdr *tiph = &tunnel->parms.iph; + struct ipv6hdr *iph6 = skb->nh.ipv6h; + u8 tos = tunnel->parms.iph.tos; + struct rtable *rt; /* Route to the other host */ + struct device *tdev; /* Device to other host */ + struct iphdr *iph; /* Our new IP header */ + int max_headroom; /* The extra header space needed */ + u32 dst = tiph->daddr; + int mtu; struct in6_addr *addr6; - struct rtable *rt; - struct iphdr *iph; - __u32 saddr; - __u32 daddr; int addr_type; - int mtu; - int headroom; - /* - * Make sure we are not busy (check lock variable) - */ + if (tunnel->recursion++) { + tunnel->stat.collisions++; + goto tx_error; + } - stats = (struct net_device_stats *)dev->priv; + if (skb->protocol != __constant_htons(ETH_P_IPV6)) + goto tx_error; - daddr = dev->pa_dstaddr; - if (daddr == 0) { + if (!dst) { struct nd_neigh *neigh = NULL; if (skb->dst) @@ -417,9 +400,9 @@ if (neigh == NULL) { 
printk(KERN_DEBUG "sit: nexthop == NULL\n"); - goto on_error; + goto tx_error; } - + addr6 = &neigh->ndn_addr; addr_type = ipv6_addr_type(addr6); @@ -428,88 +411,329 @@ addr_type = ipv6_addr_type(addr6); } - if ((addr_type & IPV6_ADDR_COMPATv4) == 0) { - printk(KERN_DEBUG "sit_xmit: non v4 address\n"); - goto on_error; - } - daddr = addr6->s6_addr32[3]; - } + if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + goto tx_error_icmp; - if (ip_route_output(&rt, daddr, 0, 0, NULL)) { - printk(KERN_DEBUG "sit: no route to host\n"); - goto on_error; + dst = addr6->s6_addr32[3]; } - minfo = sit_mtu_lookup(daddr); + if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { + tunnel->stat.tx_carrier_errors++; + goto tx_error_icmp; + } + tdev = rt->u.dst.dev; - /* IP should calculate pmtu correctly, - * let's check it... - */ -#if 0 - if (minfo) - mtu = minfo->mtu; - else -#endif - mtu = rt->u.dst.pmtu; + if (tdev == dev) { + ip_rt_put(rt); + tunnel->stat.collisions++; + goto tx_error; + } - if (mtu > 576 && skb->tail - (skb->data + sizeof(struct ipv6hdr)) > mtu) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + mtu = rt->u.dst.pmtu - sizeof(struct iphdr); + if (mtu < 68) { + tunnel->stat.collisions++; ip_rt_put(rt); - goto on_error; + goto tx_error; + } + if (mtu >= 576) { + if (skb->dst && mtu < skb->dst->pmtu) { + struct rt6_info *rt6 = (struct rt6_info*)skb->dst; + if (mtu < rt6->u.dst.pmtu) { + if (tunnel->parms.iph.daddr || rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + rt6->u.dst.pmtu = mtu; + } + } + } + if (skb->len > mtu) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + ip_rt_put(rt); + goto tx_error; + } + } + + if (tunnel->err_count > 0) { + if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { + tunnel->err_count--; + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); + } else + tunnel->err_count = 0; } - headroom = ((rt->u.dst.dev->hard_header_len+15)&~15)+sizeof(struct iphdr); + skb->h.raw = 
skb->nh.raw; - if (skb_headroom(skb) < headroom || skb_shared(skb)) { - struct sk_buff *new_skb = skb_realloc_headroom(skb, headroom); + /* + * Okay, now see if we can stuff it in the buffer as-is. + */ + max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr)); + + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); - goto on_error; + stats->tx_dropped++; + dev_kfree_skb(skb, FREE_WRITE); + tunnel->recursion--; + return 0; } dev_kfree_skb(skb, FREE_WRITE); skb = new_skb; } - - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr)); - skb->nh.iph = iph; - - saddr = rt->rt_src; + skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); dst_release(skb->dst); skb->dst = &rt->u.dst; - iph->version = 4; - iph->ihl = 5; - iph->tos = 0; /* tos set to 0... */ + /* + * Push down and install the IPIP header. 
+ */ + iph = skb->nh.iph; + iph->version = 4; + iph->ihl = sizeof(struct iphdr)>>2; if (mtu > 576) - iph->frag_off = htons(IP_DF); + iph->frag_off = __constant_htons(IP_DF); else - iph->frag_off = 0; + iph->frag_off = 0; - iph->ttl = 64; - iph->saddr = saddr; - iph->daddr = daddr; - iph->protocol = IPPROTO_IPV6; - iph->tot_len = htons(skb->len); - iph->id = htons(ip_id_count++); - ip_send_check(iph); + iph->protocol = IPPROTO_IPV6; + iph->tos = tos; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; - ip_send(skb); + if ((iph->ttl = tiph->ttl) == 0) + iph->ttl = iph6->hop_limit; + + iph->tot_len = htons(skb->len); + iph->id = htons(ip_id_count++); + ip_send_check(iph); stats->tx_bytes += skb->len; stats->tx_packets++; + ip_send(skb); + tunnel->recursion--; return 0; -on_error: - dev_kfree_skb(skb, FREE_WRITE); +tx_error_icmp: + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, dev); +tx_error: stats->tx_errors++; - return 0; + dev_kfree_skb(skb, FREE_WRITE); + tunnel->recursion--; + return 0; +} + +static int +ipip6_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip_tunnel_parm p; + struct ip_tunnel *t; + + MOD_INC_USE_COUNT; + + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == &ipip6_fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + t = ipip6_tunnel_locate(&p, 0); + } + if (t == NULL) + t = (struct ip_tunnel*)dev->priv; + memcpy(&p, &t->parms, sizeof(p)); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || + p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF))) + goto done; + if (p.iph.ttl) + p.iph.frag_off |= __constant_htons(IP_DF); + + t = ipip6_tunnel_locate(&p, cmd == 
SIOCADDTUNNEL); + + if (t) { + err = 0; + if (cmd == SIOCCHGTUNNEL) { + t->parms.iph.ttl = p.iph.ttl; + t->parms.iph.tos = p.iph.tos; + } + if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + if (dev == &ipip6_fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + if ((t = ipip6_tunnel_locate(&p, 0)) == NULL) + goto done; + err = -EPERM; + if (t == &ipip6_fb_tunnel) + goto done; + } + err = unregister_netdevice(dev); + break; + + default: + err = -EINVAL; + } + +done: + MOD_DEC_USE_COUNT; + return err; +} + +static struct net_device_stats *ipip6_tunnel_get_stats(struct device *dev) +{ + return &(((struct ip_tunnel*)dev->priv)->stat); +} + +static int ipip6_tunnel_change_mtu(struct device *dev, int new_mtu) +{ + if (new_mtu < 576 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; } -static struct net_device_stats *sit_get_stats(struct device *dev) +static void ipip6_tunnel_init_gen(struct device *dev) { - return((struct net_device_stats *) dev->priv); + struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + + dev->destructor = ipip6_tunnel_destroy; + dev->hard_start_xmit = ipip6_tunnel_xmit; + dev->get_stats = ipip6_tunnel_get_stats; + dev->do_ioctl = ipip6_tunnel_ioctl; + dev->change_mtu = ipip6_tunnel_change_mtu; + + dev_init_buffers(dev); + + dev->type = ARPHRD_SIT; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); + dev->mtu = 1500 - sizeof(struct iphdr); + dev->flags = IFF_NOARP; + dev->iflink = 0; + dev->addr_len = 4; + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + memcpy(dev->broadcast, &t->parms.iph.daddr, 4); +} + +static int ipip6_tunnel_init(struct device *dev) +{ + struct device *tdev = NULL; + struct ip_tunnel *tunnel; + struct iphdr *iph; + + tunnel = (struct ip_tunnel*)dev->priv; + iph = 
&tunnel->parms.iph; + + ipip6_tunnel_init_gen(dev); + + if (iph->daddr) { + struct rtable *rt; + if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) { + tdev = rt->u.dst.dev; + ip_rt_put(rt); + } + dev->flags |= IFF_POINTOPOINT; + } + + if (!tdev && tunnel->parms.link) + tdev = dev_get_by_index(tunnel->parms.link); + + if (tdev) { + dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); + dev->mtu = tdev->mtu - sizeof(struct iphdr); + if (dev->mtu < 576) + dev->mtu = 576; + } + dev->iflink = tunnel->parms.link; + + return 0; +} + +#ifdef MODULE +static int ipip6_fb_tunnel_open(struct device *dev) +{ + MOD_INC_USE_COUNT; + return 0; +} + +static int ipip6_fb_tunnel_close(struct device *dev) +{ + MOD_DEC_USE_COUNT; + return 0; +} +#endif + +__initfunc(int ipip6_fb_tunnel_init(struct device *dev)) +{ + struct iphdr *iph; + + ipip6_tunnel_init_gen(dev); +#ifdef MODULE + dev->open = ipip6_fb_tunnel_open; + dev->stop = ipip6_fb_tunnel_close; +#endif + + iph = &ipip6_fb_tunnel.parms.iph; + iph->version = 4; + iph->protocol = IPPROTO_IPV6; + iph->ihl = 5; + iph->ttl = 64; + + tunnels_wc[0] = &ipip6_fb_tunnel; + return 0; +} + +static struct inet_protocol sit_protocol = { + ipip6_rcv, + ipip6_err, + 0, + IPPROTO_IPV6, + 0, + NULL, + "IPv6" +}; + +#ifdef MODULE +void sit_cleanup(void) +{ + inet_del_protocol(&sit_protocol); + unregister_netdevice(&ipip6_fb_tunnel_dev); +} +#endif + +__initfunc(int sit_init(void)) +{ + printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n"); + + ipip6_fb_tunnel_dev.priv = (void*)&ipip6_fb_tunnel; + ipip6_fb_tunnel_dev.name = ipip6_fb_tunnel.parms.name; +#ifdef MODULE + register_netdev(&ipip6_fb_tunnel_dev); +#else + register_netdevice(&ipip6_fb_tunnel_dev); +#endif + inet_add_protocol(&sit_protocol); + return 0; } diff -u --recursive --new-file v2.1.67/linux/net/ipv6/tcp_ipv6.c linux/net/ipv6/tcp_ipv6.c --- v2.1.67/linux/net/ipv6/tcp_ipv6.c Thu Sep 4 17:07:32 1997 +++ linux/net/ipv6/tcp_ipv6.c 
Sun Nov 30 14:00:39 1997 @@ -5,7 +5,7 @@ * Authors: * Pedro Roque * - * $Id: tcp_ipv6.c,v 1.37 1997/08/22 19:15:40 freitag Exp $ + * $Id: tcp_ipv6.c,v 1.43 1997/10/30 23:52:34 davem Exp $ * * Based on: * linux/net/ipv4/tcp.c @@ -42,22 +42,23 @@ #include +#define ICMP_PARANOIA + extern int sysctl_tcp_sack; extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; -static void tcp_v6_send_reset(struct in6_addr *saddr, - struct in6_addr *daddr, - struct tcphdr *th, struct proto *prot, - struct ipv6_options *opt, - struct device *dev, int pri, int hop_limit); - +static void tcp_v6_send_reset(struct sk_buff *skb); static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); static int tcp_v6_backlog_rcv(struct sock *sk, struct sk_buff *skb); static int tcp_v6_build_header(struct sock *sk, struct sk_buff *skb); static void tcp_v6_xmit(struct sk_buff *skb); +static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, + struct ipv6hdr *ip6h, + struct tcphdr *th, + struct open_request **prevp); static struct tcp_func ipv6_mapped; static struct tcp_func ipv6_specific; @@ -536,7 +537,6 @@ return retval; } -/* XXX: this functions needs to be updated like tcp_v4_err. 
*/ void tcp_v6_err(int type, int code, unsigned char *header, __u32 info, struct in6_addr *saddr, struct in6_addr *daddr, struct inet6_protocol *protocol) @@ -546,14 +546,34 @@ struct sock *sk; int err; int opening; + struct tcp_opt *tp; +#ifdef ICMP_PARANOIA + __u32 seq; +#endif + + /* XXX: length check for tcphdr missing here */ sk = tcp_v6_lookup(daddr, th->dest, saddr, th->source); - if (sk == NULL) + if (sk == NULL) { + /* XXX: Update ICMP error count */ return; + } + + tp = &sk->tp_pinfo.af_tcp; +#ifdef ICMP_PARANOIA + seq = ntohl(th->seq); + if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) { + if (net_ratelimit()) + printk(KERN_DEBUG "icmp packet outside the tcp window:" + " s:%d %u,%u,%u\n", + (int)sk->state, seq, tp->snd_una, tp->snd_nxt); + return; + } +#endif - np = &sk->net_pinfo.af_inet6; + np = &sk->net_pinfo.af_inet6; if (type == ICMPV6_PKT_TOOBIG && sk->state != TCP_LISTEN) { /* icmp should have updated the destination cache entry */ @@ -580,12 +600,52 @@ else sk->mtu = np->dst->pmtu; - release_sock(sk); + if (sk->sock_readers) { /* remove later */ + printk(KERN_DEBUG "tcp_v6_err: pmtu disc: socket locked.\n"); + return; + } + tcp_simple_retransmit(sk); return; } - /* FIXME: This is wrong. Need to check for open_requests here. */ - opening = (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV); + opening = 0; + /* Might be for an open_request */ + switch (sk->state) { + struct open_request *req, *prev; + struct ipv6hdr hd; + case TCP_LISTEN: + if (sk->sock_readers) + return; + + /* Grrrr - fix this later. 
*/ + ipv6_addr_copy(&hd.saddr, saddr); + ipv6_addr_copy(&hd.daddr, daddr); + req = tcp_v6_search_req(tp, &hd,th, &prev); + if (!req) + return; +#ifdef ICMP_PARANOIA + if (seq != req->snt_isn) { + if (net_ratelimit()) + printk(KERN_DEBUG "icmp packet for openreq " + "with wrong seq number:%d:%d\n", + seq, req->snt_isn); + return; + } +#endif + if (req->sk) { + sk = req->sk; /* report error in accept */ + } else { + tcp_synq_unlink(tp, req, prev); + req->class->destructor(req); + tcp_openreq_free(req); + } + /* FALL THROUGH */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + opening = 1; + break; + } + if (icmpv6_err_convert(type, code, &err) || opening) { sk->err = err; @@ -692,7 +752,8 @@ static struct or_calltable or_ipv6 = { tcp_v6_send_synack, - tcp_v6_or_free + tcp_v6_or_free, + tcp_v6_send_reset }; /* FIXME: this is substantially similar to the ipv4 code. @@ -864,8 +925,6 @@ atomic_set(&newsk->rmem_alloc, 0); newsk->localroute = sk->localroute; - newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF; - newsk->err = 0; newsk->shutdown = 0; newsk->ack_backlog = 0; @@ -957,17 +1016,10 @@ return newsk; } -static void tcp_v6_reply_reset(struct sk_buff *skb) -{ -} - -static void tcp_v6_send_reset(struct in6_addr *saddr, struct in6_addr *daddr, - struct tcphdr *th, struct proto *prot, - struct ipv6_options *opt, - struct device *dev, int pri, int hop_limit) +static void tcp_v6_send_reset(struct sk_buff *skb) { + struct tcphdr *th = skb->h.th, *t1; struct sk_buff *buff; - struct tcphdr *t1; struct flowi fl; if(th->rst) @@ -982,7 +1034,7 @@ if (buff == NULL) return; - buff->dev = dev; + buff->dev = skb->dev; tcp_v6_build_header(NULL, buff); @@ -1009,29 +1061,32 @@ } buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); - - t1->check = csum_ipv6_magic(saddr, daddr, sizeof(*t1), IPPROTO_TCP, + + fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->daddr; + fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->saddr; + + t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr, + fl.nl_u.ip6_u.daddr, + sizeof(*t1), 
IPPROTO_TCP, buff->csum); fl.proto = IPPROTO_TCP; - fl.nl_u.ip6_u.daddr = daddr; - fl.nl_u.ip6_u.saddr = saddr; - fl.dev = dev; + fl.dev = skb->dev; fl.uli_u.ports.dport = th->dest; fl.uli_u.ports.sport = th->source; ip6_xmit(NULL, buff, &fl, NULL); tcp_statistics.TcpOutSegs++; + tcp_statistics.TcpOutRsts++; } static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, - void *header, + struct ipv6hdr *ip6h, struct tcphdr *th, struct open_request **prevp) { - struct ipv6hdr *ip6h = header; struct open_request *req, *prev; - __u16 rport = th->source; + __u16 rport = th->source; /* assumption: the socket is not in use. * as we checked the user count on tcp_rcv and we're @@ -1050,6 +1105,22 @@ return NULL; } +static void tcp_v6_rst_req(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct open_request *req, *prev; + + req = tcp_v6_search_req(tp,skb->nh.ipv6h,skb->h.th,&prev); + if (!req) + return; + /* Sequence number check required by RFC793 */ + if (before(skb->seq, req->snt_isn) || after(skb->seq, req->snt_isn+1)) + return; + tcp_synq_unlink(tp, req, prev); + req->class->destructor(req); + tcp_openreq_free(req); +} + int tcp_v6_rcv(struct sk_buff *skb, struct device *dev, struct in6_addr *saddr, struct in6_addr *daddr, struct ipv6_options *opt, unsigned short len, @@ -1077,7 +1148,13 @@ * Pull up the IP header. */ - skb_pull(skb, skb->h.raw - skb->data); + __skb_pull(skb, skb->h.raw - skb->data); + + /* + * Count it even if it's bad. + */ + + tcp_statistics.TcpInSegs++; /* * Try to use the device checksum if provided. 
@@ -1089,14 +1166,13 @@ case CHECKSUM_HW: if (tcp_v6_check(th,len,saddr,daddr,skb->csum)) { printk(KERN_DEBUG "tcp csum failed\n"); + tcp_statistics.TcpInErrs++; goto discard_it; } default: /* CHECKSUM_UNNECESSARY */ }; - tcp_statistics.TcpInSegs++; - sk = __tcp_v6_lookup(th, saddr, th->source, daddr, th->dest); if (!sk) { @@ -1137,28 +1213,35 @@ } } - if (!sk->prot) { - printk(KERN_DEBUG "tcp_rcv: sk->prot == NULL\n"); - return(0); - } - skb_set_owner_r(skb, sk); - /* I don't understand why lock_sock()/release_sock() is not - * called here. IPv4 does this. It looks like a bug to me. -AK - */ if (sk->state == TCP_ESTABLISHED) { if (tcp_rcv_established(sk, skb, th, len)) goto no_tcp_socket; return 0; } + if (sk->state == TCP_LISTEN) { + __u32 flg = ((u32 *)th)[3]; - if (sk->state == TCP_LISTEN && - ((u32 *)th)[3] & __constant_htonl(0x00120000)) { - sk = tcp_check_req(sk, skb, opt); - if (sk == NULL) - goto discard_it; + /* Check for RST */ + if (flg & __constant_htonl(0x00040000)) { + tcp_v6_rst_req(sk, skb); + } + + /* Check SYN|ACK */ + if (flg & __constant_htonl(0x00120000)) { + struct open_request *req, *prev; + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + req = tcp_v6_search_req(tp, skb->nh.ipv6h,th,&prev); + if (req) { + sk = tcp_check_req(sk, skb, req); + } + /* else do syncookies (add them here) */ + if (sk == NULL) + goto discard_it; + } } if (tcp_rcv_state_process(sk, skb, th, opt, len) == 0) @@ -1168,11 +1251,10 @@ /* * No such TCB. 
If th->rst is 0 send a reset - * (checked in tcp_send_reset) + * (checked in tcp_v6_send_reset) */ - tcp_v6_send_reset(daddr, saddr, th, &tcpv6_prot, opt, dev, - skb->nh.ipv6h->priority, 255); + tcp_v6_send_reset(skb); discard_it: @@ -1285,12 +1367,6 @@ sin6->sin6_port = sk->dummy_th.dest; } -static struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb, - void *opt) -{ - return sk; /* dummy */ -} - static struct tcp_func ipv6_specific = { tcp_v6_build_header, tcp_v6_xmit, @@ -1302,9 +1378,6 @@ ipv6_setsockopt, ipv6_getsockopt, v6_addr2sockaddr, - tcp_v6_reply_reset, - tcp_v6_search_req, - /* not implemented yet: */ cookie_v6_check, sizeof(struct sockaddr_in6) }; @@ -1323,9 +1396,6 @@ ipv6_setsockopt, ipv6_getsockopt, v6_addr2sockaddr, - tcp_v6_reply_reset, - tcp_v6_search_req, - cookie_v6_check, /* not implemented yet. */ sizeof(struct sockaddr_in6) }; @@ -1364,8 +1434,6 @@ sk->priority = 1; sk->state = TCP_CLOSE; - /* this is how many unacked bytes we will accept for this socket. */ - sk->max_unacked = 2048; /* needs to be at most 2 full packets. 
*/ sk->max_ack_backlog = SOMAXCONN; sk->mtu = 576; diff -u --recursive --new-file v2.1.67/linux/net/ipv6/udp.c linux/net/ipv6/udp.c --- v2.1.67/linux/net/ipv6/udp.c Tue May 13 22:41:24 1997 +++ linux/net/ipv6/udp.c Sun Nov 30 14:00:39 1997 @@ -7,7 +7,7 @@ * * Based on linux/ipv4/udp.c * - * $Id: udp.c,v 1.17 1997/04/29 09:38:55 mj Exp $ + * $Id: udp.c,v 1.18 1997/09/14 08:32:24 davem Exp $ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -601,8 +601,9 @@ clen -= sizeof(struct udphdr); } - udh->wcheck = csum_partial_copy_fromiovecend(dst, udh->iov, offset, - clen, udh->wcheck); + if (csum_partial_copy_fromiovecend(dst, udh->iov, offset, + clen, &udh->wcheck)) + return -EFAULT; if (final) { struct in6_addr *daddr; diff -u --recursive --new-file v2.1.67/linux/net/ipx/af_ipx.c linux/net/ipx/af_ipx.c --- v2.1.67/linux/net/ipx/af_ipx.c Mon Nov 17 18:47:22 1997 +++ linux/net/ipx/af_ipx.c Sun Nov 30 14:00:39 1997 @@ -694,7 +694,6 @@ /* * Send it out */ - skb->priority = SOPRI_NORMAL; dev_queue_xmit(skb); return 0; } diff -u --recursive --new-file v2.1.67/linux/net/netlink/Makefile linux/net/netlink/Makefile --- v2.1.67/linux/net/netlink/Makefile Wed Dec 31 16:00:00 1969 +++ linux/net/netlink/Makefile Sun Nov 30 14:00:40 1997 @@ -0,0 +1,26 @@ +# +# Makefile for the netlink driver. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... 
+ +O_TARGET := netlink.o +MOD_LIST_NAME := NET_MISC_MODULES + +O_OBJS := +OX_OBJS := af_netlink.o + +M_OBJS := + +ifeq ($(CONFIG_NETLINK_DEV), y) + O_OBJS += netlink_dev.o +endif + +ifeq ($(CONFIG_NETLINK_DEV), m) + M_OBJS += netlink_dev.o +endif + +include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.1.67/linux/net/netlink/af_netlink.c linux/net/netlink/af_netlink.c --- v2.1.67/linux/net/netlink/af_netlink.c Wed Dec 31 16:00:00 1969 +++ linux/net/netlink/af_netlink.c Sun Nov 30 14:00:40 1997 @@ -0,0 +1,1025 @@ +/* + * NETLINK Kernel-user communication protocol. + * + * Authors: Alan Cox + * Alexey Kuznetsov + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define Nprintk(a...) + +#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE) +#define NL_EMULATE_DEV +#endif + +static struct sock *nl_table[MAX_LINKS]; +static atomic_t nl_table_lock[MAX_LINKS]; +static struct wait_queue *nl_table_wait; + +#ifdef NL_EMULATE_DEV +static struct socket *netlink_kernel[MAX_LINKS]; +#endif + +static int netlink_dump(struct sock *sk); +static void netlink_destroy_callback(struct netlink_callback *cb); + +extern __inline__ void +netlink_wait_on_table(int protocol) +{ + while (atomic_read(&nl_table_lock[protocol])) + sleep_on(&nl_table_wait); +} + +extern __inline__ void +netlink_lock_table(int protocol) +{ + atomic_inc(&nl_table_lock[protocol]); +} + +extern __inline__ void +netlink_unlock_table(int protocol, int wakeup) +{ +#if 0 + /* F...g gcc does not eat it! 
*/ + + if (atomic_dec_and_test(&nl_table_lock[protocol]) && wakeup) + wake_up(&nl_table_wait); +#else + atomic_dec(&nl_table_lock[protocol]); + if (atomic_read(&nl_table_lock[protocol]) && wakeup) + wake_up(&nl_table_wait); +#endif +} + +static __inline__ void netlink_lock(struct sock *sk) +{ + atomic_inc(&sk->protinfo.af_netlink.locks); +} + +static __inline__ void netlink_unlock(struct sock *sk) +{ + atomic_dec(&sk->protinfo.af_netlink.locks); +} + +static __inline__ int netlink_locked(struct sock *sk) +{ + return atomic_read(&sk->protinfo.af_netlink.locks); +} + +static __inline__ struct sock *netlink_lookup(int protocol, pid_t pid) +{ + struct sock *sk; + + for (sk=nl_table[protocol]; sk; sk=sk->next) { + if (sk->protinfo.af_netlink.pid == pid) { + netlink_lock(sk); + return sk; + } + } + + return NULL; +} + +extern struct proto_ops netlink_ops; + +static void netlink_insert(struct sock *sk) +{ + cli(); + sk->next = nl_table[sk->protocol]; + nl_table[sk->protocol] = sk; + sti(); +} + +static void netlink_remove(struct sock *sk) +{ + struct sock **skp; + for (skp = &nl_table[sk->protocol]; *skp; skp = &((*skp)->next)) { + if (*skp == sk) { + *skp = sk->next; + return; + } + } +} + +static int netlink_create(struct socket *sock, int protocol) +{ + struct sock *sk; + + sock->state = SS_UNCONNECTED; + + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) + return -ESOCKTNOSUPPORT; + + if (protocol<0 || protocol >= MAX_LINKS) + return -EPROTONOSUPPORT; + + sock->ops = &netlink_ops; + + sk = sk_alloc(AF_NETLINK, GFP_KERNEL); + if (!sk) + return -ENOMEM; + + sock_init_data(sock,sk); + sk->destruct = NULL; + + sk->mtu=4096; + sk->protocol=protocol; + return 0; +} + +static void netlink_destroy_timer(unsigned long data) +{ + struct sock *sk=(struct sock *)data; + + if (!netlink_locked(sk) && !atomic_read(&sk->wmem_alloc) + && !atomic_read(&sk->rmem_alloc)) { + sk_free(sk); + return; + } + + sk->timer.expires=jiffies+10*HZ; + add_timer(&sk->timer); + 
printk(KERN_DEBUG "netlink sk destroy delayed\n"); +} + +static int netlink_release(struct socket *sock, struct socket *peer) +{ + struct sock *sk = sock->sk; + + if (!sk) + return 0; + + /* Wait on table before removing socket */ + netlink_wait_on_table(sk->protocol); + netlink_remove(sk); + + if (sk->protinfo.af_netlink.cb) { + netlink_unlock(sk); + sk->protinfo.af_netlink.cb->done(sk->protinfo.af_netlink.cb); + netlink_destroy_callback(sk->protinfo.af_netlink.cb); + sk->protinfo.af_netlink.cb = NULL; + } + + /* OK. Socket is unlinked, and, therefore, + no new packets will arrive */ + sk->state_change(sk); + sk->dead = 1; + + skb_queue_purge(&sk->receive_queue); + skb_queue_purge(&sk->write_queue); + + /* IMPORTANT! It is the major unpleasant feature of this + transport (and AF_UNIX datagram, when it will be repaired). + + Someone could wait on our sock->wait now. + We cannot release socket until waiter will remove yourself + from wait queue. I choose the most conservetive way of solving + the problem. + + We waked up this queue above, so that we need only to wait + when the readers release us. + */ + + while (netlink_locked(sk)) { + current->counter = 0; + schedule(); + } + + if (sk->socket) { + sk->socket = NULL; + sock->sk = NULL; + } + + if (atomic_read(&sk->rmem_alloc) || atomic_read(&sk->wmem_alloc)) { + sk->timer.data=(unsigned long)sk; + sk->timer.expires=jiffies+HZ; + sk->timer.function=netlink_destroy_timer; + add_timer(&sk->timer); + printk(KERN_DEBUG "impossible 333\n"); + return 0; + } + + sk_free(sk); + return 0; +} + +static int netlink_autobind(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct sock *osk; + + netlink_wait_on_table(sk->protocol); + + sk->protinfo.af_netlink.groups = 0; + sk->protinfo.af_netlink.pid = current->pid; + +retry: + for (osk=nl_table[sk->protocol]; osk; osk=osk->next) { + if (osk->protinfo.af_netlink.pid == sk->protinfo.af_netlink.pid) { + /* Bind collision, search negative pid values. 
*/ + if (sk->protinfo.af_netlink.pid > 0) + sk->protinfo.af_netlink.pid = -4096; + sk->protinfo.af_netlink.pid--; + goto retry; + } + } + + netlink_insert(sk); + return 0; +} + +static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + struct sock *sk = sock->sk; + struct sock *osk; + struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr; + + if (nladdr->nl_family != AF_NETLINK) + return -EINVAL; + + /* Only superuser is allowed to listen multicasts */ + if (nladdr->nl_groups && !suser()) + return -EPERM; + + if (sk->protinfo.af_netlink.pid) { + if (nladdr->nl_pid != sk->protinfo.af_netlink.pid) + return -EINVAL; + sk->protinfo.af_netlink.groups = nladdr->nl_groups; + return 0; + } + + if (nladdr->nl_pid == 0) { + netlink_autobind(sock); + sk->protinfo.af_netlink.groups = nladdr->nl_groups; + return 0; + } + + netlink_wait_on_table(sk->protocol); + + for (osk=nl_table[sk->protocol]; osk; osk=osk->next) { + if (osk->protinfo.af_netlink.pid == nladdr->nl_pid) + return -EADDRINUSE; + } + + sk->protinfo.af_netlink.pid = nladdr->nl_pid; + sk->protinfo.af_netlink.groups = nladdr->nl_groups; + netlink_insert(sk); + return 0; +} + +static int netlink_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr; + + if (addr->sa_family == AF_UNSPEC) + { + sk->protinfo.af_netlink.dst_pid = 0; + sk->protinfo.af_netlink.dst_groups = 0; + return 0; + } + if (addr->sa_family != AF_NETLINK) + return -EINVAL; + + /* Only superuser is allowed to send multicasts */ + if (!suser() && nladdr->nl_groups) + return -EPERM; + + sk->protinfo.af_netlink.dst_pid = nladdr->nl_pid; + sk->protinfo.af_netlink.dst_groups = nladdr->nl_groups; + + if (!sk->protinfo.af_netlink.pid) + netlink_autobind(sock); + return 0; +} + +static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer) +{ + struct sock *sk = sock->sk; + struct 
sockaddr_nl *nladdr=(struct sockaddr_nl *)addr; + + nladdr->nl_family = AF_NETLINK; + *addr_len = sizeof(*nladdr); + + if (peer) { + nladdr->nl_pid = sk->protinfo.af_netlink.dst_pid; + nladdr->nl_groups = sk->protinfo.af_netlink.dst_groups; + } else { + nladdr->nl_pid = sk->protinfo.af_netlink.pid; + nladdr->nl_groups = sk->protinfo.af_netlink.groups; + } + return 0; +} + +int netlink_unicast(struct sock *ssk, struct sk_buff *skb, pid_t pid, int nonblock) +{ + struct sock *sk; + int len = skb->len; + int protocol = ssk->protocol; + +retry: + for (sk = nl_table[protocol]; sk; sk = sk->next) { + if (sk->protinfo.af_netlink.pid != pid) + continue; + + netlink_lock(sk); + +#ifdef NL_EMULATE_DEV + if (sk->protinfo.af_netlink.handler) { + len = sk->protinfo.af_netlink.handler(protocol, skb); + netlink_unlock(sk); + return len; + } +#endif + + cli(); + if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) { + if (nonblock) { + sti(); + netlink_unlock(sk); + kfree_skb(skb, 0); + return -EAGAIN; + } + interruptible_sleep_on(sk->sleep); + netlink_unlock(sk); + sti(); + + if (current->signal & ~current->blocked) { + kfree_skb(skb, 0); + return -ERESTARTSYS; + } + goto retry; + } + sti(); +Nprintk("unicast_deliver %d\n", skb->len); + skb_orphan(skb); + skb_set_owner_r(skb, sk); + skb_queue_tail(&sk->receive_queue, skb); + sk->data_ready(sk, len); + netlink_unlock(sk); + return len; + } + kfree_skb(skb, 0); + return -ECONNREFUSED; +} + +static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) +{ +#ifdef NL_EMULATE_DEV + if (sk->protinfo.af_netlink.handler) { + sk->protinfo.af_netlink.handler(sk->protocol, skb); + return 0; + } else +#endif + if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) { +Nprintk("broadcast_deliver %d\n", skb->len); + skb_orphan(skb); + skb_set_owner_r(skb, sk); + skb_queue_tail(&sk->receive_queue, skb); + sk->data_ready(sk, skb->len); + return 0; + } + return -1; +} + +void netlink_broadcast(struct sock *ssk, struct sk_buff *skb, pid_t 
pid, + unsigned group, int allocation) +{ + struct sock *sk; + struct sk_buff *skb2 = NULL; + int protocol = ssk->protocol; + int failure = 0; + + /* While we sleep in clone, do not allow to change socket list */ + + netlink_lock_table(protocol); + + for (sk = nl_table[protocol]; sk; sk = sk->next) { + if (ssk == sk) + continue; + + if (sk->protinfo.af_netlink.pid == pid || + !(sk->protinfo.af_netlink.groups&group)) + continue; + + if (failure) { + sk->err = -ENOBUFS; + sk->state_change(sk); + continue; + } + + netlink_lock(sk); + if (skb2 == NULL) { + if (atomic_read(&skb->users) != 1) { + skb2 = skb_clone(skb, allocation); + } else { + skb2 = skb; + atomic_inc(&skb->users); + } + } + if (skb2 == NULL) { + sk->err = -ENOBUFS; + sk->state_change(sk); + /* Clone failed. Notify ALL listeners. */ + failure = 1; + } else if (netlink_broadcast_deliver(sk, skb2)) { + sk->err = -ENOBUFS; + sk->state_change(sk); + } else + skb2 = NULL; + netlink_unlock(sk); + } + + netlink_unlock_table(protocol, allocation == GFP_KERNEL); + + if (skb2) + kfree_skb(skb2, 0); + kfree_skb(skb, 0); +} + +void netlink_set_err(struct sock *ssk, pid_t pid, unsigned group, int code) +{ + struct sock *sk; + int protocol = ssk->protocol; + +Nprintk("seterr"); + for (sk = nl_table[protocol]; sk; sk = sk->next) { + if (ssk == sk) + continue; + + if (sk->protinfo.af_netlink.pid == pid || + !(sk->protinfo.af_netlink.groups&group)) + continue; + + sk->err = -code; + sk->state_change(sk); + } +} + +static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, int len, + struct scm_cookie *scm) +{ + struct sock *sk = sock->sk; + struct sockaddr_nl *addr=msg->msg_name; + pid_t dst_pid; + unsigned dst_groups; + struct sk_buff *skb; + int err; + + if (msg->msg_flags&MSG_OOB) + return -EOPNOTSUPP; + + if (msg->msg_flags&~MSG_DONTWAIT) { + printk("1 %08x\n", msg->msg_flags); + return -EINVAL; + } + + if (msg->msg_namelen) { + if (addr->nl_family != AF_NETLINK) { + printk("2 %08x\n", addr->nl_family); + 
return -EINVAL; + } + dst_pid = addr->nl_pid; + dst_groups = addr->nl_groups; + if (dst_groups && !suser()) + return -EPERM; + } else { + dst_pid = sk->protinfo.af_netlink.dst_pid; + dst_groups = sk->protinfo.af_netlink.dst_groups; + } + + + if (!sk->protinfo.af_netlink.pid) + netlink_autobind(sock); + + skb = sock_wmalloc(sk, len, 0, GFP_KERNEL); + if (skb==NULL) + return -ENOBUFS; + + NETLINK_CB(skb).pid = sk->protinfo.af_netlink.pid; + NETLINK_CB(skb).groups = sk->protinfo.af_netlink.groups; + NETLINK_CB(skb).dst_pid = dst_pid; + NETLINK_CB(skb).dst_groups = dst_groups; + memcpy(NETLINK_CREDS(skb), &scm->creds, sizeof(struct ucred)); + memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); + + if (dst_groups) { + atomic_inc(&skb->users); + netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL); + } + err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); + if (err < 0) { + printk("3\n"); + } + return err; +} + +static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, int len, + int flags, struct scm_cookie *scm) +{ + struct sock *sk = sock->sk; + int noblock = flags&MSG_DONTWAIT; + int copied; + struct sk_buff *skb; + int err; + + if (flags&(MSG_OOB|MSG_PEEK)) + return -EOPNOTSUPP; + + err = -sock_error(sk); + if (err) + return err; + + skb = skb_recv_datagram(sk,flags,noblock,&err); + if (skb==NULL) + return err; + + msg->msg_namelen = 0; + + copied = skb->len; + if (len < copied) { + msg->msg_flags |= MSG_TRUNC; + copied = len; + } + + skb->h.raw = skb->data; + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + + if (msg->msg_name) { + struct sockaddr_nl *addr = (struct sockaddr_nl*)msg->msg_name; + addr->nl_family = AF_NETLINK; + addr->nl_pid = NETLINK_CB(skb).pid; + addr->nl_groups = NETLINK_CB(skb).dst_groups; + msg->msg_namelen = sizeof(*addr); + } + + scm->creds = *NETLINK_CREDS(skb); + skb_free_datagram(sk, skb); + + if (sk->protinfo.af_netlink.cb + && atomic_read(&sk->rmem_alloc) <= sk->rcvbuf/2) + 
netlink_dump(sk); + return err ? err : copied; +} + +/* + * We export these functions to other modules. They provide a + * complete set of kernel non-blocking support for message + * queueing. + */ + +struct sock * +netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) +{ + struct socket *sock; + struct sock *sk; + + if (unit<0 || unit>=MAX_LINKS) + return NULL; + + if (!(sock = sock_alloc())) + return NULL; + + sock->type = SOCK_RAW; + + if (netlink_create(sock, unit) < 0) { + sock_release(sock); + return NULL; + } + sk = sock->sk; + if (input) + sk->data_ready = input; + + netlink_insert(sk); + return sk; +} + +static void netlink_destroy_callback(struct netlink_callback *cb) +{ + if (cb->skb) + kfree_skb(cb->skb, 0); + kfree(cb); +} + +/* + * It looks a bit ugly. + * It would be better to create kernel thread. + */ + +static int netlink_dump(struct sock *sk) +{ + struct netlink_callback *cb; + struct sk_buff *skb; + struct nlmsghdr *nlh; + int len; + + skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + cb = sk->protinfo.af_netlink.cb; + + len = cb->dump(skb, cb); + + if (len > 0) { + skb_queue_tail(&sk->receive_queue, skb); + sk->data_ready(sk, len); + return 0; + } + + nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLMSG_DONE, sizeof(int)); + nlh->nlmsg_flags |= NLM_F_MULTI; + memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); + skb_queue_tail(&sk->receive_queue, skb); + sk->data_ready(sk, skb->len); + + cb->done(cb); + sk->protinfo.af_netlink.cb = NULL; + netlink_destroy_callback(cb); + netlink_unlock(sk); + return 0; +} + +int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, + struct nlmsghdr *nlh, + int (*dump)(struct sk_buff *skb, struct netlink_callback*), + int (*done)(struct netlink_callback*)) +{ + struct netlink_callback *cb; + struct sock *sk; + + cb = kmalloc(sizeof(*cb), GFP_KERNEL); + if (cb == NULL) + return -ENOBUFS; + + memset(cb, 0, sizeof(*cb)); + cb->dump = dump; 
+ cb->done = done; + cb->nlh = nlh; + atomic_inc(&skb->users); + cb->skb = skb; + + sk = netlink_lookup(ssk->protocol, NETLINK_CB(skb).pid); + if (sk == NULL) { + netlink_destroy_callback(cb); + return -ECONNREFUSED; + } + /* A dump is in progress... */ + if (sk->protinfo.af_netlink.cb) { + netlink_destroy_callback(cb); + netlink_unlock(sk); + return -EBUSY; + } + sk->protinfo.af_netlink.cb = cb; + netlink_dump(sk); + return 0; +} + +void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) +{ + struct sk_buff *skb; + struct nlmsghdr *rep; + struct nlmsgerr *errmsg; + int size; + + if (err == 0) + size = NLMSG_SPACE(sizeof(struct nlmsgerr)); + else + size = NLMSG_SPACE(4 + nlh->nlmsg_len); + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return; + + rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NLMSG_ERROR, sizeof(struct nlmsgerr)); + errmsg = NLMSG_DATA(rep); + errmsg->error = err; + memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); + netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); +} + + +#ifdef NL_EMULATE_DEV +/* + * Backward compatibility. + */ + +int netlink_attach(int unit, int (*function)(int, struct sk_buff *skb)) +{ + struct sock *sk = netlink_kernel_create(unit, NULL); + if (sk == NULL) + return -ENOBUFS; + sk->protinfo.af_netlink.handler = function; + netlink_kernel[unit] = sk->socket; + return 0; +} + +void netlink_detach(int unit) +{ + struct socket *sock = netlink_kernel[unit]; + netlink_kernel[unit] = NULL; + sock_release(sock); +} + +int netlink_post(int unit, struct sk_buff *skb) +{ + if (netlink_kernel[unit]) { + netlink_broadcast(netlink_kernel[unit]->sk, skb, 0, ~0, GFP_ATOMIC); + return 0; + } + return -EUNATCH;; +} + +EXPORT_SYMBOL(netlink_attach); +EXPORT_SYMBOL(netlink_detach); +EXPORT_SYMBOL(netlink_post); + +#endif + +#if 0 + +/* What a pity... It was good code, but at the moment it + results in unnecessary complications. 
+ */ + +/* + * "High" level netlink interface. (ANK) + * + * Features: + * - standard message format. + * - pseudo-reliable delivery. Messages can be still lost, but + * user level will know that they were lost and can + * recover (f.e. gated could reread FIB and device list) + * - messages are batched. + */ + +/* + * Try to deliver queued messages. + */ + +static void nlmsg_delayed_flush(struct sock *sk) +{ + nlmsg_flush(sk, GFP_ATOMIC); +} + +static void nlmsg_flush(struct sock *sk, int allocation) +{ + struct sk_buff *skb; + unsigned long flags; + + save_flags(flags); + cli(); + while ((skb=skb_dequeue(&sk->write_queue)) != NULL) { + if (skb->users != 1) { + skb_queue_head(&sk->write_queue, skb); + break; + } + restore_flags(flags); + netlink_broadcast(sk, skb, 0, NETLINK_CB(skb).dst_groups, allocation); + cli(); + } + start_bh_atomic(); + restore_flags(flags); + if (skb) { + if (sk->timer.function) + del_timer(&sk->timer) + sk->timer.expires = jiffies + (sk->protinfo.af_netlink.delay ? : HZ/2); + sk->timer.function = (void (*)(unsigned long))nlmsg_delayed_flush; + sk->timer.data = (unsigned long)sk; + add_timer(&sk->timer); + } + end_bh_atomic(); +} + +/* + * Allocate room for new message. If it is impossible, return NULL. + */ + +void *nlmsg_broadcast(struct sock *sk, struct sk_buff **skbp, + unsigned long type, int len, + unsigned groups, int allocation) +{ + struct nlmsghdr *nlh; + struct sk_buff *skb; + int rlen; + unsigned long flags; + + rlen = NLMSG_SPACE(len); + + save_flags(flags); + cli(); + skb = sk->write_queue.tail; + if (skb == sk->write_queue.head) + skb = NULL; + if (skb == NULL || skb_tailroom(skb) < rlen || NETLINK_CB(skb).dst_groups != groups) { + restore_flags(flags); + + if (skb) + nlmsg_flush(sk, allocation); + + skb = sock_wmalloc(rlen > NLMSG_GOODSIZE ? 
rlen : NLMSG_GOODSIZE, + sk, 0, allocation); + + if (skb==NULL) { + printk (KERN_WARNING "nlmsg at unit %d overrunned\n", sk->protocol); + return NULL; + } + + NETLINK_CB(skb).dst_groups = groups; + cli(); + skb_queue_tail(&sk->write_queue, skb); + } + atomic_inc(&skb->users); + restore_flags(flags); + + nlh = (struct nlmsghdr*)skb_put(skb, rlen); + nlh->nlmsg_type = type; + nlh->nlmsg_len = NLMSG_LENGTH(len); + nlh->nlmsg_seq = 0; + nlh->nlmsg_pid = 0; + *skbp = skb; + return nlh->nlmsg_data; +} + +struct sk_buff* nlmsg_alloc(unsigned long type, int len, + unsigned long seq, unsigned long pid, int allocation) +{ + struct nlmsghdr *nlh; + struct sk_buff *skb; + int rlen; + + rlen = NLMSG_SPACE(len); + + skb = alloc_skb(rlen, allocation); + if (skb==NULL) + return NULL; + + nlh = (struct nlmsghdr*)skb_put(skb, rlen); + nlh->nlmsg_type = type; + nlh->nlmsg_len = NLMSG_LENGTH(len); + nlh->nlmsg_seq = seq; + nlh->nlmsg_pid = pid; + return skb; +} + +void nlmsg_release(struct sk_buff *skb) +{ + atomic_dec(skb->users); +} + + +/* + * Kick message queue. + * Two modes: + * - synchronous (delay==0). Messages are delivered immediately. + * - delayed. Do not deliver, but start delivery timer. 
+ */ + +void __nlmsg_transmit(struct sock *sk, int allocation) +{ + start_bh_atomic(); + if (!sk->protinfo.af_netlink.delay) { + if (sk->timer.function) { + del_timer(&sk->timer); + sk->timer.function = NULL; + } + end_bh_atomic(); + nlmsg_flush(sk, allocation); + return; + } + if (!sk->timer.function) { + sk->timer.expires = jiffies + sk->protinfo.af_netlink.delay; + sk->timer.function = (void (*)(unsigned long))nlmsg_delayed_flush; + sk->timer.data = (unsigned long)sk; + add_timer(&sk->timer); + } + end_bh_atomic(); +} + +#endif + +#ifdef CONFIG_PROC_FS +static int netlink_read_proc(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + off_t pos=0; + off_t begin=0; + int len=0; + int i; + struct sock *s; + + len+= sprintf(buffer,"sk Eth Pid Groups " + "Rmem Wmem Dump Locks\n"); + + for (i=0; inext) { + len+=sprintf(buffer+len,"%p %-3d %-6d %08x %-8d %-8d %p %d", + s, + s->protocol, + s->protinfo.af_netlink.pid, + s->protinfo.af_netlink.groups, + atomic_read(&s->rmem_alloc), + atomic_read(&s->wmem_alloc), + s->protinfo.af_netlink.cb, + atomic_read(&s->protinfo.af_netlink.locks) + ); + + buffer[len++]='\n'; + + pos=begin+len; + if(posoffset+length) + goto done; + } + } + *eof = 1; + +done: + *start=buffer+(offset-begin); + len-=(offset-begin); + if(len>length) + len=length; + return len; +} +#endif + +struct proto_ops netlink_ops = { + AF_NETLINK, + + sock_no_dup, + netlink_release, + netlink_bind, + netlink_connect, + NULL, + NULL, + netlink_getname, + datagram_poll, + sock_no_ioctl, + sock_no_listen, + sock_no_shutdown, + NULL, + NULL, + sock_no_fcntl, + netlink_sendmsg, + netlink_recvmsg +}; + +struct net_proto_family netlink_family_ops = { + AF_NETLINK, + netlink_create +}; + +void netlink_proto_init(struct net_proto *pro) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *ent; +#endif + struct sk_buff *dummy_skb; + + if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)) { + printk(KERN_CRIT "netlink_proto_init: panic\n"); + 
return; + } + sock_register(&netlink_family_ops); +#ifdef CONFIG_PROC_FS + ent = create_proc_entry("net/netlink", 0, 0); + ent->read_proc = netlink_read_proc; +#endif +} diff -u --recursive --new-file v2.1.67/linux/net/netlink/netlink_dev.c linux/net/netlink/netlink_dev.c --- v2.1.67/linux/net/netlink/netlink_dev.c Wed Dec 31 16:00:00 1969 +++ linux/net/netlink/netlink_dev.c Sun Nov 30 14:00:40 1997 @@ -0,0 +1,213 @@ +/* + * NETLINK An implementation of a loadable kernel mode driver providing + * multiple kernel/user space bidirectional communications links. + * + * Author: Alan Cox + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Now netlink devices are emulated on the top of netlink sockets + * by compatibility reasons. Remove this file after a period. --ANK + * + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static unsigned open_map = 0; +static struct socket *netlink_user[MAX_LINKS]; + +/* + * Device operations + */ + +static unsigned int netlink_poll(struct file *file, poll_table * wait) +{ + struct socket *sock = netlink_user[MINOR(file->f_dentry->d_inode->i_rdev)]; + + if (sock->ops->poll==NULL) + return 0; + return sock->ops->poll(sock, wait); +} + +/* + * Write a message to the kernel side of a communication link + */ + +static ssize_t netlink_write(struct file * file, const char * buf, + size_t count, loff_t *pos) +{ + struct inode *inode = file->f_dentry->d_inode; + struct socket *sock = netlink_user[MINOR(inode->i_rdev)]; + struct msghdr msg; + struct iovec iov; + + iov.iov_base = (void*)buf; + iov.iov_len = count; + msg.msg_name=NULL; + msg.msg_namelen=0; + msg.msg_controllen=0; + msg.msg_flags=0; + msg.msg_iov=&iov; + msg.msg_iovlen=1; + + return 
sock_sendmsg(sock, &msg, count); +} + +/* + * Read a message from the kernel side of the communication link + */ + +static ssize_t netlink_read(struct file * file, char * buf, + size_t count, loff_t *pos) +{ + struct inode *inode = file->f_dentry->d_inode; + struct socket *sock = netlink_user[MINOR(inode->i_rdev)]; + struct msghdr msg; + struct iovec iov; + + iov.iov_base = buf; + iov.iov_len = count; + msg.msg_name=NULL; + msg.msg_namelen=0; + msg.msg_controllen=0; + msg.msg_flags=0; + msg.msg_iov=&iov; + msg.msg_iovlen=1; + if (file->f_flags&O_NONBLOCK) + msg.msg_flags=MSG_DONTWAIT; + + return sock_recvmsg(sock, &msg, count, msg.msg_flags); +} + +static loff_t netlink_lseek(struct file * file, loff_t offset, int origin) +{ + return -ESPIPE; +} + +static int netlink_open(struct inode * inode, struct file * file) +{ + unsigned int minor = MINOR(inode->i_rdev); + struct socket *sock; + struct sockaddr_nl nladdr; + int err; + + if (minor>=MAX_LINKS) + return -ENODEV; + if (open_map&(1<type = SOCK_RAW; + + if ((err = net_families[AF_NETLINK]->create(sock, minor)) < 0) + { + sock_release(sock); + goto out; + } + + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + nladdr.nl_groups = ~0; + if ((err = sock->ops->bind(sock, (struct sockaddr*)&nladdr, sizeof(nladdr))) < 0) { + sock_release(sock); + goto out; + } + + netlink_user[minor] = sock; + return 0; + +out: + open_map &= ~(1<i_rdev); + struct socket *sock = netlink_user[minor]; + + netlink_user[minor] = NULL; + open_map &= ~(1<i_rdev); + int retval = 0; + + if (minor >= MAX_LINKS) + return -ENODEV; + switch ( cmd ) { + default: + retval = -EINVAL; + } + return retval; +} + + +static struct file_operations netlink_fops = { + netlink_lseek, + netlink_read, + netlink_write, + NULL, /* netlink_readdir */ + netlink_poll, + netlink_ioctl, + NULL, /* netlink_mmap */ + netlink_open, + netlink_release +}; + +__initfunc(int init_netlink(void)) +{ + if (register_chrdev(NETLINK_MAJOR,"netlink", 
&netlink_fops)) { + printk(KERN_ERR "netlink: unable to get major %d\n", NETLINK_MAJOR); + return -EIO; + } + return 0; +} + +#ifdef MODULE + +int init_module(void) +{ + printk(KERN_INFO "Network Kernel/User communications module 0.04\n"); + return init_netlink(); +} + +void cleanup_module(void) +{ + unregister_chrdev(NET_MAJOR,"netlink"); +} + +#endif diff -u --recursive --new-file v2.1.67/linux/net/netlink.c linux/net/netlink.c --- v2.1.67/linux/net/netlink.c Wed Nov 26 16:24:03 1997 +++ linux/net/netlink.c Wed Dec 31 16:00:00 1969 @@ -1,475 +0,0 @@ -/* - * NETLINK An implementation of a loadable kernel mode driver providing - * multiple kernel/user space bidirectional communications links. - * - * Author: Alan Cox - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -static int (*netlink_handler[MAX_LINKS])(int minor, struct sk_buff *skb); -static struct sk_buff_head skb_queue_rd[MAX_LINKS]; -static int rdq_size[MAX_LINKS]; -static struct wait_queue *read_space_wait[MAX_LINKS]; - -static unsigned long active_map = 0; -static unsigned long open_map = 0; - -/* - * Device operations - */ - -/* - * Default write handler. - */ - -static int netlink_err(int minor, struct sk_buff *skb) -{ - kfree_skb(skb, FREE_READ); - return -EUNATCH; -} - -/* - * Exported do nothing receiver for one way - * interfaces. 
- */ - -int netlink_donothing(int minor, struct sk_buff *skb) -{ - kfree_skb(skb, FREE_READ); - return -EINVAL; -} - -static unsigned int netlink_poll(struct file *file, poll_table * wait) -{ - unsigned int mask; - unsigned int minor = MINOR(file->f_dentry->d_inode->i_rdev); - - poll_wait(&read_space_wait[minor], wait); - mask = POLLOUT | POLLWRNORM; - if (skb_peek(&skb_queue_rd[minor])) - mask |= POLLIN | POLLRDNORM; - return mask; -} - -/* - * Write a message to the kernel side of a communication link - */ - -static ssize_t netlink_write(struct file * file, const char * buf, - size_t count,loff_t *ppos) -{ - int err; - unsigned int minor = MINOR(file->f_dentry->d_inode->i_rdev); - struct sk_buff *skb; - skb=alloc_skb(count, GFP_KERNEL); - err = copy_from_user(skb_put(skb,count),buf, count); - return err ? -EFAULT : (netlink_handler[minor])(minor,skb); -} - -/* - * Read a message from the kernel side of the communication link - */ - -static ssize_t netlink_read(struct file * file, char * buf, - size_t count,loff_t *ppos) -{ - int err; - unsigned int minor = MINOR(file->f_dentry->d_inode->i_rdev); - struct sk_buff *skb; - cli(); - while((skb=skb_dequeue(&skb_queue_rd[minor]))==NULL) - { - if(file->f_flags&O_NONBLOCK) - { - sti(); - return -EAGAIN; - } - interruptible_sleep_on(&read_space_wait[minor]); - if(signal_pending(current)) - { - sti(); - return -ERESTARTSYS; - } - } - rdq_size[minor]-=skb->len; - sti(); - if(skb->lenlen; - err = copy_to_user(buf,skb->data,count); - kfree_skb(skb, FREE_READ); - return err ? 
-EFAULT : count; -} - -static long long netlink_lseek(struct file * file, long long offset, int origin) -{ - return -ESPIPE; -} - -static int netlink_open(struct inode * inode, struct file * file) -{ - unsigned int minor = MINOR(inode->i_rdev); - - if(minor>=MAX_LINKS) - return -ENODEV; - if(active_map&(1<f_mode & FMODE_READ) - { - if (open_map&(1<i_rdev); - if (file->f_mode & FMODE_READ) - open_map&=~(1<i_rdev); - int retval = 0; - - if (minor >= MAX_LINKS) - return -ENODEV; - switch ( cmd ) { - default: - retval = -EINVAL; - } - return retval; -} - - -static struct file_operations netlink_fops = { - netlink_lseek, - netlink_read, - netlink_write, - NULL, /* netlink_readdir */ - netlink_poll, - netlink_ioctl, - NULL, /* netlink_mmap */ - netlink_open, - netlink_release -}; - -/* - * We export these functions to other modules. They provide a - * complete set of kernel non-blocking support for message - * queueing. - */ - -int netlink_attach(int unit, int (*function)(int minor, struct sk_buff *skb)) -{ - if(unit>=MAX_LINKS) - return -ENODEV; - if(active_map&(1<len>MAX_QBYTES) - ret=-EAGAIN; - else - { - skb_queue_tail(&skb_queue_rd[unit], skb); - rdq_size[unit]+=skb->len; - ret=0; - wake_up_interruptible(&read_space_wait[unit]); - } - restore_flags(flags); - } - return ret; -} - - -/* - * "High" level netlink interface. (ANK) - * - * Features: - * - standard message format. - * - pseudo-reliable delivery. Messages can be still lost, but - * user level will know that they were lost and can - * recover (f.e. gated could reread FIB and device list) - * - messages are batched. - * - if user is not attached, we do not make useless work. - * - * Examples: - * - netlink_post equivalent (but with pseudo-reliable delivery) - * ctl.nlmsg_delay = 0; - * ctl.nlmsg_maxsize = ; - * .... - * msg = nlmsg_send(&ctl, ...); - * if (msg) { - * ... make it ... - * nlmsg_transmit(&ctl); - * } - * - * - batched messages. 
- * if nlmsg_delay==0, messages are delivered only - * by nlmsg_transmit, or when batch is completed, - * otherwise nlmsg_transmit is noop (only starts - * timer) - * - * ctl.nlmsg_delay = ...; - * ctl.nlmsg_maxsize = ; - * .... - * msg = nlmsg_send(&ctl, ...); - * if (msg) - * ... make it ... - * .... - * msg = nlmsg_send(&ctl, ...); - * if (msg) - * ... make it ... - * .... - * if (ctl.nlmsg_skb) - * nlmsg_transmit(&ctl); - * - */ - -/* - * Try to deliver queued messages. - * If the delivery fails (netlink is not attached or congested), - * do not free skb to avoid useless new message creation. - * - * Notes: - * - timer should be already stopped. - * - NET SPL. - */ - -void nlmsg_flush(struct nlmsg_ctl *ctl) -{ - if (ctl->nlmsg_skb == NULL) - return; - - if (netlink_post(ctl->nlmsg_unit, ctl->nlmsg_skb) == 0) - { - ctl->nlmsg_skb = NULL; - return; - } - - ctl->nlmsg_timer.expires = jiffies + NLMSG_RECOVERY_TIMEO; - ctl->nlmsg_timer.data = (unsigned long)ctl; - ctl->nlmsg_timer.function = (void (*)(unsigned long))nlmsg_flush; - add_timer(&ctl->nlmsg_timer); - return; -} - - -/* - * Allocate room for new message. If it is impossible, - * start "overrun" mode and return NULL. - * - * Notes: - * - NET SPL. 
- */ - -void* nlmsg_send(struct nlmsg_ctl *ctl, unsigned long type, int len, - unsigned long seq, unsigned long pid) -{ - struct nlmsghdr *nlh; - struct sk_buff *skb; - int rlen; - - static __inline__ void nlmsg_lost(struct nlmsg_ctl *ctl, - unsigned long seq) - { - if (!ctl->nlmsg_overrun) - { - ctl->nlmsg_overrun_start = seq; - ctl->nlmsg_overrun_end = seq; - ctl->nlmsg_overrun = 1; - return; - } - if (!ctl->nlmsg_overrun_start) - ctl->nlmsg_overrun_start = seq; - if (seq) - ctl->nlmsg_overrun_end = seq; - } - - if (!(open_map&(1<nlmsg_unit))) - { - nlmsg_lost(ctl, seq); - return NULL; - } - - rlen = NLMSG_ALIGN(len + sizeof(struct nlmsghdr)); - - if (rlen > ctl->nlmsg_maxsize) - { - printk(KERN_ERR "nlmsg_send: too big message\n"); - return NULL; - } - - if ((skb=ctl->nlmsg_skb) == NULL || skb_tailroom(skb) < rlen) - { - if (skb) - { - ctl->nlmsg_force++; - nlmsg_flush(ctl); - ctl->nlmsg_force--; - } - - if (ctl->nlmsg_skb || - (skb=alloc_skb(ctl->nlmsg_maxsize, GFP_ATOMIC)) == NULL) - { - printk (KERN_WARNING "nlmsg at unit %d overrunned\n", ctl->nlmsg_unit); - nlmsg_lost(ctl, seq); - return NULL; - } - - ctl->nlmsg_skb = skb; - - if (ctl->nlmsg_overrun) - { - int *seqp; - nlh = (struct nlmsghdr*)skb_put(skb, sizeof(struct nlmsghdr) + 2*sizeof(unsigned long)); - nlh->nlmsg_type = NLMSG_OVERRUN; - nlh->nlmsg_len = sizeof(struct nlmsghdr) + 2*sizeof(unsigned long); - nlh->nlmsg_seq = 0; - nlh->nlmsg_pid = 0; - seqp = (int*)nlh->nlmsg_data; - seqp[0] = ctl->nlmsg_overrun_start; - seqp[1] = ctl->nlmsg_overrun_end; - ctl->nlmsg_overrun = 0; - } - if (ctl->nlmsg_timer.function) - { - del_timer(&ctl->nlmsg_timer); - ctl->nlmsg_timer.function = NULL; - } - if (ctl->nlmsg_delay) - { - ctl->nlmsg_timer.expires = jiffies + ctl->nlmsg_delay; - ctl->nlmsg_timer.function = (void (*)(unsigned long))nlmsg_flush; - ctl->nlmsg_timer.data = (unsigned long)ctl; - add_timer(&ctl->nlmsg_timer); - } - } - - nlh = (struct nlmsghdr*)skb_put(skb, rlen); - nlh->nlmsg_type = type; - 
nlh->nlmsg_len = sizeof(struct nlmsghdr) + len; - nlh->nlmsg_seq = seq; - nlh->nlmsg_pid = pid; - return nlh->nlmsg_data; -} - -/* - * Kick message queue. - * Two modes: - * - synchronous (delay==0). Messages are delivered immediately. - * - delayed. Do not deliver, but start delivery timer. - */ - -void nlmsg_transmit(struct nlmsg_ctl *ctl) -{ - start_bh_atomic(); - - if (!ctl->nlmsg_delay) - { - if (ctl->nlmsg_timer.function) - { - del_timer(&ctl->nlmsg_timer); - ctl->nlmsg_timer.function = NULL; - } - ctl->nlmsg_force++; - nlmsg_flush(ctl); - ctl->nlmsg_force--; - end_bh_atomic(); - return; - } - if (!ctl->nlmsg_timer.function) - { - ctl->nlmsg_timer.expires = jiffies + ctl->nlmsg_delay; - ctl->nlmsg_timer.function = (void (*)(unsigned long))nlmsg_flush; - ctl->nlmsg_timer.data = (unsigned long)ctl; - add_timer(&ctl->nlmsg_timer); - } - - end_bh_atomic(); -} - - -__initfunc(int init_netlink(void)) -{ - int ct; - - if(register_chrdev(NETLINK_MAJOR,"netlink", &netlink_fops)) { - printk(KERN_ERR "netlink: unable to get major %d\n", NETLINK_MAJOR); - return -EIO; - } - for(ct=0;ctflags = 0; - dev->family = AF_INET; - -#ifdef CONFIG_INET - dev->pa_addr = in_aton("192.168.0.1"); - dev->pa_brdaddr = in_aton("192.168.0.255"); - dev->pa_mask = in_aton("255.255.255.0"); - dev->pa_alen = 4; -#endif if ((dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL)) == NULL) return -ENOMEM; diff -u --recursive --new-file v2.1.67/linux/net/netsyms.c linux/net/netsyms.c --- v2.1.67/linux/net/netsyms.c Sat Oct 25 02:44:18 1997 +++ linux/net/netsyms.c Sun Nov 30 14:00:40 1997 @@ -19,6 +19,7 @@ #ifdef CONFIG_INET #include #include +#include #include #include #include @@ -28,8 +29,8 @@ #include #include #include +#include #include -#include #include extern struct net_proto_family inet_family_ops; @@ -43,13 +44,7 @@ #endif -#ifdef CONFIG_NETLINK -#include -#endif - -#ifdef CONFIG_NET_ALIAS -#include -#endif +#include #include @@ -121,6 +116,7 @@ 
EXPORT_SYMBOL(skb_realloc_headroom); EXPORT_SYMBOL(datagram_poll); EXPORT_SYMBOL(put_cmsg); +EXPORT_SYMBOL(net_families); EXPORT_SYMBOL(neigh_table_init); /* Declared in but not defined? @@ -144,6 +140,12 @@ EXPORT_SYMBOL(__scm_destroy); EXPORT_SYMBOL(__scm_send); +/* Needed by unix.o */ +EXPORT_SYMBOL(scm_fp_dup); +EXPORT_SYMBOL(max_files); +EXPORT_SYMBOL(do_mknod); +EXPORT_SYMBOL(memcpy_toiovec); + #ifdef CONFIG_IPX_MODULE EXPORT_SYMBOL(make_8023_client); EXPORT_SYMBOL(destroy_8023_client); @@ -153,6 +155,9 @@ #ifdef CONFIG_ATALK_MODULE EXPORT_SYMBOL(sklist_destroy_socket); +#endif + +#if defined(CONFIG_ATALK_MODULE) || defined(CONFIG_PACKET_MODULE) EXPORT_SYMBOL(sklist_insert_socket); #endif @@ -169,15 +174,14 @@ EXPORT_SYMBOL(ip_route_output); EXPORT_SYMBOL(icmp_send); EXPORT_SYMBOL(ip_options_compile); -EXPORT_SYMBOL(ip_rt_put); EXPORT_SYMBOL(arp_send); EXPORT_SYMBOL(ip_id_count); EXPORT_SYMBOL(ip_send_check); EXPORT_SYMBOL(ip_fragment); -EXPORT_SYMBOL(ip_dev_find_tunnel); EXPORT_SYMBOL(inet_family_ops); EXPORT_SYMBOL(in_aton); EXPORT_SYMBOL(in_ntoa); +EXPORT_SYMBOL(net_ratelimit); #ifdef CONFIG_IPV6_MODULE /* inet functions common to v4 and v6 */ @@ -206,7 +210,6 @@ EXPORT_SYMBOL(destroy_sock); EXPORT_SYMBOL(ip_queue_xmit); EXPORT_SYMBOL(csum_partial); -EXPORT_SYMBOL(dev_lockct); EXPORT_SYMBOL(memcpy_fromiovecend); EXPORT_SYMBOL(csum_partial_copy_fromiovecend); EXPORT_SYMBOL(__release_sock); @@ -231,7 +234,6 @@ EXPORT_SYMBOL(tcp_recvmsg); EXPORT_SYMBOL(tcp_send_synack); EXPORT_SYMBOL(tcp_check_req); -EXPORT_SYMBOL(sock_wmalloc); EXPORT_SYMBOL(tcp_reset_xmit_timer); EXPORT_SYMBOL(tcp_parse_options); EXPORT_SYMBOL(tcp_rcv_established); @@ -249,13 +251,35 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock); EXPORT_SYMBOL(tcp_v4_do_rcv); EXPORT_SYMBOL(tcp_v4_connect); -EXPORT_SYMBOL(__ip_chk_addr); +EXPORT_SYMBOL(inet_addr_type); EXPORT_SYMBOL(net_reset_timer); EXPORT_SYMBOL(net_delete_timer); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(tcp_prot); 
EXPORT_SYMBOL(tcp_openreq_cachep); EXPORT_SYMBOL(ipv4_specific); +EXPORT_SYMBOL(tcp_simple_retransmit); + +EXPORT_SYMBOL(xrlim_allow); +#endif + +#ifdef CONFIG_PACKET_MODULE +EXPORT_SYMBOL(memcpy_toiovec); +EXPORT_SYMBOL(dev_set_allmulti); +EXPORT_SYMBOL(dev_set_promiscuity); +EXPORT_SYMBOL(dev_mc_delete); +EXPORT_SYMBOL(sklist_remove_socket); +EXPORT_SYMBOL(rtnl_wait); +EXPORT_SYMBOL(rtnl_rlockct); +#ifdef CONFIG_RTNETLINK +EXPORT_SYMBOL(rtnl); +EXPORT_SYMBOL(rtnl_wlockct); +#endif +#endif + +#if defined(CONFIG_IPV6_MODULE) || defined(CONFIG_PACKET_MODULE) +EXPORT_SYMBOL(dev_lockct); +EXPORT_SYMBOL(sock_wmalloc); #endif #if defined(CONFIG_ULTRA) || defined(CONFIG_WD80x3) || \ @@ -282,15 +306,9 @@ EXPORT_SYMBOL(tr_reformat); #endif -#ifdef CONFIG_NET_ALIAS -#include -#endif - /* Used by at least ipip.c. */ EXPORT_SYMBOL(ipv4_config); -#ifdef CONFIG_IP_MROUTE -EXPORT_SYMBOL(ip_mr_find_tunnel); -#endif +EXPORT_SYMBOL(dev_open); #endif /* CONFIG_INET */ @@ -298,19 +316,19 @@ EXPORT_SYMBOL(register_netdevice_notifier); EXPORT_SYMBOL(unregister_netdevice_notifier); -#ifdef CONFIG_NET_ALIAS -EXPORT_SYMBOL(register_net_alias_type); -EXPORT_SYMBOL(unregister_net_alias_type); -#endif - /* support for loadable net drivers */ #ifdef CONFIG_NET +EXPORT_SYMBOL(register_netdevice); +EXPORT_SYMBOL(unregister_netdevice); EXPORT_SYMBOL(register_netdev); EXPORT_SYMBOL(unregister_netdev); EXPORT_SYMBOL(ether_setup); EXPORT_SYMBOL(dev_new_index); EXPORT_SYMBOL(dev_get_by_index); EXPORT_SYMBOL(eth_type_trans); +#ifdef CONFIG_FDDI +EXPORT_SYMBOL(fddi_type_trans); +#endif /* CONFIG_FDDI */ EXPORT_SYMBOL(eth_copy_and_sum); EXPORT_SYMBOL(alloc_skb); EXPORT_SYMBOL(__kfree_skb); @@ -318,7 +336,6 @@ EXPORT_SYMBOL(skb_copy); EXPORT_SYMBOL(dev_alloc_skb); EXPORT_SYMBOL(netif_rx); -EXPORT_SYMBOL(dev_tint); EXPORT_SYMBOL(dev_add_pack); EXPORT_SYMBOL(dev_remove_pack); EXPORT_SYMBOL(dev_get); @@ -340,6 +357,9 @@ EXPORT_SYMBOL(ip_rcv); EXPORT_SYMBOL(arp_rcv); +EXPORT_SYMBOL(rtnl_lock); 
+EXPORT_SYMBOL(rtnl_unlock); + EXPORT_SYMBOL(if_port_text); #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE) @@ -352,11 +372,13 @@ EXPORT_SYMBOL(dlci_ioctl_hook); #endif -#endif /* CONFIG_NET */ +/* Packet scheduler modules want these. */ +EXPORT_SYMBOL(qdisc_destroy); +EXPORT_SYMBOL(qdisc_reset); +EXPORT_SYMBOL(qdisc_restart); +EXPORT_SYMBOL(qdisc_head); +EXPORT_SYMBOL(register_qdisc); +EXPORT_SYMBOL(unregister_qdisc); +EXPORT_SYMBOL(noop_qdisc); -#ifdef CONFIG_NETLINK -EXPORT_SYMBOL(netlink_attach); -EXPORT_SYMBOL(netlink_detach); -EXPORT_SYMBOL(netlink_donothing); -EXPORT_SYMBOL(netlink_post); -#endif /* CONFIG_NETLINK */ +#endif /* CONFIG_NET */ diff -u --recursive --new-file v2.1.67/linux/net/packet/Makefile linux/net/packet/Makefile --- v2.1.67/linux/net/packet/Makefile Wed Dec 31 16:00:00 1969 +++ linux/net/packet/Makefile Sun Nov 30 14:00:40 1997 @@ -0,0 +1,24 @@ +# +# Makefile for the packet AF. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +O_TARGET := packet.o +MOD_LIST_NAME := NET_MISC_MODULES + +O_OBJS := +M_OBJS := + +ifeq ($(CONFIG_PACKET),y) + O_OBJS += af_packet.o +else + ifeq ($(CONFIG_PACKET), m) + M_OBJS += af_packet.o + endif +endif + +include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.1.67/linux/net/packet/af_packet.c linux/net/packet/af_packet.c --- v2.1.67/linux/net/packet/af_packet.c Wed Dec 31 16:00:00 1969 +++ linux/net/packet/af_packet.c Sun Nov 30 14:00:40 1997 @@ -0,0 +1,1251 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * PACKET - implements raw packet sockets. 
+ * + * Doesn't belong in IP but it's currently too hooked into ip + * to separate. + * + * Version: @(#)packet.c 1.0.6 05/25/93 + * + * Authors: Ross Biro, + * Fred N. van Kempen, + * Alan Cox, + * + * Fixes: + * Alan Cox : verify_area() now used correctly + * Alan Cox : new skbuff lists, look ma no backlogs! + * Alan Cox : tidied skbuff lists. + * Alan Cox : Now uses generic datagram routines I + * added. Also fixed the peek/read crash + * from all old Linux datagram code. + * Alan Cox : Uses the improved datagram code. + * Alan Cox : Added NULL's for socket options. + * Alan Cox : Re-commented the code. + * Alan Cox : Use new kernel side addressing + * Rob Janssen : Correct MTU usage. + * Dave Platt : Counter leaks caused by incorrect + * interrupt locking and some slightly + * dubious gcc output. Can you read + * compiler: it said _VOLATILE_ + * Richard Kooijman : Timestamp fixes. + * Alan Cox : New buffers. Use sk->mac.raw. + * Alan Cox : sendmsg/recvmsg support. + * Alan Cox : Protocol setting support + * Alexey Kuznetsov : Untied from IPv4 stack. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_DLCI) || defined(CONFIG_DLCI_MODULE) +#include +#endif + +/* + Old SOCK_PACKET. Do exist programs, which use it? + (not counting tcpdump) - lots of them yes - AC. + + */ +#define CONFIG_SOCK_PACKET 1 + +/* + Proposed replacement for SIOC{ADD,DEL}MULTI and + IFF_PROMISC, IFF_ALLMULTI flags. + + It is more expensive, but I believe, + it is really correct solution: reentereble, safe and fault tolerant. 
+ + Differences: + - Changing IFF_ALLMULTI from user level is disabled. + It could only confused multicast routing daemons, not more. + - IFF_PROMISC is faked by keeping reference count and + global flag, so that real IFF_PROMISC == (gflag|(count != 0)) + I'd remove it too, but it would require recompilation tcpdump + and another applications, using promiscuous mode. + - SIOC{ADD/DEL}MULTI are moved to deprecated state, + they work, but complain. I do know who uses them. + + +*************FIXME*************** + Alexey : This doesnt cook Im afraid. We need the low level SIOCADD/DELMULTI + and also IFF_ALLMULTI for DECNET, Appletalk and other stuff as well as + BSD compatibility issues. + + */ +#define CONFIG_PACKET_MULTICAST 1 + +/* + Assumptions: + - if device has no dev->hard_header routine, it adds and removes ll header + inside itself. In this case ll header is invisible outside of device, + but higher levels still should reserve dev->hard_header_len. + Some devices are enough clever to reallocate skb, when header + will not fit to reserved space (tunnel), another ones are silly + (PPP). + - packet socket receives packets with pulled ll header, + so that SOCK_RAW should push it back. + +On receive: +----------- + +Incoming, dev->hard_header!=NULL + mac.raw -> ll header + data -> data + +Outgoing, dev->hard_header!=NULL + mac.raw -> ll header + data -> ll header + +Incoming, dev->hard_header==NULL + mac.raw -> UNKNOWN position. It is very likely, that it points to ll header. + PPP makes it, that is wrong, because introduce assymetry + between rx and tx paths. + data -> data + +Outgoing, dev->hard_header==NULL + mac.raw -> data. ll header is still not built! + data -> data + +Resume + If dev->hard_header==NULL we are unlikely to restore sensible ll header. 
+ + +On transmit: +------------ + +dev->hard_header != NULL + mac.raw -> ll header + data -> ll header + +dev->hard_header == NULL (ll header is added by device, we cannot control it) + mac.raw -> data + data -> data + + We should set nh.raw on output to correct posistion, + packet classifier depends on it. + */ + +/* List of all packet sockets. */ +struct sock * packet_sklist = NULL; + +/* Private packet socket structures. */ + +#ifdef CONFIG_PACKET_MULTICAST +struct packet_mclist +{ + struct packet_mclist *next; + int ifindex; + int count; + unsigned short type; + unsigned short alen; + unsigned char addr[8]; +}; +#endif + +static void packet_flush_mclist(struct sock *sk); + +struct packet_opt +{ + struct packet_type prot_hook; + char running; /* prot_hook is attached*/ + int ifindex; /* bound device */ +#ifdef CONFIG_PACKET_MULTICAST + struct packet_mclist *mclist; +#endif +}; + +extern struct proto_ops packet_ops; + +#ifdef CONFIG_SOCK_PACKET +extern struct proto_ops packet_ops_spkt; + +static int packet_rcv_spkt(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct sock *sk; + struct sockaddr_pkt *spkt = (struct sockaddr_pkt*)skb->cb; + + /* + * When we registered the protocol we saved the socket in the data + * field for just this event. + */ + + sk = (struct sock *) pt->data; + + /* + * Yank back the headers [hope the device set this + * right or kerboom...] + * + * Incoming packets have ll header pulled, + * push it back. + * + * For outgoing ones skb->data == skb->mac.raw + * so that this procedure is noop. + */ + + skb_push(skb, skb->data-skb->mac.raw); + + /* + * The SOCK_PACKET socket receives _all_ frames. + */ + + spkt->spkt_family = dev->type; + strncpy(spkt->spkt_device, dev->name, 15); + spkt->spkt_protocol = skb->protocol; + + /* + * Charge the memory to the socket. This is done specifically + * to prevent sockets using all the memory up. 
+ */ + + if (sock_queue_rcv_skb(sk,skb)<0) + { + kfree_skb(skb, FREE_READ); + return 0; + } + + /* + * Processing complete. + */ + return(0); +} + + +/* + * Output a raw packet to a device layer. This bypasses all the other + * protocol layers and you must therefore supply it with a complete frame + */ + +static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len, + struct scm_cookie *scm) +{ + struct sock *sk = sock->sk; + struct sk_buff *skb; + struct device *dev; + struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name; + unsigned short proto=0; + int err; + + /* + * Check the flags. + */ + + if (msg->msg_flags&~MSG_DONTWAIT) + return(-EINVAL); + + /* + * Get and verify the address. + */ + + if (saddr) + { + if (msg->msg_namelen < sizeof(struct sockaddr)) + return(-EINVAL); + if (msg->msg_namelen==sizeof(struct sockaddr_pkt)) + proto=saddr->spkt_protocol; + } + else + return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */ + + /* + * Find the device first to size check it + */ + + saddr->spkt_device[13] = 0; + dev = dev_get(saddr->spkt_device); + if (dev == NULL) + { + return(-ENODEV); + } + + /* + * You may not queue a frame bigger than the mtu. This is the lowest level + * raw protocol and you must do your own fragmentation at this level. + */ + + if(len>dev->mtu+dev->hard_header_len) + return -EMSGSIZE; + + dev_lock_list(); + skb = sock_wmalloc(sk, len+dev->hard_header_len+15, 0, GFP_KERNEL); + + /* + * If the write buffer is full, then tough. At this level the user gets to + * deal with the problem - do your own algorithmic backoffs. That's far + * more flexible. + */ + + if (skb == NULL) + { + dev_unlock_list(); + return(-ENOBUFS); + } + + /* + * Fill it in + */ + + /* FIXME: Save some space for broken drivers that write a + * hard header at transmission time by themselves. PPP is the + * notable one here. This should really be fixed at the driver level. 
+ */ + skb_reserve(skb,(dev->hard_header_len+15)&~15); + skb->mac.raw = skb->nh.raw = skb->data; + + /* Try to align data part correctly */ + if (dev->hard_header) { + skb->data -= dev->hard_header_len; + skb->tail -= dev->hard_header_len; + skb->mac.raw = skb->data; + } + err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); + skb->arp = 1; /* No ARP needs doing on this (complete) frame */ + skb->protocol = proto; + skb->dev = dev; + skb->priority = sk->priority; + dev_unlock_list(); + + /* + * Now send it + */ + + if (err) + { + err = -EFAULT; + } + else + { + if (!(dev->flags & IFF_UP)) + { + err = -ENETDOWN; + } + } + + if (err) + { + kfree_skb(skb, FREE_WRITE); + return err; + } + + dev_queue_xmit(skb); + return(len); +} +#endif + +static int packet_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) +{ + struct sock *sk; + struct sockaddr_ll *sll = (struct sockaddr_ll*)skb->cb; + + /* + * When we registered the protocol we saved the socket in the data + * field for just this event. + */ + + sk = (struct sock *) pt->data; + + /* + * The SOCK_PACKET socket receives _all_ frames. + */ + + skb->dev = dev; + + sll->sll_family = AF_PACKET; + sll->sll_hatype = dev->type; + sll->sll_protocol = skb->protocol; + sll->sll_pkttype = skb->pkt_type; + sll->sll_ifindex = dev->ifindex; + sll->sll_halen = 0; + + if (dev->hard_header_parse) + sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); + + if (dev->hard_header) { + /* The device has an explicit notion of ll header, + exported to higher levels. + + Otherwise, the device hides datails of it frame + structure, so that corresponding packet head + never delivered to user. + */ + if (sk->type != SOCK_DGRAM) + skb_push(skb, skb->data - skb->mac.raw); + else if (skb->pkt_type == PACKET_OUTGOING) { + /* Special case: outgoing packets have ll header at head */ + skb_pull(skb, skb->nh.raw - skb->data); + } + } + + /* + * Charge the memory to the socket. 
This is done specifically + * to prevent sockets using all the memory up. + */ + + if (sock_queue_rcv_skb(sk,skb)<0) + { + kfree_skb(skb, FREE_READ); + return 0; + } + return(0); +} + +static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len, + struct scm_cookie *scm) +{ + struct sock *sk = sock->sk; + struct sk_buff *skb; + struct device *dev; + struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name; + unsigned short proto; + int ifindex; + int err; + int reserve = 0; + + /* + * Check the flags. + */ + + if (msg->msg_flags&~MSG_DONTWAIT) + return(-EINVAL); + + /* + * Get and verify the address. + */ + + if (saddr == NULL) { + ifindex = sk->protinfo.af_packet->ifindex; + proto = sk->num; + } else { + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) + return -EINVAL; + ifindex = saddr->sll_ifindex; + proto = saddr->sll_protocol; + } + + dev = dev_get_by_index(ifindex); + if (dev == NULL) + return -ENXIO; + if (sock->type == SOCK_RAW) + reserve = dev->hard_header_len; + + if (len > dev->mtu+reserve) + return -EMSGSIZE; + + dev_lock_list(); + + skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15, 0, msg->msg_flags&MSG_DONTWAIT, &err); + + if (skb==NULL) { + dev_unlock_list(); + return err; + } + + skb_reserve(skb, (dev->hard_header_len+15)&~15); + skb->mac.raw = skb->nh.raw = skb->data; + + if (dev->hard_header) { + if (dev->hard_header(skb, dev, ntohs(proto), + saddr ? 
saddr->sll_addr : NULL, + NULL, len) < 0 + && sock->type == SOCK_DGRAM) { + kfree_skb(skb, FREE_WRITE); + dev_unlock_list(); + return -EINVAL; + } + skb->mac.raw = skb->data; + if (sock->type != SOCK_DGRAM) { + skb->tail = skb->data; + skb->len = 0; + } + } + + err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); + skb->arp = 1; /* No ARP needs doing on this (complete) frame */ + skb->protocol = proto; + skb->dev = dev; + skb->priority = sk->priority; + dev_unlock_list(); + + /* + * Now send it + */ + + if (err) { + err = -EFAULT; + } else { + if (!(dev->flags & IFF_UP)) + err = -ENETDOWN; + } + + if (err) { + kfree_skb(skb, FREE_WRITE); + return err; + } + + dev_queue_xmit(skb); + return(len); +} + +static void packet_destroy_timer(unsigned long data) +{ + struct sock *sk=(struct sock *)data; + + if (!atomic_read(&sk->wmem_alloc) && !atomic_read(&sk->rmem_alloc)) { + sk_free(sk); + MOD_DEC_USE_COUNT; + return; + } + + sk->timer.expires=jiffies+10*HZ; + add_timer(&sk->timer); + printk(KERN_DEBUG "packet sk destroy delayed\n"); +} + +/* + * Close a PACKET socket. This is fairly simple. We immediately go + * to 'closed' state and remove our protocol entry in the device list. + */ + +static int packet_release(struct socket *sock, struct socket *peersock) +{ + struct sk_buff *skb; + struct sock *sk = sock->sk; + + if (!sk) + return 0; + + sklist_remove_socket(&packet_sklist, sk); + + /* + * Unhook packet receive handler. + */ + + if (sk->protinfo.af_packet->running) + { + /* + * Remove the protocol hook + */ + + dev_remove_pack(&sk->protinfo.af_packet->prot_hook); + sk->protinfo.af_packet->running = 0; + } + +#ifdef CONFIG_PACKET_MULTICAST + packet_flush_mclist(sk); +#endif + + /* + * Now the socket is dead. No more input will appear. + */ + + sk->state_change(sk); /* It is useless. Just for sanity. 
*/ + + sock->sk = NULL; + sk->socket = NULL; + sk->dead = 1; + + /* Purge queues */ + + while ((skb=skb_dequeue(&sk->receive_queue))!=NULL) + kfree_skb(skb,FREE_READ); + + if (atomic_read(&sk->rmem_alloc) || atomic_read(&sk->wmem_alloc)) { + sk->timer.data=(unsigned long)sk; + sk->timer.expires=jiffies+HZ; + sk->timer.function=packet_destroy_timer; + add_timer(&sk->timer); + return 0; + } + + sk_free(sk); + MOD_DEC_USE_COUNT; + return 0; +} + +/* + * Attach a packet hook. + */ + +static int packet_do_bind(struct sock *sk, struct device *dev, int protocol) +{ + /* + * Detach an existing hook if present. + */ + + if (sk->protinfo.af_packet->running) { + dev_remove_pack(&sk->protinfo.af_packet->prot_hook); + sk->protinfo.af_packet->running = 0; + } + + sk->num = protocol; + sk->protinfo.af_packet->prot_hook.type = protocol; + sk->protinfo.af_packet->prot_hook.dev = dev; + + if (protocol == 0) + return 0; + + if (dev) { + sk->protinfo.af_packet->ifindex = dev->ifindex; + if (dev->flags&IFF_UP) { + dev_add_pack(&sk->protinfo.af_packet->prot_hook); + sk->protinfo.af_packet->running = 1; + } else { + sk->err = ENETDOWN; + sk->error_report(sk); + } + } else { + sk->protinfo.af_packet->ifindex = 0; + dev_add_pack(&sk->protinfo.af_packet->prot_hook); + sk->protinfo.af_packet->running = 1; + } + return 0; +} + +/* + * Bind a packet socket to a device + */ + +#ifdef CONFIG_SOCK_PACKET + +static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sock *sk=sock->sk; + char name[15]; + struct device *dev; + + /* + * Check legality + */ + + if(addr_len!=sizeof(struct sockaddr)) + return -EINVAL; + strncpy(name,uaddr->sa_data,14); + name[14]=0; + + dev = dev_get(name); + if (dev) + return packet_do_bind(sk, dev, sk->num); + return -ENODEV; +} +#endif + +static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr; + struct sock *sk=sock->sk; + struct device *dev = 
NULL; + + /* + * Check legality + */ + + if (addr_len < sizeof(struct sockaddr_ll)) + return -EINVAL; + if (sll->sll_family != AF_PACKET) + return -EINVAL; + + if (sll->sll_ifindex) { + dev = dev_get_by_index(sll->sll_ifindex); + if (dev == NULL) + return -ENODEV; + } + return packet_do_bind(sk, dev, sll->sll_protocol ? : sk->num); +} + + +/* + * Create a packet of type SOCK_PACKET. + */ + +static int packet_create(struct socket *sock, int protocol) +{ + struct sock *sk; + + if (!suser()) + return -EPERM; + if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW +#ifdef CONFIG_SOCK_PACKET + && sock->type != SOCK_PACKET +#endif + ) + return -ESOCKTNOSUPPORT; + + sock->state = SS_UNCONNECTED; + MOD_INC_USE_COUNT; + sk = sk_alloc(AF_PACKET, GFP_KERNEL); + if (sk == NULL) { + MOD_DEC_USE_COUNT; + return -ENOBUFS; + } + + sk->reuse = 1; +#ifdef CONFIG_SOCK_PACKET + if (sock->type == SOCK_PACKET) + sock->ops = &packet_ops_spkt; + else +#endif + sock->ops = &packet_ops; + sock_init_data(sock,sk); + + sk->protinfo.af_packet = kmalloc(sizeof(struct packet_opt), GFP_KERNEL); + if (sk->protinfo.af_packet == NULL) { + sk_free(sk); + MOD_DEC_USE_COUNT; + return -ENOBUFS; + } + memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt)); + sk->zapped=0; + sk->family = AF_PACKET; + sk->num = protocol; + + /* + * Attach a protocol block + */ + +#ifdef CONFIG_SOCK_PACKET + if (sock->type == SOCK_PACKET) + sk->protinfo.af_packet->prot_hook.func = packet_rcv_spkt; + else +#endif + sk->protinfo.af_packet->prot_hook.func = packet_rcv; + + sk->protinfo.af_packet->prot_hook.data = (void *)sk; + + if (protocol) { + sk->protinfo.af_packet->prot_hook.type = protocol; + dev_add_pack(&sk->protinfo.af_packet->prot_hook); + sk->protinfo.af_packet->running = 1; + } + + sklist_insert_socket(&packet_sklist, sk); + return(0); +} + +/* + * Pull a packet from our receive queue and hand it to the user. + * If necessary we block. + */ + +/* + * NOTE about lock_* & release_* primitives. 
+ * I do not understand why skb_recv_datagram locks socket. + * My analysis shows that it is useless for datagram services: + * i.e. here, udp, raw and netlink. FIX ME if I am wrong, + * but lock&release are necessary only for SOCK_STREAM + * and, maybe, SOCK_SEQPACKET. + * --ANK + */ + +static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len, + int flags, struct scm_cookie *scm) +{ + struct sock *sk = sock->sk; + int copied=0; + struct sk_buff *skb; + int err; + +#if 0 + /* What error should we return now? EUNATTACH? */ + if (sk->protinfo.af_packet->ifindex < 0) + return -ENODEV; +#endif + + /* + * If the address length field is there to be filled in, we fill + * it in now. + */ + + if (sock->type == SOCK_PACKET) + msg->msg_namelen = sizeof(struct sockaddr_pkt); + else + msg->msg_namelen = sizeof(struct sockaddr_ll); + + /* + * Call the generic datagram receiver. This handles all sorts + * of horrible races and re-entrancy so we can forget about it + * in the protocol layers. + * + * Now it will return ENETDOWN, if device have just gone down, + * but then it will block. + */ + + skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err); + + /* + * An error occurred so return it. Because skb_recv_datagram() + * handles the blocking we don't see and worry about blocking + * retries. + */ + + if(skb==NULL) + return err; + + /* + * You lose any data beyond the buffer you gave. If it worries a + * user program they can ask the device for its MTU anyway. + */ + + copied = skb->len; + if(copied>len) + { + copied=len; + msg->msg_flags|=MSG_TRUNC; + } + + /* We can't use skb_copy_datagram here */ + err = memcpy_toiovec(msg->msg_iov, skb->data, copied); + if (err) + { + return -EFAULT; + } + + sk->stamp=skb->stamp; + + if (msg->msg_name) + memcpy(msg->msg_name, skb->cb, msg->msg_namelen); + + /* + * Free or return the buffer as appropriate. Again this hides all the + * races and re-entrancy issues from us. 
+ */ + + skb_free_datagram(sk, skb); + + return(copied); +} + +#ifdef CONFIG_SOCK_PACKET +static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct device *dev; + struct sock *sk = sock->sk; + + if (peer) + return -EOPNOTSUPP; + + uaddr->sa_family = AF_PACKET; + dev = dev_get_by_index(sk->protinfo.af_packet->ifindex); + if (dev) + strncpy(uaddr->sa_data, dev->name, 15); + else + memset(uaddr->sa_data, 0, 14); + *uaddr_len = sizeof(*uaddr); + + return 0; +} +#endif + +static int packet_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct device *dev; + struct sock *sk = sock->sk; + struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr; + + if (peer) + return -EOPNOTSUPP; + + sll->sll_family = AF_PACKET; + sll->sll_ifindex = sk->protinfo.af_packet->ifindex; + sll->sll_protocol = sk->num; + dev = dev_get_by_index(sk->protinfo.af_packet->ifindex); + if (dev) { + sll->sll_hatype = dev->type; + sll->sll_halen = dev->addr_len; + memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len); + } else { + sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ + sll->sll_halen = 0; + } + *uaddr_len = sizeof(*sll); + + return 0; +} + +#ifdef CONFIG_PACKET_MULTICAST +static void packet_dev_mc(struct device *dev, struct packet_mclist *i, int what) +{ + switch (i->type) { + case PACKET_MR_MULTICAST: + if (what > 0) + dev_mc_add(dev, i->addr, i->alen, 0); + else + dev_mc_delete(dev, i->addr, i->alen, 0); + break; + case PACKET_MR_PROMISC: + dev_set_promiscuity(dev, what); + break; + case PACKET_MR_ALLMULTI: + dev_set_allmulti(dev, what); + break; + default: + } +} + +static void packet_dev_mclist(struct device *dev, struct packet_mclist *i, int what) +{ + for ( ; i; i=i->next) { + if (i->ifindex == dev->ifindex) + packet_dev_mc(dev, i, what); + } +} + +static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq) +{ + int err; + struct packet_mclist *ml, *i; + struct device *dev; + + 
rtnl_shlock(); + + dev = dev_get_by_index(mreq->mr_ifindex); + + i = NULL; + err = -ENODEV; + if (!dev) + goto done; + err = -EINVAL; + if (mreq->mr_alen > dev->addr_len) + goto done; + + i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL); + + for (ml=sk->protinfo.af_packet->mclist; ml; ml=ml->next) { + if (ml->ifindex == mreq->mr_ifindex && + ml->type == mreq->mr_type && + ml->alen == mreq->mr_alen && + memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { + ml->count++; + err = 0; + goto done; + } + } + err = -ENOBUFS; + if (i == NULL) + goto done; + i->type = mreq->mr_type; + i->ifindex = mreq->mr_ifindex; + i->alen = mreq->mr_alen; + memcpy(i->addr, mreq->mr_address, i->alen); + i->count = 1; + i->next = sk->protinfo.af_packet->mclist; + sk->protinfo.af_packet->mclist = i; + packet_dev_mc(dev, i, +1); + i = NULL; + err = 0; + +done: + rtnl_shunlock(); + if (i) + kfree(i); + return err; +} + +static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq) +{ + struct packet_mclist *ml, **mlp; + + for (mlp=&sk->protinfo.af_packet->mclist; (ml=*mlp)!=NULL; mlp=&ml->next) { + if (ml->ifindex == mreq->mr_ifindex && + ml->type == mreq->mr_type && + ml->alen == mreq->mr_alen && + memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { + if (--ml->count == 0) { + struct device *dev; + *mlp = ml->next; + dev = dev_get_by_index(ml->ifindex); + if (dev) + packet_dev_mc(dev, ml, -1); + kfree_s(ml, sizeof(*ml)); + } + return 0; + } + } + return -EADDRNOTAVAIL; +} + +static void packet_flush_mclist(struct sock *sk) +{ + struct packet_mclist *ml; + + while ((ml=sk->protinfo.af_packet->mclist) != NULL) { + struct device *dev; + sk->protinfo.af_packet->mclist = ml->next; + if ((dev = dev_get_by_index(ml->ifindex)) != NULL) + packet_dev_mc(dev, ml, -1); + kfree_s(ml, sizeof(*ml)); + } +} + +static int +packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) +{ + struct sock *sk = sock->sk; + struct packet_mreq mreq; + + if (level != 
SOL_PACKET) + return -ENOPROTOOPT; + + switch(optname) { + case PACKET_ADD_MEMBERSHIP: + case PACKET_DROP_MEMBERSHIP: + + if (optlennext) { + po = sk->protinfo.af_packet; + + switch (msg) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + if (dev->ifindex == po->ifindex) { + if (po->running) { + dev_remove_pack(&po->prot_hook); + po->running = 0; + sk->err = ENETDOWN; + sk->error_report(sk); + } + if (msg == NETDEV_UNREGISTER) { + po->ifindex = -1; + po->prot_hook.dev = NULL; + } + } +#ifdef CONFIG_PACKET_MULTICAST + if (po->mclist) + packet_dev_mclist(dev, po->mclist, -1); +#endif + break; + case NETDEV_UP: + if (dev->ifindex == po->ifindex && sk->num && po->running==0) { + dev_add_pack(&po->prot_hook); + po->running = 1; + } +#ifdef CONFIG_PACKET_MULTICAST + if (po->mclist) + packet_dev_mclist(dev, po->mclist, +1); +#endif + break; + } + } + return NOTIFY_DONE; +} + + +static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + int err; + int pid; + + switch(cmd) + { + case FIOSETOWN: + case SIOCSPGRP: + err = get_user(pid, (int *) arg); + if (err) + return err; + if (current->pid != pid && current->pgrp != -pid && !suser()) + return -EPERM; + sk->proc = pid; + return(0); + case FIOGETOWN: + case SIOCGPGRP: + return put_user(sk->proc, (int *)arg); + return(0); + case SIOCGSTAMP: + if(sk->stamp.tv_sec==0) + return -ENOENT; + err = copy_to_user((void *)arg,&sk->stamp,sizeof(struct timeval)); + if (err) + err = -EFAULT; + return err; + case SIOCGIFFLAGS: + case SIOCSIFFLAGS: + case SIOCGIFCONF: + case SIOCGIFMETRIC: + case SIOCSIFMETRIC: + case SIOCGIFMEM: + case SIOCSIFMEM: + case SIOCGIFMTU: + case SIOCSIFMTU: + case SIOCSIFLINK: + case SIOCGIFHWADDR: + case SIOCSIFHWADDR: + case SIOCSIFMAP: + case SIOCGIFMAP: + case SIOCSIFSLAVE: + case SIOCGIFSLAVE: + case SIOCGIFINDEX: + case SIOCGIFNAME: + case SIOCGIFCOUNT: + case SIOCSIFHWBROADCAST: + return(dev_ioctl(cmd,(void *) arg)); + + case SIOCGIFBR: + case 
SIOCSIFBR: +#ifdef CONFIG_BRIDGE + return(br_ioctl(cmd,(void *) arg)); +#else + return -ENOPKG; +#endif + + case SIOCADDDLCI: + case SIOCDELDLCI: +#ifdef CONFIG_DLCI + return(dlci_ioctl(cmd, (void *) arg)); +#endif + +#ifdef CONFIG_DLCI_MODULE + +#ifdef CONFIG_KERNELD + if (dlci_ioctl_hook == NULL) + request_module("dlci"); +#endif + + if (dlci_ioctl_hook) + return((*dlci_ioctl_hook)(cmd, (void *) arg)); +#endif + return -ENOPKG; + + default: + if ((cmd >= SIOCDEVPRIVATE) && + (cmd <= (SIOCDEVPRIVATE + 15))) + return(dev_ioctl(cmd,(void *) arg)); + +#ifdef CONFIG_NET_RADIO + if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST)) + return(dev_ioctl(cmd,(void *) arg)); +#endif + return -EOPNOTSUPP; + } + /*NOTREACHED*/ + return(0); +} + +#ifdef CONFIG_SOCK_PACKET +struct proto_ops packet_ops_spkt = { + AF_PACKET, + + sock_no_dup, + packet_release, + packet_bind_spkt, + sock_no_connect, + NULL, + NULL, + packet_getname_spkt, + datagram_poll, + packet_ioctl, + sock_no_listen, + sock_no_shutdown, + sock_no_setsockopt, + sock_no_getsockopt, + sock_no_fcntl, + packet_sendmsg_spkt, + packet_recvmsg +}; +#endif + +struct proto_ops packet_ops = { + AF_PACKET, + + sock_no_dup, + packet_release, + packet_bind, + sock_no_connect, + NULL, + NULL, + packet_getname, + datagram_poll, + packet_ioctl, + sock_no_listen, + sock_no_shutdown, +#ifdef CONFIG_PACKET_MULTICAST + packet_setsockopt, +#else + sock_no_setsockopt, +#endif + sock_no_getsockopt, + sock_no_fcntl, + packet_sendmsg, + packet_recvmsg +}; + +static struct net_proto_family packet_family_ops = { + AF_PACKET, + packet_create +}; + +struct notifier_block packet_netdev_notifier={ + packet_notifier, + NULL, + 0 +}; + + +#ifdef MODULE +void cleanup_module(void) +{ + unregister_netdevice_notifier(&packet_netdev_notifier); + sock_unregister(packet_family_ops.family); + return; +} + + +int init_module(void) +#else +__initfunc(void packet_proto_init(struct net_proto *pro)) +#endif +{ + sock_register(&packet_family_ops); + 
register_netdevice_notifier(&packet_netdev_notifier); +#ifdef MODULE + return 0; +#endif +} diff -u --recursive --new-file v2.1.67/linux/net/protocols.c linux/net/protocols.c --- v2.1.67/linux/net/protocols.c Mon Jun 16 16:36:02 1997 +++ linux/net/protocols.c Sun Nov 30 14:00:40 1997 @@ -10,8 +10,6 @@ #include #include -#define CONFIG_UNIX /* always present... */ - #ifdef CONFIG_UNIX #include #include @@ -24,6 +22,14 @@ #endif #endif /* INET */ +#ifdef CONFIG_NETLINK +extern void netlink_proto_init(struct net_proto *pro); +#endif + +#ifdef CONFIG_PACKET +extern void packet_proto_init(struct net_proto *pro); +#endif + #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE) #define NEED_802 #include @@ -61,6 +67,10 @@ #include #endif +#if defined(CONFIG_LLC) +#define NEED_LLC +#endif + #include #ifdef CONFIG_TR @@ -84,6 +94,14 @@ */ struct net_proto protocols[] = { +#ifdef CONFIG_NETLINK + { "NETLINK", netlink_proto_init }, +#endif + +#ifdef CONFIG_PACKET + { "PACKET", packet_proto_init }, +#endif + #ifdef CONFIG_UNIX { "UNIX", unix_proto_init }, /* Unix domain socket family */ #endif diff -u --recursive --new-file v2.1.67/linux/net/rose/af_rose.c linux/net/rose/af_rose.c --- v2.1.67/linux/net/rose/af_rose.c Wed Sep 24 20:05:48 1997 +++ linux/net/rose/af_rose.c Sun Nov 30 14:00:40 1997 @@ -548,6 +548,8 @@ sock_init_data(sock, sk); + skb_queue_head_init(&rose->ack_queue); + sock->ops = &rose_proto_ops; sk->protocol = protocol; sk->mtu = ROSE_MTU; /* 253 */ @@ -555,8 +557,6 @@ init_timer(&rose->timer); init_timer(&rose->idletimer); - skb_queue_head_init(&rose->frag_queue); - rose->t1 = sysctl_rose_call_request_timeout; rose->t2 = sysctl_rose_reset_request_timeout; rose->t3 = sysctl_rose_clear_request_timeout; @@ -583,6 +583,8 @@ sock_init_data(NULL, sk); + skb_queue_head_init(&rose->ack_queue); + sk->type = osk->type; sk->socket = osk->socket; sk->priority = osk->priority; @@ -598,8 +600,6 @@ init_timer(&rose->timer); init_timer(&rose->idletimer); - 
skb_queue_head_init(&rose->frag_queue); - rose->t1 = osk->protinfo.rose->t1; rose->t2 = osk->protinfo.rose->t2; rose->t3 = osk->protinfo.rose->t3; @@ -1068,7 +1068,9 @@ return -ENOTCONN; } - rose_output(sk, skb); /* Shove it onto the queue */ + skb_queue_tail(&sk->write_queue, skb); /* Shove it onto the queue */ + + rose_kick(sk); return len; } @@ -1210,7 +1212,7 @@ return 0; } - case SIOCRSL2CALL: + case SIOCRSSL2CALL: if (!suser()) return -EPERM; if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) ax25_listen_release(&rose_callsign, NULL); @@ -1218,6 +1220,11 @@ return -EFAULT; if (ax25cmp(&rose_callsign, &null_ax25_address) != 0) ax25_listen_register(&rose_callsign, NULL); + return 0; + + case SIOCRSGL2CALL: + if (copy_to_user((void *)arg, &rose_callsign, sizeof(ax25_address))) + return -EFAULT; return 0; case SIOCRSACCEPT: diff -u --recursive --new-file v2.1.67/linux/net/rose/rose_dev.c linux/net/rose/rose_dev.c --- v2.1.67/linux/net/rose/rose_dev.c Mon Jul 7 08:20:00 1997 +++ linux/net/rose/rose_dev.c Sun Nov 30 14:00:40 1997 @@ -221,14 +221,6 @@ /* New-style flags. */ dev->flags = 0; - dev->family = AF_INET; - -#ifdef CONFIG_INET - dev->pa_addr = in_aton("192.168.0.1"); - dev->pa_brdaddr = in_aton("192.168.0.255"); - dev->pa_mask = in_aton("255.255.255.0"); - dev->pa_alen = 4; -#endif if ((dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL)) == NULL) return -ENOMEM; diff -u --recursive --new-file v2.1.67/linux/net/rose/rose_in.c linux/net/rose/rose_in.c --- v2.1.67/linux/net/rose/rose_in.c Mon Jul 7 08:20:00 1997 +++ linux/net/rose/rose_in.c Sun Nov 30 14:00:40 1997 @@ -19,6 +19,7 @@ * ROSE 001 Jonathan(G4KLX) Cloned from nr_in.c * ROSE 002 Jonathan(G4KLX) Return cause and diagnostic codes from Clear Requests. * ROSE 003 Jonathan(G4KLX) New timer architecture. + * Removed M bit processing. 
*/ #include @@ -46,43 +47,6 @@ #include #include -static int rose_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) -{ - struct sk_buff *skbo, *skbn = skb; - - rose_start_idletimer(sk); - - if (more) { - sk->protinfo.rose->fraglen += skb->len; - skb_queue_tail(&sk->protinfo.rose->frag_queue, skb); - return 0; - } - - if (!more && sk->protinfo.rose->fraglen > 0) { /* End of fragment */ - sk->protinfo.rose->fraglen += skb->len; - skb_queue_tail(&sk->protinfo.rose->frag_queue, skb); - - if ((skbn = alloc_skb(sk->protinfo.rose->fraglen, GFP_ATOMIC)) == NULL) - return 1; - - skbn->h.raw = skbn->data; - - skbo = skb_dequeue(&sk->protinfo.rose->frag_queue); - memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len); - kfree_skb(skbo, FREE_READ); - - while ((skbo = skb_dequeue(&sk->protinfo.rose->frag_queue)) != NULL) { - skb_pull(skbo, ROSE_MIN_LEN); - memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len); - kfree_skb(skbo, FREE_READ); - } - - sk->protinfo.rose->fraglen = 0; - } - - return sock_queue_rcv_skb(sk, skbn); -} - /* * State machine for state 1, Awaiting Call Accepted State. * The handling of the timer(s) is in file rose_timer.c. 
@@ -166,6 +130,7 @@ sk->protinfo.rose->vr = 0; sk->protinfo.rose->va = 0; sk->protinfo.rose->vl = 0; + rose_requeue_frames(sk); break; case ROSE_CLEAR_REQUEST: @@ -191,11 +156,9 @@ rose_start_t2timer(sk); rose_stop_idletimer(sk); } else { - if (sk->protinfo.rose->condition & ROSE_COND_PEER_RX_BUSY) { - sk->protinfo.rose->va = nr; - } else { - rose_check_iframes_acked(sk, nr); - } + rose_frames_acked(sk, nr); + if (frametype == ROSE_RNR) + rose_requeue_frames(sk); } break; @@ -213,15 +176,12 @@ rose_stop_idletimer(sk); break; } - if (sk->protinfo.rose->condition & ROSE_COND_PEER_RX_BUSY) { - sk->protinfo.rose->va = nr; - } else { - rose_check_iframes_acked(sk, nr); - } + rose_frames_acked(sk, nr); if (sk->protinfo.rose->condition & ROSE_COND_OWN_RX_BUSY) break; if (ns == sk->protinfo.rose->vr) { - if (rose_queue_rx_frame(sk, skb, m) == 0) { + rose_start_idletimer(sk); + if (sock_queue_rcv_skb(sk, skb) == 0) { sk->protinfo.rose->vr = (sk->protinfo.rose->vr + 1) % ROSE_MODULUS; queued = 1; } else { @@ -270,6 +230,7 @@ sk->protinfo.rose->vs = 0; sk->protinfo.rose->vl = 0; sk->protinfo.rose->state = ROSE_STATE_3; + rose_requeue_frames(sk); break; case ROSE_CLEAR_REQUEST: diff -u --recursive --new-file v2.1.67/linux/net/rose/rose_link.c linux/net/rose/rose_link.c --- v2.1.67/linux/net/rose/rose_link.c Mon Jul 7 08:20:00 1997 +++ linux/net/rose/rose_link.c Sun Nov 30 14:00:40 1997 @@ -113,7 +113,7 @@ else rose_call = &rose_callsign; - neigh->ax25 = ax25_send_frame(skb, 256, rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); + neigh->ax25 = ax25_send_frame(skb, 0, rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); return (neigh->ax25 != NULL); } diff -u --recursive --new-file v2.1.67/linux/net/rose/rose_out.c linux/net/rose/rose_out.c --- v2.1.67/linux/net/rose/rose_out.c Mon Jul 7 08:20:00 1997 +++ linux/net/rose/rose_out.c Sun Nov 30 14:00:40 1997 @@ -12,6 +12,7 @@ * History * ROSE 001 Jonathan(G4KLX) Cloned from nr_out.c * ROSE 003 Jonathan(G4KLX) New 
timer architecture. + * Removed M bit processing. */ #include @@ -38,52 +39,6 @@ #include #include -/* - * This is where all ROSE frames pass; - */ -void rose_output(struct sock *sk, struct sk_buff *skb) -{ - struct sk_buff *skbn; - unsigned char header[ROSE_MIN_LEN]; - int err, frontlen, len; - - if (skb->len - ROSE_MIN_LEN > ROSE_MAX_PACKET_SIZE) { - /* Save a copy of the Header */ - memcpy(header, skb->data, ROSE_MIN_LEN); - skb_pull(skb, ROSE_MIN_LEN); - - frontlen = skb_headroom(skb); - - while (skb->len > 0) { - if ((skbn = sock_alloc_send_skb(sk, frontlen + ROSE_MAX_PACKET_SIZE, 0, 0, &err)) == NULL) - return; - - skb_reserve(skbn, frontlen); - - len = (ROSE_MAX_PACKET_SIZE > skb->len) ? skb->len : ROSE_MAX_PACKET_SIZE; - - /* Copy the user data */ - memcpy(skb_put(skbn, len), skb->data, len); - skb_pull(skb, len); - - /* Duplicate the Header */ - skb_push(skbn, ROSE_MIN_LEN); - memcpy(skbn->data, header, ROSE_MIN_LEN); - - if (skb->len > 0) - skbn->data[2] |= ROSE_M_BIT; - - skb_queue_tail(&sk->write_queue, skbn); /* Throw it on the queue */ - } - - kfree_skb(skb, FREE_WRITE); - } else { - skb_queue_tail(&sk->write_queue, skb); /* Throw it on the queue */ - } - - rose_kick(sk); -} - /* * This procedure is passed a buffer descriptor for an iframe. It builds * the rest of the control part of the frame and then writes it out. @@ -103,8 +58,8 @@ void rose_kick(struct sock *sk) { - struct sk_buff *skb; - unsigned short end; + struct sk_buff *skb, *skbn; + unsigned short start, end; if (sk->protinfo.rose->state != ROSE_STATE_3) return; @@ -115,11 +70,14 @@ if (skb_peek(&sk->write_queue) == NULL) return; - end = (sk->protinfo.rose->va + sysctl_rose_window_size) % ROSE_MODULUS; + start = (skb_peek(&sk->protinfo.rose->ack_queue) == NULL) ? 
sk->protinfo.rose->va : sk->protinfo.rose->vs; + end = (sk->protinfo.rose->va + sysctl_rose_window_size) % ROSE_MODULUS; - if (sk->protinfo.rose->vs == end) + if (start == end) return; + sk->protinfo.rose->vs = start; + /* * Transmit data until either we're out of data to send or * the window is full. @@ -128,13 +86,25 @@ skb = skb_dequeue(&sk->write_queue); do { + if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { + skb_queue_head(&sk->write_queue, skb); + break; + } + + skb_set_owner_w(skbn, sk); + /* - * Transmit the frame. + * Transmit the frame copy. */ - rose_send_iframe(sk, skb); + rose_send_iframe(sk, skbn); sk->protinfo.rose->vs = (sk->protinfo.rose->vs + 1) % ROSE_MODULUS; + /* + * Requeue the original data frame. + */ + skb_queue_tail(&sk->protinfo.rose->ack_queue, skb); + } while (sk->protinfo.rose->vs != end && (skb = skb_dequeue(&sk->write_queue)) != NULL); sk->protinfo.rose->vl = sk->protinfo.rose->vr; @@ -159,16 +129,6 @@ sk->protinfo.rose->condition &= ~ROSE_COND_ACK_PENDING; rose_stop_timer(sk); -} - -void rose_check_iframes_acked(struct sock *sk, unsigned short nr) -{ - if (sk->protinfo.rose->vs == nr) { - sk->protinfo.rose->va = nr; - } else { - if (sk->protinfo.rose->va != nr) - sk->protinfo.rose->va = nr; - } } #endif diff -u --recursive --new-file v2.1.67/linux/net/rose/rose_route.c linux/net/rose/rose_route.c --- v2.1.67/linux/net/rose/rose_route.c Mon Jul 7 08:20:00 1997 +++ linux/net/rose/rose_route.c Sun Nov 30 14:00:40 1997 @@ -63,7 +63,7 @@ */ static int rose_add_node(struct rose_route_struct *rose_route, struct device *dev) { - struct rose_node *rose_node, *rose_tmpn, *rose_tmpp; + struct rose_node *rose_node; struct rose_neigh *rose_neigh; unsigned long flags; int i; @@ -116,55 +116,18 @@ restore_flags(flags); } - /* - * This is a new node to be inserted into the list. Find where it needs - * to be inserted into the list, and insert it. 
We want to be sure - * to order the list in descending order of mask size to ensure that - * later when we are searching this list the first match will be the - * best match. - */ if (rose_node == NULL) { - rose_tmpn = rose_node_list; - rose_tmpp = NULL; - - while (rose_tmpn != NULL) { - if (rose_tmpn->mask > rose_route->mask) { - rose_tmpp = rose_tmpn; - rose_tmpn = rose_tmpn->next; - } else { - break; - } - } - - /* create new node */ if ((rose_node = kmalloc(sizeof(*rose_node), GFP_ATOMIC)) == NULL) return -ENOMEM; - rose_node->address = rose_route->address; - rose_node->mask = rose_route->mask; - rose_node->count = 1; + rose_node->address = rose_route->address; + rose_node->mask = rose_route->mask; + rose_node->count = 1; rose_node->neighbour[0] = rose_neigh; save_flags(flags); cli(); - - if (rose_tmpn == NULL) { - if (rose_tmpp == NULL) { /* Empty list */ - rose_node_list = rose_node; - rose_node->next = NULL; - } else { - rose_tmpp->next = rose_node; - rose_node->next = NULL; - } - } else { - if (rose_tmpp == NULL) { /* 1st node */ - rose_node->next = rose_node_list; - rose_node_list = rose_node; - } else { - rose_tmpp->next = rose_node; - rose_node->next = rose_tmpn; - } - } - + rose_node->next = rose_node_list; + rose_node_list = rose_node; restore_flags(flags); rose_neigh->count++; @@ -487,19 +450,28 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, unsigned char *diagnostic) { struct rose_node *node; + struct rose_neigh *neigh; int failed = 0; + int mask = 0; int i; - for (node = rose_node_list; node != NULL; node = node->next) { + for (neigh = NULL, node = rose_node_list; node != NULL; node = node->next) { if (rosecmpm(addr, &node->address, node->mask) == 0) { - for (i = 0; i < node->count; i++) { - if (!rose_ftimer_running(node->neighbour[i])) - return node->neighbour[i]; - else - failed = 1; + if (node->mask > mask) { + mask = node->mask; + + for (i = 0; i < node->count; i++) { + if 
(!rose_ftimer_running(node->neighbour[i])) + neigh = node->neighbour[i]; + else + failed = 1; + } } } } + + if (neigh != NULL) + return neigh; if (failed) { *cause = ROSE_OUT_OF_ORDER; diff -u --recursive --new-file v2.1.67/linux/net/rose/rose_subr.c linux/net/rose/rose_subr.c --- v2.1.67/linux/net/rose/rose_subr.c Mon Jul 7 08:20:00 1997 +++ linux/net/rose/rose_subr.c Sun Nov 30 14:00:40 1997 @@ -49,8 +49,47 @@ while ((skb = skb_dequeue(&sk->write_queue)) != NULL) kfree_skb(skb, FREE_WRITE); - while ((skb = skb_dequeue(&sk->protinfo.rose->frag_queue)) != NULL) - kfree_skb(skb, FREE_READ); + while ((skb = skb_dequeue(&sk->protinfo.rose->ack_queue)) != NULL) + kfree_skb(skb, FREE_WRITE); +} + +/* + * This routine purges the input queue of those frames that have been + * acknowledged. This replaces the boxes labelled "V(a) <- N(r)" on the + * SDL diagram. + */ +void rose_frames_acked(struct sock *sk, unsigned short nr) +{ + struct sk_buff *skb; + + /* + * Remove all the ack-ed frames from the ack queue. + */ + if (sk->protinfo.rose->va != nr) { + while (skb_peek(&sk->protinfo.rose->ack_queue) != NULL && sk->protinfo.rose->va != nr) { + skb = skb_dequeue(&sk->protinfo.rose->ack_queue); + kfree_skb(skb, FREE_WRITE); + sk->protinfo.rose->va = (sk->protinfo.rose->va + 1) % ROSE_MODULUS; + } + } +} + +void rose_requeue_frames(struct sock *sk) +{ + struct sk_buff *skb, *skb_prev = NULL; + + /* + * Requeue all the un-ack-ed frames on the output queue to be picked + * up by rose_kick. This arrangement handles the possibility of an + * empty output queue. 
+ */ + while ((skb = skb_dequeue(&sk->protinfo.rose->ack_queue)) != NULL) { + if (skb_prev == NULL) + skb_queue_head(&sk->write_queue, skb); + else + skb_append(skb_prev, skb); + skb_prev = skb; + } } /* diff -u --recursive --new-file v2.1.67/linux/net/sched/Makefile linux/net/sched/Makefile --- v2.1.67/linux/net/sched/Makefile Wed Dec 31 16:00:00 1969 +++ linux/net/sched/Makefile Sun Nov 30 14:00:40 1997 @@ -0,0 +1,71 @@ +# +# Makefile for the Linux Traffic Control Unit. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +O_TARGET := sched.o + +O_OBJS := sch_generic.o + +ifeq ($(CONFIG_NET_SCH_CBQ), y) +O_OBJS += sch_cbq.o +else + ifeq ($(CONFIG_NET_SCH_CBQ), m) + M_OBJS += sch_cbq.o + endif +endif + +ifeq ($(CONFIG_NET_SCH_CSZ), y) +O_OBJS += sch_csz.o +else + ifeq ($(CONFIG_NET_SCH_CSZ), m) + M_OBJS += sch_csz.o + endif +endif + +ifeq ($(CONFIG_NET_SCH_SFQ), y) +O_OBJS += sch_sfq.o +else + ifeq ($(CONFIG_NET_SCH_SFQ), m) + M_OBJS += sch_sfq.o + endif +endif + +ifeq ($(CONFIG_NET_SCH_RED), y) +O_OBJS += sch_red.o +else + ifeq ($(CONFIG_NET_SCH_RED), m) + M_OBJS += sch_red.o + endif +endif + +ifeq ($(CONFIG_NET_SCH_TBF), y) +O_OBJS += sch_tbf.o +else + ifeq ($(CONFIG_NET_SCH_TBF), m) + M_OBJS += sch_tbf.o + endif +endif + + +ifeq ($(CONFIG_NET_SCH_PFIFO), y) +O_OBJS += sch_fifo.o +else + ifeq ($(CONFIG_NET_SCH_PFIFO), m) + M_OBJS += sch_fifo.o + endif +endif + +ifeq ($(CONFIG_NET_SCH_PRIO), y) +O_OBJS += sch_prio.o +else + ifeq ($(CONFIG_NET_SCH_PRIO), m) + M_OBJS += sch_prio.o + endif +endif + +include $(TOPDIR)/Rules.make diff -u --recursive --new-file v2.1.67/linux/net/sched/sch_cbq.c linux/net/sched/sch_cbq.c --- v2.1.67/linux/net/sched/sch_cbq.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/sch_cbq.c Sun Nov 30 14:00:40 1997 @@ -0,0 +1,839 @@ +/* + 
* net/sched/sch_cbq.c Class-Based Queueing discipline. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Class-Based Queueing (CBQ) algorithm. + ======================================= + + Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource + Management Models for Packet Networks", + IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 + + [2] Sally Floyd, "Notes on CBQ and Guaranted Service", 1995 + + [3] Sally Floyd, "Notes on Class-Based Queueing: Setting + Parameters", 1996 + + Algorithm skeleton is taken from from NS simulator cbq.cc. + + ----------------------------------------------------------------------- + + Differences from NS version. + + --- WRR algorith is different. Our version looks more reasonable :-) + and fair when quanta are allowed to be less than MTU. + + --- cl->aveidle is REALLY limited from below by cl->minidle. + Seems, it was bug in NS. + + --- Purely lexical change: "depth" -> "level", "maxdepth" -> "toplevel". + When depth increases we expect, that the thing becomes lower, does not it? :-) + Besides that, "depth" word is semantically overloaded --- + "token bucket depth", "sfq depth"... Besides that, the algorithm + was called "top-LEVEL sharing". + + PROBLEM. + + --- Linux has no EOI event at the moment, so that we cannot + estimate true class idle time. Three workarounds are possible, + all of them have drawbacks: + + 1. (as now) Consider the next dequeue event as sign that + previous packet is finished. 
It is wrong because of ping-pong + buffers, but on permanently loaded link it is true. + 2. (NS approach) Use as link busy time estimate skb->leb/"physical + bandwidth". Even more wrong f.e. on ethernet real busy time much + higher because of collisions. + 3. (seems, the most clever) Split net bh to two parts: + NETRX_BH (for received packets) and preserve NET_BH for transmitter. + It will not require driver changes (NETRX_BH flag will be set + in netif_rx), but will allow to trace EOIs more precisely + and will save useless checks in net_bh. Besides that we will + have to eliminate random calling hard_start_xmit with dev->tbusy flag + (done) and to drop failure_q --- i.e. if !dev->tbusy hard_start_xmit + MUST succeed; failed packets will be dropped on the floor. +*/ + +#define CBQ_TOPLEVEL_SHARING +/* #define CBQ_NO_TRICKERY */ + +#define CBQ_CLASSIFIER(skb, q) ((q)->fallback_class) + +struct cbq_class +{ +/* Parameters */ + int priority; /* priority */ +#ifdef CBQ_TOPLEVEL_SHARING + int level; /* level of the class in hierarchy: + 0 for leaf classes, and maximal + level of childrens + 1 for nodes. + */ +#endif + + long maxidle; /* Class paramters: see below. */ + long minidle; + int filter_log; +#ifndef CBQ_NO_TRICKERY + long extradelay; +#endif + + long quantum; /* Allotment per WRR round */ + long rquantum; /* Relative allotment: see below */ + + int cell_log; + unsigned long L_tab[256]; + + struct Qdisc *qdisc; /* ptr to CBQ discipline */ + struct cbq_class *root; /* Ptr to root class; + root can be not unique. 
+ */ + struct cbq_class *parent; /* Ptr to parent in the class tree */ + struct cbq_class *borrow; /* NULL if class is bandwidth limited; + parent otherwise */ + + struct Qdisc *q; /* Elementary queueing discipline */ + struct cbq_class *next; /* next class in this priority band */ + + struct cbq_class *next_alive; /* next class with backlog in this priority band */ + +/* Variables */ + psched_time_t last; + psched_time_t undertime; + long avgidle; + long deficit; /* Saved deficit for WRR */ + char awake; /* Class is in alive list */ + +#if 0 + void (*overlimit)(struct cbq_class *cl); +#endif +}; + +#define L2T(cl,len) ((cl)->L_tab[(len)>>(cl)->cell_log]) + +struct cbq_sched_data +{ + struct cbq_class *classes[CBQ_MAXPRIO]; /* List of all classes */ + int nclasses[CBQ_MAXPRIO]; + unsigned quanta[CBQ_MAXPRIO]; + unsigned mtu; + int cell_log; + unsigned long L_tab[256]; + struct cbq_class *fallback_class; + + unsigned activemask; + struct cbq_class *active[CBQ_MAXPRIO]; /* List of all classes + with backlog */ + struct cbq_class *last_sent; + int last_sent_len; + + psched_time_t now; /* Cached timestamp */ + + struct timer_list wd_timer; /* Wathchdog timer, that + started when CBQ has + backlog, but cannot + transmit just now */ + unsigned long wd_expires; +#ifdef CBQ_TOPLEVEL_SHARING + struct cbq_class *borrowed; + int toplevel; +#endif +}; + +/* + WRR quanta + ---------- + + cl->quantum is number added to class allotment on every round. + cl->rquantum is "relative" quantum. + + For real-time classes: + + cl->quantum = (cl->rquantum*q->nclasses[prio]*q->mtu)/q->quanta[prio] + + where q->quanta[prio] is sum of all rquanta for given priority. + cl->rquantum can be identified with absolute rate of the class + in arbitrary units (f.e. 
bytes/sec) + + In this case, delay introduced by round-robin was estimated by + Sally Floyd [2] as: + + D = q->nclasses*q->mtu/(bandwidth/2) + + Note, that D does not depend on class rate (it is very bad), + but not much worse than Gallager-Parekh estimate for CSZ + C/R = q->mtu/rate, when real-time classes have close rates. + + For not real-time classes this folmula is not necessary, + so that cl->quantum can be set to any reasonable not zero value. + Apparently, it should be proportional to class rate, if the + rate is not zero. +*/ + +/* + maxidle, minidle, extradelay + ---------------------------- + + CBQ estimator calculates smoothed class idle time cl->aveidle, + considering class as virtual interface with corresponding bandwidth. + When cl->aveidle wants to be less than zero, class is overlimit. + When it is positive, class is underlimit. + + * maxidle bounds aveidle from above. + It controls maximal length of burst in this class after + long period of idle time. Burstness of active class + is controlled by filter constant cl->filter_log, + but this number is related to burst length only indirectly. + + * minidle is a negative number, normally set to zero. + Setting it to not zero value allows avgidle to drop + below zero, effectively penalizing class, when it is overlimit. + When the class load will decrease, it will take a time to + raise negative avgidle to put the class at limit. + It should be set to zero for leaf classes. + + * extradelay is penalty in delay, when a class goes overlimit. + I believe this parameter is useless and confusing. + Setting it to not zero forces class to accumulate + its "idleness" for extradelay and then send BURST of packets + until going to overlimit again. Non-sense. + + For details see [1] and [3]. + + Really, minidle and extradelay are irrelevant to real scheduling + task. 
As I understand, SF&VJ introduced them to experiment + with CBQ simulator in attempts to fix erratic behaviour + of ancestor-only (and, partially, top-level) algorithm. + + WARNING. + + User passes them measured in usecs, but cl->minidle, + cl->maxidle and cl->aveidle are scaled with cl->filter_log + in the text of the scheduler. +*/ + +/* + A packet has just been enqueued on the empty class. + cbq_wakeup_class adds it to the tail of active class list + of its priority band. + */ + +static __inline__ void cbq_wakeup_class(struct cbq_class *cl) +{ + struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data; + int prio = cl->priority; + struct cbq_class *cl_tail; + + cl->awake = 1; + + cl_tail = q->active[prio]; + q->active[prio] = cl; + + if (cl_tail != NULL) { + cl->next_alive = cl_tail->next_alive; + cl->deficit = 0; + } else { + cl->next_alive = cl; + q->activemask |= (1<deficit = cl->quantum; + } +} + +static int +cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl = CBQ_CLASSIFIER(skb, q); + + if (cl->q->enqueue(skb, cl->q) == 1) { + sch->q.qlen++; + +#ifdef CBQ_TOPLEVEL_SHARING + if (q->toplevel > 0) { + psched_time_t now; + PSCHED_GET_TIME(now); + if (PSCHED_TLESS(cl->undertime, now)) + q->toplevel = 0; + else if (q->toplevel > 1 && cl->borrow && + PSCHED_TLESS(cl->borrow->undertime, now)) + q->toplevel = 1; + } +#endif + if (!cl->awake) + cbq_wakeup_class(cl); + return 1; + } + return 0; +} + +static __inline__ void cbq_delay(struct cbq_sched_data *q, struct cbq_class *cl) +{ + long delay; + + delay = PSCHED_TDIFF(cl->undertime, q->now); + if (q->wd_expires == 0 || q->wd_expires - delay > 0) + q->wd_expires = delay; +} + +static void cbq_watchdog(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc*)arg; + struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data; + + q->wd_timer.expires = 0; + q->wd_timer.function = NULL; + qdisc_wakeup(sch->dev); +} 
+ +static __inline__ void +cbq_update(struct cbq_sched_data *q) +{ + struct cbq_class *cl; + + for (cl = q->last_sent; cl; cl = cl->parent) { + long avgidle = cl->avgidle; + long idle; + + /* + (now - last) is total time between packet right edges. + (last_pktlen/rate) is "virtual" busy time, so that + + idle = (now - last) - last_pktlen/rate + */ + + idle = PSCHED_TDIFF(q->now, cl->last) + - L2T(cl, q->last_sent_len); + + /* true_avgidle := (1-W)*true_avgidle + W*idle, + where W=2^{-filter_log}. But cl->avgidle is scaled: + cl->avgidle == true_avgidle/W, + hence: + */ + avgidle += idle - (avgidle>>cl->filter_log); + + if (avgidle <= 0) { + /* Overlimit or at-limit */ +#ifdef CBQ_NO_TRICKERY + avgidle = 0; +#else + if (avgidle < cl->minidle) + avgidle = cl->minidle; +#endif + + /* This line was missing in NS. */ + cl->avgidle = avgidle; + + /* Calculate expected time, when this class + will be allowed to send. + It will occur, when: + (1-W)*true_avgidle + W*delay = 0, i.e. + idle = (1/W - 1)*(-true_avgidle) + or + idle = (1 - W)*(-cl->avgidle); + + That is not all. + We want to set undertime to the moment, when + the class is allowed to start next transmission i.e. + (undertime + next_pktlen/phys_bandwidth) + - now - next_pktlen/rate = idle + or + undertime = now + idle + next_pktlen/rate + - next_pktlen/phys_bandwidth + + We do not know next packet length, but can + estimate it with average packet length + or current packet_length. 
+ */ + + idle = (-avgidle) - ((-avgidle) >> cl->filter_log); + idle += L2T(q, q->last_sent_len); + idle -= L2T(cl, q->last_sent_len); + PSCHED_TADD2(q->now, idle, cl->undertime); +#ifndef CBQ_NO_TRICKERY + /* Do not forget extra delay :-) */ + PSCHED_TADD(cl->undertime, cl->extradelay); +#endif + } else { + /* Underlimit */ + + PSCHED_SET_PASTPERFECT(cl->undertime); + if (avgidle > cl->maxidle) + cl->avgidle = cl->maxidle; + else + cl->avgidle = avgidle; + } + cl->last = q->now; + } + +#ifdef CBQ_TOPLEVEL_SHARING + cl = q->last_sent; + + if (q->borrowed && q->toplevel >= q->borrowed->level) { + if (cl->q->q.qlen <= 1 || PSCHED_TLESS(q->now, q->borrowed->undertime)) + q->toplevel = CBQ_MAXLEVEL; + else if (q->borrowed != cl) + q->toplevel = q->borrowed->level; + } +#endif + + q->last_sent = NULL; +} + +static __inline__ int +cbq_under_limit(struct cbq_class *cl) +{ + struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data; + struct cbq_class *this_cl = cl; + + if (PSCHED_IS_PASTPERFECT(cl->undertime) || cl->parent == NULL) + return 1; + + if (PSCHED_TLESS(cl->undertime, q->now)) { + q->borrowed = cl; + return 1; + } + + while (!PSCHED_IS_PASTPERFECT(cl->undertime) && + PSCHED_TLESS(q->now, cl->undertime)) { + cl = cl->borrow; + if (cl == NULL +#ifdef CBQ_TOPLEVEL_SHARING + || cl->level > q->toplevel +#endif + ) { +#if 0 + this_cl->overlimit(this_cl); +#else + cbq_delay(q, this_cl); +#endif + return 0; + } + } + q->borrowed = cl; + return 1; +} + +static __inline__ struct sk_buff * +cbq_dequeue_prio(struct Qdisc *sch, int prio, int fallback) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl_tail, *cl_prev, *cl; + struct sk_buff *skb; + int deficit; + + cl_tail = cl_prev = q->active[prio]; + cl = cl_prev->next_alive; + + do { + deficit = 0; + + /* Start round */ + do { + /* Class is empty */ + if (cl->q->q.qlen == 0) + goto skip_class; + + if (fallback) { + /* Fallback pass: all classes are overlimit; + we send from 
the first class that is allowed + to borrow. + */ + + if (cl->borrow == NULL) + goto skip_class; + } else { + /* Normal pass: check that class is under limit */ + if (!cbq_under_limit(cl)) + goto skip_class; + } + + if (cl->deficit <= 0) { + /* Class exhausted its allotment per this + round. + */ + deficit = 1; + goto next_class; + } + + skb = cl->q->dequeue(cl->q); + + /* Class did not give us any skb :-( + It could occur if cl->q == "tbf" + */ + if (skb == NULL) + goto skip_class; + + cl->deficit -= skb->len; + q->last_sent = cl; + q->last_sent_len = skb->len; + + if (cl->deficit <= 0) { + q->active[prio] = cl; + cl = cl->next_alive; + cl->deficit += cl->quantum; + } + return skb; + +skip_class: + cl->deficit = 0; + + if (cl->q->q.qlen == 0) { + /* Class is empty, declare it dead */ + cl_prev->next_alive = cl->next_alive; + cl->awake = 0; + + /* Did cl_tail point to it? */ + if (cl == cl_tail) { + /* Repair it! */ + cl_tail = cl_prev; + + /* Was it the last class in this band? */ + if (cl == cl_tail) { + /* Kill the band! */ + q->active[prio] = NULL; + q->activemask &= ~(1<next_alive; + cl->deficit += cl->quantum; + } while (cl_prev != cl_tail); + } while (deficit); + + q->active[prio] = cl_prev; + + return NULL; +} + +static __inline__ struct sk_buff * +cbq_dequeue_1(struct Qdisc *sch, int fallback) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct sk_buff *skb; + unsigned activemask; + + activemask = q->activemask; + while (activemask) { + int prio = ffz(~activemask); + activemask &= ~(1<data; + + PSCHED_GET_TIME(q->now); + + if (q->last_sent) + cbq_update(q); + + q->wd_expires = 0; + + skb = cbq_dequeue_1(sch, 0); + if (skb) + return skb; + + /* All the classes are overlimit. + Search for overlimit class, which is allowed to borrow + and use it as fallback case. 
+ */ + +#ifdef CBQ_TOPLEVEL_SHARING + q->toplevel = CBQ_MAXLEVEL; +#endif + + skb = cbq_dequeue_1(sch, 1); + if (skb) + return skb; + + /* No packets in scheduler or nobody wants to give them to us :-( + Sigh... start watchdog timer in the last case. */ + + if (sch->q.qlen && q->wd_expires) { + if (q->wd_timer.function) + del_timer(&q->wd_timer); + q->wd_timer.function = cbq_watchdog; + q->wd_timer.expires = jiffies + PSCHED_US2JIFFIE(q->wd_expires); + add_timer(&q->wd_timer); + } + return NULL; +} + +/* CBQ class maintanance routines */ + +static void cbq_adjust_levels(struct cbq_class *this) +{ + struct cbq_class *cl; + + for (cl = this->parent; cl; cl = cl->parent) { + if (cl->level > this->level) + return; + cl->level = this->level + 1; + this = cl; + } +} + +static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) +{ + struct cbq_class *cl; + + if (q->quanta[prio] == 0) + return; + + for (cl = q->classes[prio]; cl; cl = cl->next) { + if (cl->rquantum) + cl->quantum = (cl->rquantum*q->mtu*q->nclasses[prio])/ + q->quanta[prio]; + } +} + +static __inline__ int cbq_unlink_class(struct cbq_class *this) +{ + struct cbq_class *cl, **clp; + struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data; + + for (clp = &q->classes[this->priority]; (cl = *clp) != NULL; + clp = &cl->next) { + if (cl == this) { + *clp = cl->next; + return 0; + } + } + return -ENOENT; +} + +static int cbq_prune(struct cbq_class *this) +{ + struct cbq_class *cl; + int prio = this->priority; + struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data; + + qdisc_reset(this->q); + + if (cbq_unlink_class(this)) + return -ENOENT; + + if (this->awake) { + struct cbq_class *cl_prev = q->active[prio]; + do { + cl = cl_prev->next_alive; + if (cl == this) { + cl_prev->next_alive = cl->next_alive; + + if (cl == q->active[prio]) { + q->active[prio] = cl; + if (cl == q->active[prio]) { + q->active[prio] = NULL; + q->activemask &= ~(1<next_alive; + cl->deficit += cl->quantum; 
+ break; + } + } while ((cl_prev = cl) != q->active[prio]); + } + + --q->nclasses[prio]; + if (this->rquantum) { + q->quanta[prio] -= this->rquantum; + cbq_normalize_quanta(q, prio); + } + + if (q->fallback_class == this) + q->fallback_class = NULL; + + this->parent = NULL; + this->borrow = NULL; + this->root = this; + this->qdisc = NULL; + return 0; +} + +static int cbq_graft(struct cbq_class *this, struct cbq_class *parent) +{ + struct cbq_class *cl, **clp; + int prio = this->priority; + struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data; + + qdisc_reset(this->q); + + + for (clp = &q->classes[prio]; (cl = *clp) != NULL; clp = &cl->next) { + if (cl == this) + return -EBUSY; + } + + cl->next = NULL; + *clp = cl; + + cl->parent = parent; + cl->borrow = parent; + cl->root = parent ? parent->root : cl; + + ++q->nclasses[prio]; + if (this->rquantum) { + q->quanta[prio] += this->rquantum; + cbq_normalize_quanta(q, prio); + } + + cbq_adjust_levels(this); + + return 0; +} + + +static void +cbq_reset(struct Qdisc* sch) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl; + int prio; + + q->activemask = 0; + q->last_sent = NULL; + if (q->wd_timer.function) { + del_timer(&q->wd_timer); + q->wd_timer.expires = 0; + q->wd_timer.function = NULL; + } +#ifdef CBQ_TOPLEVEL_SHARING + q->toplevel = CBQ_MAXLEVEL; +#endif + + for (prio = 0; prio < CBQ_MAXPRIO; prio++) { + q->active[prio] = NULL; + + for (cl = q->classes[prio]; cl; cl = cl->next) { + qdisc_reset(cl->q); + + cl->next_alive = NULL; + PSCHED_SET_PASTPERFECT(cl->undertime); + cl->avgidle = 0; + cl->deficit = 0; + cl->awake = 0; + } + } +} + +static void +cbq_destroy(struct Qdisc* sch) +{ + struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data; + struct cbq_class *cl, **clp; + int prio; + + for (prio = 0; prio < CBQ_MAXPRIO; prio++) { + struct cbq_class *cl_head = q->classes[prio]; + + for (clp = &cl_head; (cl=*clp) != NULL; clp = &cl->next) { + 
qdisc_destroy(cl->q); + kfree(cl); + } + } +} + +static int cbq_control(struct Qdisc *sch, void *arg) +{ + struct cbq_sched_data *q; + + q = (struct cbq_sched_data *)sch->data; + + /* Do attachment here. It is the last thing to do. */ + + return -EINVAL; +} + +static int cbq_init(struct Qdisc *sch, void *arg) +{ + struct cbq_sched_data *q; + struct cbqctl *ctl = (struct cbqctl*)arg; + + q = (struct cbq_sched_data *)sch->data; + init_timer(&q->wd_timer); + q->wd_timer.data = (unsigned long)sch; +#ifdef CBQ_TOPLEVEL_SHARING + q->toplevel = CBQ_MAXLEVEL; +#endif + + return 0; +} + + +struct Qdisc_ops cbq_ops = +{ + NULL, + "cbq", + 0, + sizeof(struct cbq_sched_data), + cbq_enqueue, + cbq_dequeue, + cbq_reset, + cbq_destroy, + cbq_init, + cbq_control, +}; + +#ifdef MODULE +int init_module(void) +{ + int err; + + /* Load once and never free it. */ + MOD_INC_USE_COUNT; + + err = register_qdisc(&cbq_ops); + if (err) + MOD_DEC_USE_COUNT; + return err; +} + +void cleanup_module(void) +{ +} +#endif diff -u --recursive --new-file v2.1.67/linux/net/sched/sch_csz.c linux/net/sched/sch_csz.c --- v2.1.67/linux/net/sched/sch_csz.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/sch_csz.c Sun Nov 30 14:00:40 1997 @@ -0,0 +1,832 @@ +/* + * net/sched/sch_csz.c Clark-Shenker-Zhang scheduler. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Clark-Shenker-Zhang algorithm. + ======================================= + + SOURCE. + + David D. 
Clark, Scott Shenker and Lixia Zhang + "Supporting Real-Time Applications in an Integrated Services Packet + Network: Architecture and Mechanism". + + CBQ presents a flexible universal algorithm for packet scheduling, + but it has pretty poor delay characteristics. + Round-robin scheduling and link-sharing goals + apparently contradict to minimization of network delay and jitter. + Moreover, correct handling of predicted flows seems to be + impossible in CBQ. + + CSZ presents more precise but less flexible and less efficient + approach. As I understand, the main idea is to create + WFQ flows for each guaranteed service and to allocate + the rest of bandwith to dummy flow-0. Flow-0 comprises + the predicted services and the best effort traffic; + it is handled by a priority scheduler with the highest + priority band allocated for predicted services, and the rest --- + to the best effort packets. + + Note, that in CSZ flows are NOT limited to their bandwidth. + It is supposed, that flow passed admission control at the edge + of QoS network and it more need no shaping. Any attempt to improve + the flow or to shape it to a token bucket at intermediate hops + will introduce undesired delays and raise jitter. + + At the moment CSZ is the only scheduler that provides + real guaranteed service. Another schemes (including CBQ) + do not provide guaranteed delay and randomize jitter. + There exists the statement (Sally Floyd), that delay + can be estimated by a IntServ compliant formulae. + This result is true formally, but it is wrong in principle. + At first, it ignores delays introduced by link sharing. + And the second (and main) it limits bandwidth, + it is fatal flaw. + + ALGORITHM. + + --- Notations. + + $B$ is link bandwidth (bits/sec). + + $I$ is set of all flows, including flow $0$. + Every flow $a \in I$ has associated bandwidth slice $r_a < 1$ and + $\sum_{a \in I} r_a = 1$. + + --- Flow model. + + Let $m_a$ is number of backlogged bits in flow $a$. 
+ The flow is {\em active }, if $m_a > 0$. + This number is discontinuous function of time; + when a packet $i$ arrives: + \[ + m_a(t_i+0) - m_a(t_i-0) = L^i, + \] + where $L^i$ is the length of arrived packet. + The flow queue is drained continuously until $m_a == 0$: + \[ + {d m_a \over dt} = - { B r_a \over \sum_{b \in A} r_b}. + \] + I.e. flow rates are their allocated rates proportionally + scaled to take all available link bandwidth. Apparently, + it is not the only possible policy. F.e. CBQ classes + without borrowing would be modelled by: + \[ + {d m_a \over dt} = - B r_a . + \] + More complicated hierarchical bandwidth allocation + policies are possible, but, unfortunately, basic + flows equation have simple solution only for proportional + scaling. + + --- Departure times. + + We calculate time until the last bit of packet will be sent: + \[ + E_a^i(t) = { m_a(t_i) - \delta_a(t) \over r_a }, + \] + where $\delta_a(t)$ is number of bits drained since $t_i$. + We have to evaluate $E_a^i$ for all queued packets, + then find packet with minimal $E_a^i$ and send it. + + It sounds good, but direct implementation of the algorithm + is absolutely infeasible. Luckily, if flow rates + are scaled proportionally, the equations have simple solution. + + The differential equation for $E_a^i$ is + \[ + {d E_a^i (t) \over dt } = - { d \delta_a(t) \over dt} { 1 \over r_a} = + { B \over \sum_{b \in A} r_b} + \] + with initial condition + \[ + E_a^i (t_i) = { m_a(t_i) \over r_a } . + \] + + Let's introduce an auxiliary function $R(t)$: + + --- Round number. + + Consider the following model: we rotate over active flows, + sending $r_a B$ bits from every flow, so that we send + $B \sum_{a \in A} r_a$ bits per round, that takes + $\sum_{a \in A} r_a$ seconds. + + Hence, $R(t)$ (round number) is monotonically increasing + linear function of time when $A$ is not changed + \[ + { d R(t) \over dt } = { 1 \over \sum_{a \in A} r_a } + \] + and it is continuous when $A$ changes. 
+ + The central observation is that the quantity + $F_a^i = R(t) + E_a^i(t)/B$ does not depend on time at all! + $R(t)$ does not depend on flow, so that $F_a^i$ can be + calculated only once on packet arrival, and we need not + recalculation of $E$ numbers and resorting queues. + Number $F_a^i$ is called finish number of the packet. + It is just value of $R(t)$, when the last bit of packet + will be sent out. + + Maximal finish number on flow is called finish number of flow + and minimal one is "start number of flow". + Apparently, flow is active if and only if $F_a \leq R$. + + When packet of length $L_i$ bit arrives to flow $a$ at time $t_i$, + we calculate number $F_a^i$ as: + + If flow was inactive ($F_a < R$): + $F_a^i = R(t) + {L_i \over B r_a}$ + otherwise + $F_a^i = F_a + {L_i \over B r_a}$ + + These equations complete the algorithm specification. + + It looks pretty hairy, but there exists a simple + procedure for solving these equations. + See procedure csz_update(), that is a generalization of + algorithm from S. Keshav's thesis Chapter 3 + "Efficient Implementation of Fair Queeing". + + NOTES. + + * We implement only the simplest variant of CSZ, + when flow-0 is explicit 4band priority fifo. + It is bad, but we need "peek" operation in addition + to "dequeue" to implement complete CSZ. + I do not want to make it, until it is not absolutely + necessary. + + * A primitive support for token bucket filtering + presents too. It directly contradicts to CSZ, but + though the Internet is on the globe ... :-) + yet "the edges of the network" really exist. + + BUGS. + + * Fixed point arithmetic is overcomplicated, suboptimal and even + wrong. Check it later. 
+*/ + + +/* This number is arbitrary */ + +#define CSZ_MAX_GUARANTEED 16 + +#define CSZ_FLOW_ID(skb) (CSZ_MAX_GUARANTEED) + +struct csz_head +{ + struct csz_head *snext; + struct csz_head *sprev; + struct csz_head *fnext; + struct csz_head *fprev; +}; + +struct csz_flow +{ + struct csz_head *snext; + struct csz_head *sprev; + struct csz_head *fnext; + struct csz_head *fprev; + +/* Parameters */ + unsigned long rate; /* Flow rate. Fixed point is at rate_log */ + unsigned long *L_tab; /* Lookup table for L/(B*r_a) values */ + unsigned long max_bytes; /* Maximal length of queue */ +#ifdef CSZ_PLUS_TBF + unsigned long depth; /* Depth of token bucket, normalized + as L/(B*r_a) */ +#endif + +/* Variables */ +#ifdef CSZ_PLUS_TBF + unsigned long tokens; /* Tokens number: usecs */ + psched_time_t t_tbf; + unsigned long R_tbf; + int throttled; +#endif + unsigned peeked; + unsigned long start; /* Finish number of the first skb */ + unsigned long finish; /* Finish number of the flow */ + + struct sk_buff_head q; /* FIFO queue */ +}; + +#define L2R(q,f,L) ((f)->L_tab[(L)>>(q)->cell_log]) + +struct csz_sched_data +{ +/* Parameters */ + unsigned char cell_log; /* 1< 2.1sec is MAXIMAL value */ + +/* Variables */ +#ifdef CSZ_PLUS_TBF + struct timer_list wd_timer; + long wd_expires; +#endif + psched_time_t t_c; /* Time check-point */ + unsigned long R_c; /* R-number check-point */ + unsigned long rate; /* Current sum of rates of active flows */ + struct csz_head s; /* Flows sorted by "start" */ + struct csz_head f; /* Flows sorted by "finish" */ + + struct sk_buff_head other[4];/* Predicted (0) and the best efforts + classes (1,2,3) */ + struct csz_flow flow[CSZ_MAX_GUARANTEED]; /* Array of flows */ +}; + +/* These routines (csz_insert_finish and csz_insert_start) are + the most time consuming part of all the algorithm. + + We insert to sorted list, so that time + is linear with respect to number of active flows in the worst case. 
+ Note that we have not very large number of guaranteed flows, + so that logarithmic algorithms (heap etc.) are useless, + they are slower than linear one when length of list <= 32. + + Heap would take sence if we used WFQ for best efforts + flows, but SFQ is better choice in this case. + */ + + +/* Insert flow "this" to the list "b" before + flow with greater finish number. + */ + +#if 0 +/* Scan forward */ +extern __inline__ void csz_insert_finish(struct csz_head *b, + struct csz_flow *this) +{ + struct csz_head *f = b->fnext; + unsigned long finish = this->finish; + + while (f != b) { + if (((struct csz_flow*)f)->finish - finish > 0) + break; + f = f->fnext; + } + this->fnext = f; + this->fprev = f->fprev; + this->fnext->fprev = this->fprev->fnext = (struct csz_head*)this; +} +#else +/* Scan backward */ +extern __inline__ void csz_insert_finish(struct csz_head *b, + struct csz_flow *this) +{ + struct csz_head *f = b->fprev; + unsigned long finish = this->finish; + + while (f != b) { + if (((struct csz_flow*)f)->finish - finish <= 0) + break; + f = f->fprev; + } + this->fnext = f->fnext; + this->fprev = f; + this->fnext->fprev = this->fprev->fnext = (struct csz_head*)this; +} +#endif + +/* Insert flow "this" to the list "b" before + flow with greater start number. + */ + +extern __inline__ void csz_insert_start(struct csz_head *b, + struct csz_flow *this) +{ + struct csz_head *f = b->snext; + unsigned long start = this->start; + + while (f != b) { + if (((struct csz_flow*)f)->start - start > 0) + break; + f = f->snext; + } + this->snext = f; + this->sprev = f->sprev; + this->snext->sprev = this->sprev->snext = (struct csz_head*)this; +} + + +/* Calculate and return current round number. + It is another time consuming part, but + it is impossible to avoid it. + + Fixed point arithmetic is not ... does not ... Well, it is just CRAP. 
+ */ + +static unsigned long csz_update(struct Qdisc *sch) +{ + struct csz_sched_data *q = (struct csz_sched_data*)sch->data; + struct csz_flow *a; + unsigned long F; + unsigned long tmp; + psched_time_t now; + unsigned long delay; + unsigned long R_c; + + PSCHED_GET_TIME(now); + delay = PSCHED_TDIFF_SAFE(now, q->t_c, 0, goto do_reset); + + if (delay>>q->delta_log) { +do_reset: + /* Delta is too large. + It is possible if MTU/BW > 1<delta_log + (i.e. configuration error) or because of hardware + fault. We have no choice... + */ + qdisc_reset(sch); + return 0; + } + + q->t_c = now; + + for (;;) { + a = (struct csz_flow*)q->f.fnext; + + /* No more active flows. Reset R and exit. */ + if (a == (struct csz_flow*)&q->f) { +#ifdef CSZ_DEBUG + if (q->rate) { + printk("csz_update: rate!=0 on inactive csz\n"); + q->rate = 0; + } +#endif + q->R_c = 0; + return 0; + } + + F = a->finish; + +#ifdef CSZ_DEBUG + if (q->rate == 0) { + printk("csz_update: rate=0 on active csz\n"); + goto do_reset; + } +#endif + + /* + * tmp = (t - q->t_c)/q->rate; + */ + + tmp = ((delay<<(31-q->delta_log))/q->rate)>>(31-q->delta_log+q->R_log); + + tmp += q->R_c; + + /* OK, this flow (and all flows with greater + finish numbers) is still active */ + if (F - tmp > 0) + break; + + /* It is more not active */ + + a->fprev->fnext = a->fnext; + a->fnext->fprev = a->fprev; + + /* + * q->t_c += (F - q->R_c)*q->rate + */ + + tmp = ((F-q->R_c)*q->rate)<R_log; + R_c = F; + q->rate -= a->rate; + + if (delay - tmp >= 0) { + delay -= tmp; + continue; + } + delay = 0; + } + + q->R_c = tmp; + return tmp; +} + +static int +csz_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + unsigned flow_id = CSZ_FLOW_ID(skb); + unsigned long R; + int prio; + struct csz_flow *this; + + if (flow_id >= CSZ_MAX_GUARANTEED) { + prio = flow_id - CSZ_MAX_GUARANTEED; + flow_id = 0; + } + + this = &q->flow[flow_id]; + if (this->q.qlen >= this->max_bytes || this->L_tab == 
NULL) { + kfree_skb(skb, FREE_WRITE); + return 0; + } + + R = csz_update(sch); + + if (this->finish - R >= 0) { + /* It was active */ + this->finish += L2R(q,this,skb->len); + } else { + /* It is inactive; activate it */ + this->finish = R + L2R(q,this,skb->len); + q->rate += this->rate; + csz_insert_finish(&q->f, this); + } + + /* If this flow was empty, remember start number + and insert it into start queue */ + if (this->q.qlen == 0) { + this->start = this->finish; + csz_insert_start(&q->s, this); + } + if (flow_id) + skb_queue_tail(&this->q, skb); + else + skb_queue_tail(&q->other[prio], skb); + sch->q.qlen++; + return 1; +} + +static __inline__ struct sk_buff * +skb_dequeue_best(struct csz_sched_data * q) +{ + int i; + struct sk_buff *skb; + + for (i=0; i<4; i++) { + skb = skb_dequeue(&q->other[i]); + if (skb) { + q->flow[0].q.qlen--; + return skb; + } + } + return NULL; +} + +static __inline__ struct sk_buff * +skb_peek_best(struct csz_sched_data * q) +{ + int i; + struct sk_buff *skb; + + for (i=0; i<4; i++) { + skb = skb_peek(&q->other[i]); + if (skb) + return skb; + } + return NULL; +} + +#ifdef CSZ_PLUS_TBF + +static void csz_watchdog(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc*)arg; + struct csz_sched_data *q = (struct csz_sched_data*)sch->data; + + q->wd_timer.expires = 0; + q->wd_timer.function = NULL; + + qdisc_wakeup(sch->dev); +} + +static __inline__ void +csz_move_queue(struct csz_flow *this, long delta) +{ + this->fprev->fnext = this->fnext; + this->fnext->fprev = this->fprev; + + this->start += delta; + this->finish += delta; + + csz_insert_finish(this); +} + +static __inline__ int csz_enough_tokens(struct csz_sched_data *q, + struct csz_flow *this, + struct sk_buff *skb) +{ + long toks; + long shift; + psched_time_t now; + + PSCHED_GET_TIME(now); + + toks = PSCHED_TDIFF(now, t_tbf) + this->tokens - L2R(q,this,skb->len); + + shift = 0; + if (this->throttled) { + /* Remember aposteriory delay */ + + unsigned long R = csz_update(q); + 
shift = R - this->R_tbf; + this->R_tbf = R; + } + + if (toks >= 0) { + /* Now we have enough tokens to proceed */ + + this->tokens = toks <= this->depth ? toks ? this->depth; + this->t_tbf = now; + + if (!this->throttled) + return 1; + + /* Flow was throttled. Update its start&finish numbers + with delay calculated aposteriori. + */ + + this->throttled = 0; + if (shift > 0) + csz_move_queue(this, shift); + return 1; + } + + if (!this->throttled) { + /* Flow has just been throttled; remember + current round number to calculate aposteriori delay + */ + this->throttled = 1; + this->R_tbf = csz_update(q); + } + + /* Move all the queue to the time when it will be allowed to send. + We should translate time to round number, but it is impossible, + so that we made the most conservative estimate i.e. we suppose + that only this flow is active and, hence, R = t. + Really toks <= R <= toks/r_a. + + This apriory shift in R will be adjusted later to reflect + real delay. We cannot avoid it because of: + - throttled flow continues to be active from the viewpoint + of CSZ, so that it would acquire highest priority, + if you not adjusted start numbers. + - Eventually, finish number would become less than round + number and flow were declared inactive. 
+ */ + + toks = -toks; + + /* Remeber, that we should start watchdog */ + if (toks < q->wd_expires) + q->wd_expires = toks; + + toks >>= q->R_log; + shift += toks; + if (shift > 0) { + this->R_tbf += toks; + csz_move_queue(this, shift); + } + csz_insert_start(this); + return 0; +} +#endif + + +static struct sk_buff * +csz_dequeue(struct Qdisc* sch) +{ + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + struct sk_buff *skb; + struct csz_flow *this; + +#ifdef CSZ_PLUS_TBF + q->wd_expires = 0; +#endif + this = (struct csz_flow*)q->s.snext; + + while (this != (struct csz_flow*)&q->s) { + + /* First of all: unlink from start list */ + this->sprev->snext = this->snext; + this->snext->sprev = this->sprev; + + if (this != &q->flow[0]) { /* Guaranteed flow */ + skb = __skb_dequeue(&this->q); + if (skb) { +#ifdef CSZ_PLUS_TBF + if (this->depth) { + if (!csz_enough_tokens(q, this, skb)) + continue; + } +#endif + if (this->q.qlen) { + struct sk_buff *nskb = skb_peek(&this->q); + this->start += L2R(q,this,nskb->len); + csz_insert_start(&q->s, this); + } + sch->q.qlen--; + return skb; + } + } else { /* Predicted or best effort flow */ + skb = skb_dequeue_best(q); + if (skb) { + unsigned peeked = this->peeked; + this->peeked = 0; + + if (--this->q.qlen) { + struct sk_buff *nskb; + unsigned dequeued = L2R(q,this,skb->len); + + /* We got not the same thing that + peeked earlier; adjust start number + */ + if (peeked != dequeued && peeked) + this->start += dequeued - peeked; + + nskb = skb_peek_best(q); + peeked = L2R(q,this,nskb->len); + this->start += peeked; + this->peeked = peeked; + csz_insert_start(&q->s, this); + } + sch->q.qlen--; + return skb; + } + } + } +#ifdef CSZ_PLUS_TBF + /* We are about to return no skb. + Schedule watchdog timer, if it occured because of shaping. 
+ */ + if (q->wd_expires) { + if (q->wd_timer.function) + del_timer(&q->wd_timer); + q->wd_timer.function = csz_watchdog; + q->wd_timer.expires = jiffies + PSCHED_US2JIFFIE(q->wd_expires); + add_timer(&q->wd_timer); + } +#endif + return NULL; +} + +static void +csz_reset(struct Qdisc* sch) +{ + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + struct sk_buff *skb; + int i; + + for (i=0; i<4; i++) + while ((skb=skb_dequeue(&q->other[i])) != NULL) + kfree_skb(skb, 0); + + for (i=0; iflow + i; + while ((skb = skb_dequeue(&this->q)) != NULL) + kfree_skb(skb, FREE_WRITE); + this->snext = this->sprev = + this->fnext = this->fprev = (struct csz_head*)this; + this->start = this->finish = 0; + } + q->s.snext = q->s.sprev = &q->s; + q->f.fnext = q->f.fprev = &q->f; + q->R_c = 0; +#ifdef CSZ_PLUS_TBF + PSCHED_GET_TIME(&q->t_tbf); + q->tokens = q->depth; + if (q->wd_timer.function) { + del_timer(&q->wd_timer); + q->wd_timer.function = NULL; + } +#endif + sch->q.qlen = 0; +} + +static void +csz_destroy(struct Qdisc* sch) +{ +/* + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + int i; + + for (i=0; i<4; i++) + qdisc_destroy(q->other[i]); + */ +} + +static int csz_init(struct Qdisc *sch, void *arg) +{ + struct csz_sched_data *q = (struct csz_sched_data *)sch->data; + struct cszinitctl *ctl = (struct cszinitctl*)arg; + int i; + + for (i=0; i<4; i++) + skb_queue_head_init(&q->other[i]); + + for (i=0; iflow + i; + skb_queue_head_init(&this->q); + this->snext = this->sprev = + this->fnext = this->fprev = (struct csz_head*)this; + this->start = this->finish = 0; + } + q->s.snext = q->s.sprev = &q->s; + q->f.fnext = q->f.fprev = &q->f; + q->R_c = 0; +#ifdef CSZ_PLUS_TBF + init_timer(&q->wd_timer); + q->wd_timer.data = (unsigned long)sch; +#endif + if (ctl) { + if (ctl->flows != CSZ_MAX_GUARANTEED) + return -EINVAL; + q->cell_log = ctl->cell_log; + } + return 0; +} + +static int csz_control(struct Qdisc *sch, struct pschedctl *gctl) +{ +/* + struct 
csz_sched_data *q = (struct csz_sched_data *)sch->data; + struct cszctl *ctl = (struct cszctl*)gctl->arg; + struct sk_buff *skb; + int i; + + if (op == PSCHED_TC_ATTACH) { + + } +*/ + return 0; +} + + + + +struct Qdisc_ops csz_ops = +{ + NULL, + "csz", + 0, + sizeof(struct csz_sched_data), + csz_enqueue, + csz_dequeue, + csz_reset, + csz_destroy, + csz_init, + csz_control, +}; + + +#ifdef MODULE +#include +int init_module(void) +{ + int err; + + /* Load once and never free it. */ + MOD_INC_USE_COUNT; + + err = register_qdisc(&csz_ops); + if (err) + MOD_DEC_USE_COUNT; + return err; +} + +void cleanup_module(void) +{ +} +#endif diff -u --recursive --new-file v2.1.67/linux/net/sched/sch_fifo.c linux/net/sched/sch_fifo.c --- v2.1.67/linux/net/sched/sch_fifo.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/sch_fifo.c Sun Nov 30 14:00:40 1997 @@ -0,0 +1,179 @@ +/* + * net/sched/sch_fifo.c Simple FIFO "scheduler" + * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* 1 band FIFO pseudo-"scheduler" */ + +struct fifo_sched_data +{ + int qmaxbytes; + int qmaxlen; + int qbytes; +}; + +static int +bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data; + + if (q->qbytes <= q->qmaxbytes) { + skb_queue_tail(&sch->q, skb); + q->qbytes += skb->len; + return 0; + } + kfree_skb(skb, FREE_WRITE); + return 1; +} + +static struct sk_buff * +bfifo_dequeue(struct Qdisc* sch) +{ + struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data; + struct sk_buff *skb; + + skb = skb_dequeue(&sch->q); + if (skb) + q->qbytes -= skb->len; + return skb; +} + +static void +bfifo_reset(struct Qdisc* sch) +{ + struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data; + struct sk_buff *skb; + + 
while((skb=skb_dequeue(&sch->q)) != NULL) { + q->qbytes -= skb->len; + kfree_skb(skb,FREE_WRITE); + } + if (q->qbytes) { + printk("fifo_reset: qbytes=%d\n", q->qbytes); + q->qbytes = 0; + } +} + +static int +pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data; + + if (sch->q.qlen <= q->qmaxlen) { + skb_queue_tail(&sch->q, skb); + return 0; + } + kfree_skb(skb, FREE_WRITE); + return 1; +} + +static struct sk_buff * +pfifo_dequeue(struct Qdisc* sch) +{ + return skb_dequeue(&sch->q); +} + +static void +pfifo_reset(struct Qdisc* sch) +{ + struct sk_buff *skb; + + while((skb=skb_dequeue(&sch->q))!=NULL) + kfree_skb(skb,FREE_WRITE); +} + + +static int fifo_init(struct Qdisc *sch, void *arg /* int bytes, int pkts */) +{ + struct fifo_sched_data *q; +/* + struct device *dev = sch->dev; + */ + + q = (struct fifo_sched_data *)sch->data; +/* + if (pkts<0) + pkts = dev->tx_queue_len; + if (bytes<0) + bytes = pkts*dev->mtu; + q->qmaxbytes = bytes; + q->qmaxlen = pkts; + */ + return 0; +} + +struct Qdisc_ops pfifo_ops = +{ + NULL, + "pfifo", + 0, + sizeof(struct fifo_sched_data), + pfifo_enqueue, + pfifo_dequeue, + pfifo_reset, + NULL, + fifo_init, +}; + +struct Qdisc_ops bfifo_ops = +{ + NULL, + "pfifo", + 0, + sizeof(struct fifo_sched_data), + bfifo_enqueue, + bfifo_dequeue, + bfifo_reset, + NULL, + fifo_init, +}; + +#ifdef MODULE +#include +int init_module(void) +{ + int err; + + /* Load once and never free it. 
*/ + MOD_INC_USE_COUNT; + + err = register_qdisc(&pfifo_ops); + if (err == 0) { + err = register_qdisc(&bfifo_ops); + if (err) + unregister_qdisc(&pfifo_ops); + } + if (err) + MOD_DEC_USE_COUNT; + return err; +} + +void cleanup_module(void) +{ +} +#endif diff -u --recursive --new-file v2.1.67/linux/net/sched/sch_generic.c linux/net/sched/sch_generic.c --- v2.1.67/linux/net/sched/sch_generic.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/sch_generic.c Sun Nov 30 14:00:40 1997 @@ -0,0 +1,541 @@ +/* + * net/sched/sch_generic.c Generic packet scheduler routines. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct Qdisc_head qdisc_head = { &qdisc_head }; + +static struct Qdisc_ops *qdisc_base = NULL; + +/* NOTES. + + Every discipline has two major routines: enqueue and dequeue. + + ---dequeue + + dequeue usually returns a skb to send. It is allowed to return NULL, + but it does not mean that queue is empty, it just means that + discipline does not want to send anything this time. + Queue is really empty if q->q.qlen == 0. + For complicated disciplines with multiple queues q->q is not + real packet queue, but however q->q.qlen must be valid. + + ---enqueue + + enqueue returns number of enqueued packets i.e. this number is 1, + if packet was enqueued sucessfully and <1 if something (not + necessary THIS packet) was dropped. 
+ + */ + +int register_qdisc(struct Qdisc_ops *qops) +{ + struct Qdisc_ops *q, **qp; + for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) + if (strcmp(qops->id, q->id) == 0) + return -EEXIST; + qops->next = NULL; + qops->refcnt = 0; + *qp = qops; + return 0; +} + +int unregister_qdisc(struct Qdisc_ops *qops) +{ + struct Qdisc_ops *q, **qp; + for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) + if (q == qops) + break; + if (!q) + return -ENOENT; + *qp = q->next; + return 0; +} + +struct Qdisc *qdisc_lookup(int handle) +{ + return NULL; +} + + +/* "NOOP" scheduler: the best scheduler, recommended for all interfaces + in all curcumstances. It is difficult to invent anything more + fast or cheap. + */ + +static int +noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc) +{ + kfree_skb(skb, FREE_WRITE); + return 0; +} + +static struct sk_buff * +noop_dequeue(struct Qdisc * qdisc) +{ + return NULL; +} + +struct Qdisc noop_qdisc = +{ + { NULL }, + noop_enqueue, + noop_dequeue, +}; + +struct Qdisc noqueue_qdisc = +{ + { NULL }, + NULL, + NULL, +}; + + +/* 3-band FIFO queue: old style, but should be a bit faster (several CPU insns) */ + +static int +pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) +{ + const static u8 prio2band[8] = { 1, 2, 2, 2, 1, 2, 0, 0 }; + struct sk_buff_head *list; + + list = ((struct sk_buff_head*)qdisc->data) + prio2band[skb->priority&7]; + + if (list->qlen <= skb->dev->tx_queue_len) { + skb_queue_tail(list, skb); + return 1; + } + qdisc->dropped++; + kfree_skb(skb, FREE_WRITE); + return 0; +} + +static struct sk_buff * +pfifo_fast_dequeue(struct Qdisc* qdisc) +{ + int prio; + struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data); + struct sk_buff *skb; + + for (prio = 0; prio < 3; prio++, list++) { + skb = skb_dequeue(list); + if (skb) + return skb; + } + return NULL; +} + +static void +pfifo_fast_reset(struct Qdisc* qdisc) +{ + int prio; + struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data); + + for (prio=0; 
prio < 3; prio++) + skb_queue_purge(list+prio); +} + +static int pfifo_fast_init(struct Qdisc *qdisc, void *arg) +{ + int i; + struct sk_buff_head *list; + + list = ((struct sk_buff_head*)qdisc->data); + + for(i=0; i<3; i++) + skb_queue_head_init(list+i); + + return 0; +} + +static struct Qdisc_ops pfifo_fast_ops = +{ + NULL, + "pfifo_fast", + 1, + 3 * sizeof(struct sk_buff_head), + pfifo_fast_enqueue, + pfifo_fast_dequeue, + pfifo_fast_reset, + NULL, + pfifo_fast_init +}; + +static struct Qdisc * +qdisc_alloc(struct device *dev, struct Qdisc_ops *ops, void *arg) +{ + struct Qdisc *sch; + int size = sizeof(*sch) + ops->priv_size; + + sch = kmalloc(size, GFP_KERNEL); + if (!sch) + return NULL; + memset(sch, 0, size); + + skb_queue_head_init(&sch->q); + skb_queue_head_init(&sch->failure_q); + sch->ops = ops; + sch->enqueue = ops->enqueue; + sch->dequeue = ops->dequeue; + sch->dev = dev; + if (ops->init && ops->init(sch, arg)) + return NULL; + ops->refcnt++; + return sch; +} + +void qdisc_reset(struct Qdisc *qdisc) +{ + struct Qdisc_ops *ops = qdisc->ops; + if (ops) { + start_bh_atomic(); + if (ops->reset) + ops->reset(qdisc); + skb_queue_purge(&qdisc->failure_q); + end_bh_atomic(); + } +} + +void qdisc_destroy(struct Qdisc *qdisc) +{ + struct Qdisc_ops *ops = qdisc->ops; + if (ops) { + start_bh_atomic(); + if (ops->reset) + ops->reset(qdisc); + if (ops->destroy) + ops->destroy(qdisc); + skb_queue_purge(&qdisc->failure_q); + ops->refcnt--; + end_bh_atomic(); + kfree(qdisc); + } +} + +static void dev_do_watchdog(unsigned long dummy); + +static struct timer_list dev_watchdog = + { NULL, NULL, 0L, 0L, &dev_do_watchdog }; + +static void dev_do_watchdog(unsigned long dummy) +{ + struct Qdisc_head *h; + + for (h = qdisc_head.forw; h != &qdisc_head; h = h->forw) { + struct Qdisc *q = (struct Qdisc*)h; + struct device *dev = q->dev; + if (dev->tbusy && jiffies - q->tx_last > q->tx_timeo) { + qdisc_restart(dev); + } + } + dev_watchdog.expires = jiffies + 5*HZ; + 
add_timer(&dev_watchdog); +} + + +void dev_activate(struct device *dev) +{ + /* No queueing discipline is attached to device; + create default one i.e. pfifo_fast for devices, + which need queueing and noqueue_qdisc for + virtual intrfaces + */ + + if (dev->qdisc_sleeping == &noop_qdisc) { + if (dev->tx_queue_len) { + struct Qdisc *qdisc; + qdisc = qdisc_alloc(dev, &pfifo_fast_ops, NULL); + if (qdisc == NULL) + return; + dev->qdisc_sleeping = qdisc; + } else + dev->qdisc_sleeping = &noqueue_qdisc; + } + + start_bh_atomic(); + if ((dev->qdisc = dev->qdisc_sleeping) != &noqueue_qdisc) { + dev->qdisc->tx_timeo = 5*HZ; + dev->qdisc->tx_last = jiffies - dev->qdisc->tx_timeo; + if (!dev_watchdog.expires) { + dev_watchdog.expires = jiffies + 5*HZ; + add_timer(&dev_watchdog); + } + } + end_bh_atomic(); +} + +void dev_deactivate(struct device *dev) +{ + struct Qdisc *qdisc; + + start_bh_atomic(); + + qdisc = dev->qdisc; + dev->qdisc = &noop_qdisc; + + qdisc_reset(qdisc); + + if (qdisc->h.forw) { + struct Qdisc_head **hp, *h; + + for (hp = &qdisc_head.forw; (h = *hp) != &qdisc_head; hp = &h->forw) { + if (h == &qdisc->h) { + *hp = h->forw; + break; + } + } + } + + end_bh_atomic(); +} + +void dev_init_scheduler(struct device *dev) +{ + dev->qdisc = &noop_qdisc; + dev->qdisc_sleeping = &noop_qdisc; +} + +void dev_shutdown(struct device *dev) +{ + struct Qdisc *qdisc; + + start_bh_atomic(); + qdisc = dev->qdisc_sleeping; + dev->qdisc_sleeping = &noop_qdisc; + qdisc_destroy(qdisc); + end_bh_atomic(); +} + +void dev_set_scheduler(struct device *dev, struct Qdisc *qdisc) +{ + struct Qdisc *oqdisc; + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + start_bh_atomic(); + oqdisc = dev->qdisc_sleeping; + + /* Destroy old scheduler */ + if (oqdisc) + qdisc_destroy(oqdisc); + + /* ... and attach new one */ + dev->qdisc_sleeping = qdisc; + dev->qdisc = &noop_qdisc; + end_bh_atomic(); + + if (dev->flags & IFF_UP) + dev_activate(dev); +} + +/* Kick the queue "q". 
+ Note, that this procedure is called by watchdog timer, so that + we do not check dev->tbusy flag here. + + Returns: 0 - queue is empty. + >0 - queue is not empty, but throttled. + <0 - queue is not empty. Device is throttled, if dev->tbusy != 0. + + NOTE: Called only from NET BH +*/ + + +int qdisc_restart(struct device *dev) +{ + struct Qdisc *q = dev->qdisc; + struct sk_buff *skb; + + skb = skb_dequeue(&q->failure_q); + if (!skb) { + skb = q->dequeue(q); + if (netdev_nit && skb) + dev_queue_xmit_nit(skb,dev); + } + if (skb) { + if (dev->hard_start_xmit(skb, dev) == 0) { + q->tx_last = jiffies; + return -1; + } +#if 0 + if (net_ratelimit()) + printk(KERN_DEBUG "netdevice %s defers output.\n", dev->name); +#endif + skb_queue_head(&q->failure_q, skb); + return -1; + } + return q->q.qlen; +} + +void qdisc_run_queues(void) +{ + struct Qdisc_head **hp, *h; + + hp = &qdisc_head.forw; + while ((h = *hp) != &qdisc_head) { + int res = -1; + struct Qdisc *q = (struct Qdisc*)h; + struct device *dev = q->dev; + + while (!dev->tbusy && (res = qdisc_restart(dev)) < 0) + /* NOTHING */; + + /* The explanation is necessary here. + qdisc_restart called dev->hard_start_xmit, + if device is virtual, it could trigger one more + dev_queue_xmit and new device could appear + in active chain. In this case we cannot unlink + empty queue, because we lost back pointer. + No problem, we will unlink it during the next round. 
+ */ + + if (res == 0 && *hp == h) { + *hp = h->forw; + h->forw = NULL; + continue; + } + hp = &h->forw; + } +} + + +int tc_init(struct pschedctl *pctl) +{ + struct Qdisc *q; + struct Qdisc_ops *qops; + + if (pctl->handle) { + q = qdisc_lookup(pctl->handle); + if (q == NULL) + return -ENOENT; + qops = q->ops; + if (pctl->ifindex && q->dev->ifindex != pctl->ifindex) + return -EINVAL; + } + return -EINVAL; +} + +int tc_destroy(struct pschedctl *pctl) +{ + return -EINVAL; +} + +int tc_attach(struct pschedctl *pctl) +{ + return -EINVAL; +} + +int tc_detach(struct pschedctl *pctl) +{ + return -EINVAL; +} + + +int psched_ioctl(void *arg) +{ + struct pschedctl ctl; + struct pschedctl *pctl = &ctl; + int err; + + if (copy_from_user(&ctl, arg, sizeof(ctl))) + return -EFAULT; + + if (ctl.arglen > 0) { + pctl = kmalloc(sizeof(ctl) + ctl.arglen, GFP_KERNEL); + if (pctl == NULL) + return -ENOBUFS; + memcpy(pctl, &ctl, sizeof(ctl)); + if (copy_from_user(pctl->args, ((struct pschedctl*)arg)->args, ctl.arglen)) { + kfree(pctl); + return -EFAULT; + } + } + + rtnl_lock(); + + switch (ctl.command) { + case PSCHED_TC_INIT: + err = tc_init(pctl); + break; + case PSCHED_TC_DESTROY: + err = tc_destroy(pctl); + break; + case PSCHED_TC_ATTACH: + err = tc_attach(pctl); + break; + case PSCHED_TC_DETACH: + err = tc_detach(pctl); + break; + default: + err = -EINVAL; + } + + rtnl_unlock(); + + if (pctl != &ctl) + kfree(pctl); + return err; +} + +__initfunc(int pktsched_init(void)) +{ +#define INIT_QDISC(name) { \ + extern struct Qdisc_ops name##_ops; \ + register_qdisc(&##name##_ops); \ + } + + skb_queue_head_init(&noop_qdisc.failure_q); + skb_queue_head_init(&noqueue_qdisc.failure_q); + + register_qdisc(&pfifo_fast_ops); +#ifdef CONFIG_NET_SCH_CBQ + INIT_QDISC(cbq); +#endif +#ifdef CONFIG_NET_SCH_CSZ + INIT_QDISC(csz); +#endif +#ifdef CONFIG_NET_SCH_RED + INIT_QDISC(red); +#endif +#ifdef CONFIG_NET_SCH_SFQ + INIT_QDISC(sfq); +#endif +#ifdef CONFIG_NET_SCH_TBF + INIT_QDISC(tbf); +#endif +#ifdef 
CONFIG_NET_SCH_PFIFO + INIT_QDISC(pfifo); + INIT_QDISC(bfifo); +#endif +#ifdef CONFIG_NET_SCH_PRIO + INIT_QDISC(prio); +#endif + return 0; +} diff -u --recursive --new-file v2.1.67/linux/net/sched/sch_prio.c linux/net/sched/sch_prio.c --- v2.1.67/linux/net/sched/sch_prio.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/sch_prio.c Sun Nov 30 14:00:40 1997 @@ -0,0 +1,146 @@ +/* + * net/sched/sch_prio.c Simple 3-band priority "scheduler". + * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* New N-band generic scheduler */ + +struct prio_sched_data +{ + int qbytes; + int bands; + u8 prio2band[8]; + struct Qdisc *queues[8]; +}; + +static int +prio_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + int prio = q->prio2band[skb->priority&7]; + struct Qdisc *qdisc; + + qdisc = q->queues[prio]; + if (qdisc->enqueue(skb, qdisc) == 0) { + q->qbytes += skb->len; + sch->q.qlen++; + return 0; + } + return 1; +} + +static struct sk_buff * +prio_dequeue(struct Qdisc* sch) +{ + struct sk_buff *skb; + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + int prio; + struct Qdisc *qdisc; + + for (prio = 0; prio < q->bands; prio++) { + qdisc = q->queues[prio]; + skb = qdisc->dequeue(qdisc); + if (skb) { + q->qbytes -= skb->len; + sch->q.qlen--; + return skb; + } + } + return NULL; + +} + +static void +prio_reset(struct Qdisc* sch) +{ + int prio; + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + + for (prio=0; priobands; prio++) + qdisc_reset(q->queues[prio]); + q->qbytes = 0; +} + +static void +prio_destroy(struct Qdisc* sch) +{ + int prio; + struct prio_sched_data *q = (struct prio_sched_data *)sch->data; + + for (prio=0; priobands; prio++) { + 
qdisc_destroy(q->queues[prio]); + q->queues[prio] = &noop_qdisc; + } +} + +static int prio_init(struct Qdisc *sch, void *arg) +{ + const static u8 prio2band[8] = { 1, 2, 2, 2, 1, 2, 0, 0 }; + struct prio_sched_data *q; + int i; + + q = (struct prio_sched_data *)sch->data; + q->bands = 3; + memcpy(q->prio2band, prio2band, sizeof(prio2band)); + for (i=0; ibands; i++) + q->queues[i] = &noop_qdisc; + return 0; +} + +struct Qdisc_ops prio_ops = +{ + NULL, + "prio", + 0, + sizeof(struct prio_sched_data), + prio_enqueue, + prio_dequeue, + prio_reset, + prio_destroy, + prio_init, +}; + +#ifdef MODULE +#include +int init_module(void) +{ + int err; + + /* Load once and never free it. */ + MOD_INC_USE_COUNT; + + err = register_qdisc(&prio_ops); + if (err) + MOD_DEC_USE_COUNT; + return err; +} + +void cleanup_module(void) +{ +} +#endif diff -u --recursive --new-file v2.1.67/linux/net/sched/sch_red.c linux/net/sched/sch_red.c --- v2.1.67/linux/net/sched/sch_red.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/sch_red.c Sun Nov 30 14:00:40 1997 @@ -0,0 +1,303 @@ +/* + * net/sched/sch_red.c Random Early Detection scheduler. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Random Early Detection (RED) algorithm. + ======================================= + + Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways + for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking. + + This file codes a "divisionless" version of RED algorithm + written down in Fig.17 of the paper. 
+ +Short description. +------------------ + + When new packet arrives we calculate average queue length: + + avg = (1-W)*avg + W*current_queue_len, + + W is filter time constant (choosen as 2^(-Wlog)), controlling + inertia of algorithm. To allow larger bursts, W should be + decreased. + + if (avg > th_max) -> packet marked (dropped). + if (avg < th_min) -> packet passes. + if (th_min < avg < th_max) we calculate probability: + + Pb = max_P * (avg - th_min)/(th_max-th_min) + + and mark (drop) packet with this probability. + Pb changes from 0 (at avg==th_min) to max_P (avg==th_max). + max_P should be small (not 1!). + + NB. SF&VJ assumed that Pb[avg] is linear function. I think it + is wrong. I'd make: + P[th_min] = 0, P[th_max] = 1; + dP/davg[th_min] = 0, dP/davg[th_max] = infinity, or a large number. + + I choose max_P as a number between 0.01 and 0.1, so that + C1 = max_P/(th_max-th_min) is power of two: C1 = 2^(-C1log) + + Parameters, settable by user (with default values): + + qmaxbytes=256K - hard limit on queue length, should be chosen >qth_max + to allow packet bursts. This parameter does not + affect algorithm behaviour and can be chosen + arbitrarily high (well, less than ram size) + Really, this limit will never be achieved + if RED works correctly. + qth_min=32K + qth_max=128K - qth_max should be at least 2*qth_min + Wlog=8 - log(1/W). + Alog=Wlog - fixed point position in th_min and th_max. + Rlog=10 + C1log=24 - C1log = trueC1log+Alog-Rlog + so that trueC1log=22 and max_P~0.02 + + +NOTES: + +Upper bound on W. +----------------- + + If you want to allow bursts of L packets of size S, + you should choose W: + + L + 1 -th_min/S < (1-(1-W)^L)/W + + For th_min/S = 32 + + log(W) L + -1 33 + -2 35 + -3 39 + -4 46 + -5 57 + -6 75 + -7 101 + -8 135 + -9 190 + etc. 
+ */ + +struct red_sched_data +{ +/* Parameters */ + unsigned long qmaxbytes; /* HARD maximal queue length */ + unsigned long qth_min; /* Min average length threshold: A scaled */ + unsigned long qth_max; /* Max average length threshold: A scaled */ + char Alog; /* Point position in average lengths */ + char Wlog; /* log(W) */ + char Rlog; /* random number bits */ + char C1log; /* log(1/C1) */ + char Slog; + char Stab[256]; + +/* Variables */ + unsigned long qbytes; /* Queue length in bytes */ + unsigned long qave; /* Average queue length: A scaled */ + int qcount; /* Packets since last random number generation */ + unsigned qR; /* Cached random number [0..1data; + + psched_time_t now; + + if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { + long us_idle; + PSCHED_SET_PASTPERFECT(q->qidlestart); + PSCHED_GET_TIME(now); + us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, (256<Slog)-1, 0); + +/* It is wrong, but I do not think that SF+VJ proposal is reasonable + and did not invented anything more clever 8) + + The problem: ideally, average length queue recalcultion should + be done over constant clock intervals. It is too expensive, so that + calculation is driven by outgoing packets. + When queue is idle we have to model this clock by hands. + + SF+VJ proposed to "generate" m = (idletime/bandwidth)*average_pkt_size + dummy packets as burst after idle time, i.e. + + q->qave *= (1-W)^m + + It is apparently overcomplicated solution (f.e. we have to precompute + a table to make this calculation for reasonable time) + I believe, that a simpler model may be used here, + but it is field for experiments. 
+*/ + q->qave >>= q->Stab[(us_idle>>q->Slog)&0xFF]; + } + + q->qave += ((q->qbytes<Alog) - q->qave) >> q->Wlog; + + if (q->qave < q->qth_min) { +enqueue: + q->qcount = -1; + if (q->qbytes <= q->qmaxbytes) { + skb_queue_tail(&sch->q, skb); + q->qbytes += skb->len; + return 1; + } +drop: + kfree_skb(skb, FREE_WRITE); + return 0; + } + if (q->qave >= q->qth_max) { + q->qcount = -1; + goto drop; + } + q->qcount++; + if (q->qcount++) { + if ((((q->qave - q->qth_min)*q->qcount)>>q->C1log) < q->qR) + goto enqueue; + q->qcount = 0; + q->qR = red_random(q->Rlog); + goto drop; + } + q->qR = red_random(q->Rlog); + goto enqueue; +} + +static struct sk_buff * +red_dequeue(struct Qdisc* sch) +{ + struct sk_buff *skb; + struct red_sched_data *q = (struct red_sched_data *)sch->data; + + skb = skb_dequeue(&sch->q); + if (skb) { + q->qbytes -= skb->len; + return skb; + } + PSCHED_GET_TIME(q->qidlestart); + return NULL; +} + +static void +red_reset(struct Qdisc* sch) +{ + struct red_sched_data *q = (struct red_sched_data *)sch->data; + struct sk_buff *skb; + + while((skb=skb_dequeue(&sch->q))!=NULL) { + q->qbytes -= skb->len; + kfree_skb(skb,FREE_WRITE); + } + if (q->qbytes) { + printk("red_reset: qbytes=%lu\n", q->qbytes); + q->qbytes = 0; + } + PSCHED_SET_PASTPERFECT(q->qidlestart); + q->qave = 0; + q->qcount = -1; +} + +static int red_init(struct Qdisc *sch, struct pschedctl *pctl) +{ + struct red_sched_data *q; + struct redctl *ctl = (struct redctl*)pctl->args; + + q = (struct red_sched_data *)sch->data; + + if (pctl->arglen < sizeof(struct redctl)) + return -EINVAL; + + q->Wlog = ctl->Wlog; + q->Alog = ctl->Alog; + q->Rlog = ctl->Rlog; + q->C1log = ctl->C1log; + q->Slog = ctl->Slog; + q->qth_min = ctl->qth_min; + q->qth_max = ctl->qth_max; + q->qmaxbytes = ctl->qmaxbytes; + memcpy(q->Stab, ctl->Stab, 256); + + q->qcount = -1; + PSCHED_SET_PASTPERFECT(q->qidlestart); + return 0; +} + +struct Qdisc_ops red_ops = +{ + NULL, + "red", + 0, + sizeof(struct red_sched_data), + 
red_enqueue, + red_dequeue, + red_reset, + NULL, + red_init, + NULL +}; + + +#ifdef MODULE +#include +int init_module(void) +{ + int err; + + /* Load once and never free it. */ + MOD_INC_USE_COUNT; + + err = register_qdisc(&red_ops); + if (err) + MOD_DEC_USE_COUNT; + return err; +} + +void cleanup_module(void) +{ +} +#endif diff -u --recursive --new-file v2.1.67/linux/net/sched/sch_sfq.c linux/net/sched/sch_sfq.c --- v2.1.67/linux/net/sched/sch_sfq.c Wed Dec 31 16:00:00 1969 +++ linux/net/sched/sch_sfq.c Sun Nov 30 14:00:40 1997 @@ -0,0 +1,333 @@ +/* + * net/sched/sch_sfq.c Stochastic Fairness Queueing scheduler. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Stochastic Fairness Queuing algorithm. + ======================================= + + Source: + Paul E. McKenney "Stochastic Fairness Queuing", + IEEE INFOCOMM'90 Proceedings, San Francisco, 1990. + + Paul E. McKenney "Stochastic Fairness Queuing", + "Interworking: Research and Experience", v.2, 1991, p.113-131. + + + See also: + M. Shreedhar and George Varghese "Efficient Fair + Queuing using Deficit Round Robin", Proc. SIGCOMM 95. + + + It is not the thing that usually called (W)FQ nowadays. It does not + use any timestamp mechanism, but instead processes queues + in round-robin order. + + ADVANTAGE: + + - It is very cheap. Both CPU and memory requirements are minimal. + + DRAWBACKS: + + - "Stochastic" -> It is not 100% fair. + When hash collisions occur, several flows are considred as one. 
+ + - "Round-robin" -> It introduces larger delays than virtual clock + based schemes, and should not be used for isolation interactive + traffic from non-interactive. It means, that this scheduler + should be used as leaf of CBQ or P3, which put interactive traffic + to higher priority band. + + We still need true WFQ for top level CSZ, but using WFQ + for the best effort traffic is absolutely pointless: + SFQ is superior for this purpose. + + IMPLEMENTATION: + This implementation limits maximal queue length to 128; + maximal mtu to 2^15-1; number of hash buckets to 1024. + The only goal of this restrictions was that all data + fitted to one 4K page :-). Struct sfq_sched_data is + organized in anti-cache manner: all the data for bucket + scattered over different locations. It is not good, + but it allowed to put it into 4K. + + It is easy to increase these values. +*/ + +#define SFQ_DEPTH 128 +#define SFQ_HASH_DIVISOR 1024 + +#define SFQ_HASH(a) 0 + +/* This type should contain at least SFQ_DEPTH*2 values */ +typedef unsigned char sfq_index; + +struct sfq_head +{ + sfq_index next; + sfq_index prev; +}; + +struct sfq_sched_data +{ +/* Parameters */ + unsigned quantum; /* Allotment per round: MUST BE >= MTU */ + +/* Variables */ + sfq_index tail; /* Index of current slot in round */ + sfq_index max_depth; /* Maximal depth */ + + sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ + sfq_index next[SFQ_DEPTH]; /* Active slots link */ + short allot[SFQ_DEPTH]; /* Current allotment per slot */ + unsigned short hash[SFQ_DEPTH]; /* Hash value indexed by slots */ + struct sk_buff_head qs[SFQ_DEPTH]; /* Slot queue */ + struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */ +}; + +extern __inline__ void sfq_link(struct sfq_sched_data *q, sfq_index x) +{ + sfq_index p, n; + int d = q->qs[x].qlen; + + p = d; + n = q->dep[d].next; + q->dep[x].next = n; + q->dep[x].prev = p; + q->dep[p].next = q->dep[n].prev = x; +} + +extern __inline__ void sfq_dec(struct 
sfq_sched_data *q, sfq_index x) +{ + sfq_index p, n; + + n = q->dep[x].next; + p = q->dep[x].prev; + q->dep[p].next = n; + q->dep[n].prev = p; + + if (n == p && q->max_depth == q->qs[x].qlen + 1) + q->max_depth--; + + sfq_link(q, x); +} + +extern __inline__ void sfq_inc(struct sfq_sched_data *q, sfq_index x) +{ + sfq_index p, n; + int d; + + n = q->dep[x].next; + p = q->dep[x].prev; + q->dep[p].next = n; + q->dep[n].prev = p; + d = q->qs[x].qlen; + if (q->max_depth < d) + q->max_depth = d; + + sfq_link(q, x); +} + +static __inline__ void sfq_drop(struct sfq_sched_data *q) +{ + struct sk_buff *skb; + sfq_index d = q->max_depth; + + /* Queue is full! Find the longest slot and + drop a packet from it */ + + if (d != 1) { + sfq_index x = q->dep[d].next; + skb = q->qs[x].prev; + __skb_unlink(skb, &q->qs[x]); + kfree_skb(skb, FREE_WRITE); + sfq_dec(q, x); +/* + sch->q.qlen--; + */ + return; + } + + /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ + + d = q->next[q->tail]; + q->next[q->tail] = q->next[d]; + q->allot[q->next[d]] += q->quantum; + skb = q->qs[d].prev; + __skb_unlink(skb, &q->qs[d]); + kfree_skb(skb, FREE_WRITE); + sfq_dec(q, d); +/* + sch->q.qlen--; + */ + q->ht[q->hash[d]] = SFQ_DEPTH; + return; +} + +static int +sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; + unsigned hash = SFQ_HASH(skb); + sfq_index x; + + x = q->ht[hash]; + if (x == SFQ_DEPTH) { + q->ht[hash] = x = q->dep[SFQ_DEPTH].next; + q->hash[x] = hash; + } + __skb_queue_tail(&q->qs[x], skb); + sfq_inc(q, x); + if (q->qs[x].qlen == 1) { /* The flow is new */ + if (q->tail == SFQ_DEPTH) { /* It is the first flow */ + q->tail = x; + q->next[x] = x; + q->allot[x] = q->quantum; + } else { + q->next[x] = q->next[q->tail]; + q->next[q->tail] = x; + q->tail = x; + } + } + if (++sch->q.qlen < SFQ_DEPTH-1) + return 1; + + sfq_drop(q); + return 0; +} + +static struct sk_buff * +sfq_dequeue(struct Qdisc* sch) +{ + 
struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; + struct sk_buff *skb; + sfq_index a, old_a; + + /* No active slots */ + if (q->tail == SFQ_DEPTH) + return NULL; + + a = old_a = q->next[q->tail]; + + /* Grab packet */ + skb = __skb_dequeue(&q->qs[a]); + sfq_dec(q, a); + sch->q.qlen--; + + /* Is the slot empty? */ + if (q->qs[a].qlen == 0) { + a = q->next[a]; + if (a == old_a) { + q->tail = SFQ_DEPTH; + return skb; + } + q->next[q->tail] = a; + q->allot[a] += q->quantum; + } else if ((q->allot[a] -= skb->len) <= 0) { + q->tail = a; + a = q->next[a]; + q->allot[a] += q->quantum; + } + return skb; +} + +static void +sfq_reset(struct Qdisc* sch) +{ + struct sk_buff *skb; + + while ((skb = sfq_dequeue(sch)) != NULL) + kfree_skb(skb, FREE_WRITE); +} + + +static int sfq_open(struct Qdisc *sch, void *arg) +{ + struct sfq_sched_data *q; + int i; + + q = (struct sfq_sched_data *)sch->data; + + for (i=0; iht[i] = SFQ_DEPTH; + for (i=0; iqs[i]); + q->dep[i+SFQ_DEPTH].next = i+SFQ_DEPTH; + q->dep[i+SFQ_DEPTH].prev = i+SFQ_DEPTH; + } + q->max_depth = 0; + q->tail = SFQ_DEPTH; + q->quantum = sch->dev->mtu; + if (sch->dev->hard_header) + q->quantum += sch->dev->hard_header_len; + for (i=0; i + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Simple Token Bucket Filter. + ======================================= + + SOURCE. + + None. + + ALGORITHM. + + Sequence of packets satisfy token bucket filter with + rate $r$ and depth $b$, if all the numbers defined by: + \begin{eqnarray*} + n_0 &=& b, \\ + n_i &=& {\rm max} ( b, n_{i-1} + r*(t_i-t_{i-1}) - L_i ), + \end{eqnarray*} + where $t_i$ --- departure time of $i$-th packet and + $L_i$ -- its length, never less than zero. 
+ + It is convenient to rescale $n_i$ by factor $r$, so + that the sequence has "canonical" form: + \[ + n_0 = b/r, + n_i = max { b/r, n_{i-1} + t_i - t_{i-1} - L_i/r }, + \] + + If a packet has n_i < 0, we throttle filter + by $-n_i$ usecs. + + NOTES. + + If TBF throttles, it starts watchdog timer, which will wake up it + after 0...10 msec. + If no new packets will arrive during this period, + or device will not be awaken by EOI for previous packet, + tbf could stop its activity for 10 msec. + + It means that tbf will sometimes introduce pathological + 10msec delays to flow corresponding to rate*10msec bytes. + For 10Mbit/sec flow it is about 12Kb, on 100Mbit/sec -- ~100Kb. + This number puts lower reasonbale bound on token bucket depth, + but even if depth is larger traffic is erratic at large rates. + + This problem is not specific for THIS implementation. Really, + there exists statement that any attempt to shape traffic + in transit will increase delays and jitter much more than + we expected naively. + + Particularily, it means that delay/jitter sensitive traffic + MUST NOT be shaped. Cf. CBQ (wrong) and CSZ (correct) approaches. +*/ + +struct tbf_sched_data +{ +/* Parameters */ + int cell_log; /* 1<= MTU/B */ + unsigned long max_bytes; /* Maximal length of backlog: bytes */ + +/* Variables */ + unsigned long bytes; /* Current length of backlog */ + unsigned long tokens; /* Current number of tokens */ + psched_time_t t_c; /* Time check-point */ + struct timer_list wd_timer; /* Watchdog timer */ +}; + +#define L2T(q,L) ((q)->L_tab[(L)>>(q)->cell_log]) + +static int +tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; + + __skb_queue_tail(&sch->q, skb); + if ((q->bytes += skb->len) <= q->max_bytes) + return 1; + + /* Drop action: undo the things that we just made, + * i.e. 
make tail drop + */ + + __skb_unlink(skb, &sch->q); + q->bytes -= skb->len; + kfree_skb(skb, FREE_WRITE); + return 0; +} + +static void tbf_watchdog(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc*)arg; + struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; + + q->wd_timer.function = NULL; + + qdisc_wakeup(sch->dev); +} + + +static struct sk_buff * +tbf_dequeue(struct Qdisc* sch) +{ + struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; + struct sk_buff *skb; + + skb = __skb_dequeue(&sch->q); + + if (skb) { + psched_time_t now; + long toks; + + PSCHED_GET_TIME(now); + + toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->depth, 0) + + q->tokens - L2T(q,skb->len); + + if (toks >= 0) { + q->t_c = now; + q->tokens = toks <= q->depth ? toks : q->depth; + q->bytes -= skb->len; + return skb; + } + + /* Maybe, we have in queue a shorter packet, + which can be sent now. It sounds cool, + but, however, wrong in principle. + We MUST NOT reorder packets in these curcumstances. + + Really, if we splitted flow to independent + subflows, it would be very good solution. + Look at sch_csz.c. 
+ */ + __skb_queue_head(&sch->q, skb); + + if (!sch->dev->tbusy) { + if (q->wd_timer.function) + del_timer(&q->wd_timer); + q->wd_timer.function = tbf_watchdog; + q->wd_timer.expires = jiffies + PSCHED_US2JIFFIE(-toks); + add_timer(&q->wd_timer); + } + } + return NULL; +} + + +static void +tbf_reset(struct Qdisc* sch) +{ + struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sch->q)) != NULL) + kfree_skb(skb, FREE_WRITE); + q->bytes = 0; + PSCHED_GET_TIME(q->t_c); + q->tokens = q->depth; + if (q->wd_timer.function) { + del_timer(&q->wd_timer); + q->wd_timer.function = NULL; + } +} + +static int tbf_init(struct Qdisc* sch, void *arg) +{ + struct tbf_sched_data *q = (struct tbf_sched_data *)sch->data; + struct tbfctl *ctl = (struct tbfctl*)arg; + + PSCHED_GET_TIME(q->t_c); + init_timer(&q->wd_timer); + q->wd_timer.function = NULL; + q->wd_timer.data = (unsigned long)sch; + if (ctl) { + q->max_bytes = ctl->bytes; + q->depth = ctl->depth; + q->tokens = q->tokens; + q->cell_log = ctl->cell_log; + memcpy(q->L_tab, ctl->L_tab, 256*sizeof(unsigned long)); + } + return 0; +} + +struct Qdisc_ops tbf_ops = +{ + NULL, + "tbf", + 0, + sizeof(struct tbf_sched_data), + tbf_enqueue, + tbf_dequeue, + tbf_reset, + NULL, + tbf_init, + NULL, +}; + + +#ifdef MODULE +#include +int init_module(void) +{ + int err; + + /* Load once and never free it. 
*/ + MOD_INC_USE_COUNT; + + err = register_qdisc(&tbf_ops); + if (err) + MOD_DEC_USE_COUNT; + return err; +} + +void cleanup_module(void) +{ +} +#endif diff -u --recursive --new-file v2.1.67/linux/net/socket.c linux/net/socket.c --- v2.1.67/linux/net/socket.c Sat Oct 25 02:44:18 1997 +++ linux/net/socket.c Sun Nov 30 14:00:40 1997 @@ -74,18 +74,16 @@ #include #include #include +#include #if defined(CONFIG_KERNELD) && defined(CONFIG_NET) #include #endif -#include - #include #include #include -#include #include #include #include @@ -103,7 +101,8 @@ size_t size, loff_t *ppos); static int sock_close(struct inode *inode, struct file *file); -static unsigned int sock_poll(struct file *file, poll_table *wait); +static unsigned int sock_poll(struct file *file, + struct poll_table_struct *wait); static int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); static int sock_fasync(struct file *filp, int on); @@ -1158,8 +1157,11 @@ * skbuff accounting stops it from going too far. * I hope this is correct. 
*/ - if (msg_sys.msg_controllen > sizeof(ctl) && - msg_sys.msg_controllen <= 256) + if (msg_sys.msg_controllen > 256) { + err = -EINVAL; + goto failed2; + } + if (msg_sys.msg_controllen > sizeof(ctl)) { ctl_buf = kmalloc(msg_sys.msg_controllen, GFP_KERNEL); if (ctl_buf == NULL) @@ -1176,11 +1178,11 @@ msg_sys.msg_control = ctl_buf; } msg_sys.msg_flags = flags; - if (current->files->fd[fd]->f_flags & O_NONBLOCK) - msg_sys.msg_flags |= MSG_DONTWAIT; if ((sock = sockfd_lookup(fd,&err))!=NULL) { + if (current->files->fd[fd]->f_flags & O_NONBLOCK) + msg_sys.msg_flags |= MSG_DONTWAIT; err = sock_sendmsg(sock, &msg_sys, total_len); sockfd_put(sock); } @@ -1246,11 +1248,10 @@ cmsg_ptr = (unsigned long)msg_sys.msg_control; msg_sys.msg_flags = 0; - if (current->files->fd[fd]->f_flags&O_NONBLOCK) - flags |= MSG_DONTWAIT; - if ((sock = sockfd_lookup(fd, &err))!=NULL) { + if (current->files->fd[fd]->f_flags&O_NONBLOCK) + flags |= MSG_DONTWAIT; err=sock_recvmsg(sock, &msg_sys, total_len, flags); if(err>=0) len=err; @@ -1392,9 +1393,10 @@ int sock_register(struct net_proto_family *ops) { - if (ops->family < 0 || ops->family >= NPROTO) - return -1; - + if (ops->family >= NPROTO) { + printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); + return -ENOBUFS; + } net_families[ops->family]=ops; return 0; } @@ -1450,13 +1452,6 @@ sk_init(); - /* - * The netlink device handler may be needed early. - */ - -#ifdef CONFIG_NETLINK - init_netlink(); -#endif /* * Wan router layer. @@ -1479,6 +1474,17 @@ */ proto_init(); + + /* + * The netlink device handler may be needed early. 
+ */ + +#ifdef CONFIG_RTNETLINK + rtnetlink_init(); +#endif +#ifdef CONFIG_NETLINK_DEV + init_netlink(); +#endif } int socket_get_info(char *buffer, char **start, off_t offset, int length) diff -u --recursive --new-file v2.1.67/linux/net/sunrpc/clnt.c linux/net/sunrpc/clnt.c --- v2.1.67/linux/net/sunrpc/clnt.c Sat Nov 29 11:25:12 1997 +++ linux/net/sunrpc/clnt.c Sun Nov 30 10:59:03 1997 @@ -60,8 +60,6 @@ static u32 * call_header(struct rpc_task *task); static u32 * call_verify(struct rpc_task *task); -#define _S(nr) (1 << ((nr) - 1)) - /* * Create an RPC client * FIXME: This should also take a flags argument (as in task->tk_flags). @@ -197,19 +195,24 @@ int flags, rpc_action func, void *data) { struct rpc_task my_task, *task = &my_task; - unsigned long oldmask, sigallow = _S(SIGKILL); + unsigned long sigallow = sigmask(SIGKILL); + sigset_t oldset; + unsigned long irqflags; int async, status; /* Turn off various signals */ if (clnt->cl_intr) { - struct sigaction *action = current->sig->action; - if (action[SIGINT-1].sa_handler == SIG_DFL) - sigallow |= _S(SIGINT); - if (action[SIGQUIT-1].sa_handler == SIG_DFL) - sigallow |= _S(SIGQUIT); - } - oldmask = current->blocked; - current->blocked |= ~sigallow; + struct k_sigaction *action = current->sig->action; + if (action[SIGINT-1].sa.sa_handler == SIG_DFL) + sigallow |= sigmask(SIGINT); + if (action[SIGQUIT-1].sa.sa_handler == SIG_DFL) + sigallow |= sigmask(SIGQUIT); + } + spin_lock_irqsave(¤t->sigmask_lock, irqflags); + oldset = current->blocked; + siginitsetinv(¤t->blocked, sigallow & ~oldset.sig[0]); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, irqflags); /* Create/initialize a new RPC task */ if ((async = (flags & RPC_TASK_ASYNC)) != 0) { @@ -238,7 +241,11 @@ } out: - current->blocked = oldmask; + spin_lock_irqsave(¤t->sigmask_lock, irqflags); + current->blocked = oldset; + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, irqflags); + return status; } diff -u 
--recursive --new-file v2.1.67/linux/net/sunrpc/sched.c linux/net/sunrpc/sched.c --- v2.1.67/linux/net/sunrpc/sched.c Wed Oct 15 16:04:24 1997 +++ linux/net/sunrpc/sched.c Sun Nov 30 10:59:03 1997 @@ -23,8 +23,6 @@ static int rpc_task_id = 0; #endif -#define _S(signo) (1 << ((signo)-1)) - /* * We give RPC the same get_free_pages priority as NFS */ @@ -410,9 +408,7 @@ * break the loop here, but go around once more. */ if (0 && !RPC_IS_ASYNC(task) && signalled()) { - dprintk("RPC: %4d got signal (map %08lx)\n", - task->tk_pid, - current->signal & ~current->blocked); + dprintk("RPC: %4d got signal\n", task->tk_pid); rpc_exit(task, -ERESTARTSYS); } } @@ -746,7 +742,7 @@ exit_files(current); exit_mm(current); - current->blocked |= ~_S(SIGKILL); + siginitsetinv(¤t->blocked, sigmask(SIGKILL)); current->session = 1; current->pgrp = 1; sprintf(current->comm, "rpciod"); @@ -754,13 +750,13 @@ dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid); while (rpciod_users) { if (signalled()) { - if (current->signal & _S(SIGKILL)) { + if (sigismember(¤t->signal, SIGKILL)) { rpciod_killall(); } else { printk("rpciod: ignoring signal (%d users)\n", rpciod_users); } - current->signal &= current->blocked; + flush_signals(current); } __rpc_schedule(); @@ -795,17 +791,32 @@ static void rpciod_killall(void) { - while (all_tasks) { - unsigned long oldsig = current->signal; + unsigned long flags; + sigset_t old_set; + + /* FIXME: What had been going on before was saving and restoring + current->signal. This as opposed to blocking signals? Do we + still need them to wake up out of schedule? In any case it + isn't playing nice and a better way should be found. 
*/ + + spin_lock_irqsave(¤t->sigmask_lock, flags); + old_set = current->blocked; + sigfillset(¤t->blocked); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); - current->signal = 0; + while (all_tasks) { rpc_killall_tasks(NULL); __rpc_schedule(); current->timeout = jiffies + HZ / 100; need_resched = 1; schedule(); - current->signal = oldsig; } + + spin_lock_irqsave(¤t->sigmask_lock, flags); + current->blocked = old_set; + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); } /* @@ -846,7 +857,7 @@ void rpciod_down(void) { - unsigned long oldflags; + unsigned long flags; MOD_INC_USE_COUNT; down(&rpciod_sema); @@ -867,8 +878,7 @@ * Usually rpciod will exit very quickly, so we * wait briefly before checking the process id. */ - oldflags = current->signal; - current->signal = 0; + current->flags &= ~PF_SIGPENDING; current->state = TASK_INTERRUPTIBLE; current->timeout = jiffies + 1; schedule(); @@ -884,7 +894,9 @@ } interruptible_sleep_on(&rpciod_killer); } - current->signal = oldflags; + spin_lock_irqsave(¤t->sigmask_lock, flags); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); out: up(&rpciod_sema); MOD_DEC_USE_COUNT; diff -u --recursive --new-file v2.1.67/linux/net/sunrpc/sunrpc_syms.c linux/net/sunrpc/sunrpc_syms.c --- v2.1.67/linux/net/sunrpc/sunrpc_syms.c Tue Sep 23 16:48:50 1997 +++ linux/net/sunrpc/sunrpc_syms.c Sun Nov 30 14:00:40 1997 @@ -73,10 +73,10 @@ /* RPC statistics */ #ifdef CONFIG_PROC_FS +EXPORT_SYMBOL(rpc_proc_init); EXPORT_SYMBOL(rpc_proc_register); EXPORT_SYMBOL(rpc_register_sysctl); EXPORT_SYMBOL(rpc_proc_unregister); -EXPORT_SYMBOL(rpc_proc_init); EXPORT_SYMBOL(rpc_proc_read); EXPORT_SYMBOL(svc_proc_register); EXPORT_SYMBOL(svc_proc_unregister); diff -u --recursive --new-file v2.1.67/linux/net/sunrpc/svc.c linux/net/sunrpc/svc.c --- v2.1.67/linux/net/sunrpc/svc.c Wed Oct 15 16:04:24 1997 +++ linux/net/sunrpc/svc.c Sun Nov 30 10:59:03 1997 @@ -169,7 +169,8 @@ 
svc_register(struct svc_serv *serv, int proto, unsigned short port) { struct svc_program *progp; - unsigned long oldsigs = 0; + unsigned long flags; + sigset_t old_set; int i, error = 0, dummy; progp = serv->sv_program; @@ -177,9 +178,17 @@ dprintk("RPC: svc_register(%s, %s, %d)\n", progp->pg_name, proto == IPPROTO_UDP? "udp" : "tcp", port); + /* FIXME: What had been going on before was saving and restoring + current->signal. This as opposed to blocking signals? Do we + still need them to wake up out of schedule? In any case it + isn't playing nice and a better way should be found. */ + if (!port) { - oldsigs = current->signal; - current->signal = 0; + spin_lock_irqsave(¤t->sigmask_lock, flags); + old_set = current->blocked; + sigfillset(¤t->blocked); + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); } for (i = 0; i < progp->pg_nvers; i++) { @@ -193,7 +202,14 @@ break; } } - current->signal |= oldsigs; + + if (!port) { + spin_lock_irqsave(¤t->sigmask_lock, flags); + current->blocked = old_set; + recalc_sigpending(current); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + } + return error; } diff -u --recursive --new-file v2.1.67/linux/net/sysctl_net.c linux/net/sysctl_net.c --- v2.1.67/linux/net/sysctl_net.c Mon Jun 16 16:36:02 1997 +++ linux/net/sysctl_net.c Sun Nov 30 14:00:40 1997 @@ -24,7 +24,11 @@ extern ctl_table ipx_table[]; #endif -extern ctl_table core_table[], unix_table[]; +extern ctl_table core_table[]; + +#ifdef CONFIG_UNIX +extern ctl_table unix_table[]; +#endif #ifdef CONFIG_NET extern ctl_table ether_table[], e802_table[]; @@ -44,7 +48,9 @@ ctl_table net_table[] = { {NET_CORE, "core", NULL, 0, 0555, core_table}, +#ifdef CONFIG_UNIX {NET_UNIX, "unix", NULL, 0, 0555, unix_table}, +#endif #ifdef CONFIG_NET {NET_802, "802", NULL, 0, 0555, e802_table}, {NET_ETHER, "ethernet", NULL, 0, 0555, ether_table}, diff -u --recursive --new-file v2.1.67/linux/net/unix/Makefile linux/net/unix/Makefile --- 
v2.1.67/linux/net/unix/Makefile Mon Apr 7 11:35:33 1997 +++ linux/net/unix/Makefile Sun Nov 30 14:00:40 1997 @@ -9,6 +9,7 @@ O_TARGET := unix.o O_OBJS := af_unix.o garbage.o +M_OBJS := $(O_TARGET) ifeq ($(CONFIG_SYSCTL),y) O_OBJS += sysctl_net_unix.o diff -u --recursive --new-file v2.1.67/linux/net/unix/af_unix.c linux/net/unix/af_unix.c --- v2.1.67/linux/net/unix/af_unix.c Wed Sep 24 20:05:49 1997 +++ linux/net/unix/af_unix.c Sun Nov 30 14:00:40 1997 @@ -26,6 +26,7 @@ * Alan Cox : Started POSIXisms * Andreas Schwab : Replace inode by dentry for proper * reference counting + * Kirk Petersen : Made this a module * * Known differences from reference BSD that was tested: * @@ -57,6 +58,7 @@ * with BSD names. */ +#include #include #include #include @@ -310,6 +312,9 @@ sk->dead=1; unix_delayed_delete(sk); /* Try every so often until buffers are all freed */ } + + /* socket destroyed, decrement count */ + MOD_DEC_USE_COUNT; } static int unix_listen(struct socket *sock, int backlog) @@ -373,6 +378,10 @@ sk->mtu=4096; sk->protinfo.af_unix.list=&unix_sockets_unbound; unix_insert_socket(sk); + + /* socket created, increment count */ + MOD_INC_USE_COUNT; + return 0; } @@ -1465,7 +1474,14 @@ unix_create }; +#ifdef MODULE +extern void unix_sysctl_register(void); +extern void unix_sysctl_unregister(void); + +int init_module(void) +#else __initfunc(void unix_proto_init(struct net_proto *pro)) +#endif { struct sk_buff *dummy_skb; struct proc_dir_entry *ent; @@ -1474,14 +1490,33 @@ if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) { printk(KERN_CRIT "unix_proto_init: panic\n"); +#ifdef MODULE + return -1; +#else return; +#endif } sock_register(&unix_family_ops); #ifdef CONFIG_PROC_FS ent = create_proc_entry("net/unix", 0, 0); ent->read_proc = unix_read_proc; #endif + +#ifdef MODULE + unix_sysctl_register(); + + return 0; +#endif } + +#ifdef MODULE +void cleanup_module(void) +{ + sock_unregister(AF_UNIX); + unix_sysctl_unregister(); +} +#endif + /* * Local variables: * 
compile-command: "gcc -g -D__KERNEL__ -Wall -O6 -I/usr/src/linux/include -c af_unix.c" diff -u --recursive --new-file v2.1.67/linux/net/unix/sysctl_net_unix.c linux/net/unix/sysctl_net_unix.c --- v2.1.67/linux/net/unix/sysctl_net_unix.c Mon Jul 7 08:20:00 1997 +++ linux/net/unix/sysctl_net_unix.c Sun Nov 30 14:00:40 1997 @@ -29,4 +29,31 @@ &proc_dointvec_jiffies}, {0} }; -#endif + +#ifdef MODULE +static struct ctl_table_header * unix_sysctl_header; +static struct ctl_table unix_root_table[]; +static struct ctl_table unix_net_table[]; + +ctl_table unix_root_table[] = { + {CTL_NET, "net", NULL, 0, 0555, unix_net_table}, + {0} +}; + +ctl_table unix_net_table[] = { + {NET_UNIX, "unix", NULL, 0, 0555, unix_table}, + {0} +}; + +void unix_sysctl_register(void) +{ + unix_sysctl_header = register_sysctl_table(unix_root_table, 0); +} + +void unix_sysctl_unregister(void) +{ + unregister_sysctl_table(unix_sysctl_header); +} +#endif /* MODULE */ + +#endif /* CONFIG_SYSCTL */ diff -u --recursive --new-file v2.1.67/linux/net/x25/af_x25.c linux/net/x25/af_x25.c --- v2.1.67/linux/net/x25/af_x25.c Wed Sep 24 20:05:49 1997 +++ linux/net/x25/af_x25.c Sun Nov 30 14:00:40 1997 @@ -440,6 +440,7 @@ sock_init_data(NULL, sk); + skb_queue_head_init(&x25->ack_queue); skb_queue_head_init(&x25->fragment_queue); skb_queue_head_init(&x25->interrupt_in_queue); skb_queue_head_init(&x25->interrupt_out_queue); diff -u --recursive --new-file v2.1.67/linux/net/x25/x25_dev.c linux/net/x25/x25_dev.c --- v2.1.67/linux/net/x25/x25_dev.c Mon Jul 7 08:20:00 1997 +++ linux/net/x25/x25_dev.c Sun Nov 30 14:00:40 1997 @@ -177,7 +177,6 @@ } skb->protocol = htons(ETH_P_X25); - skb->priority = SOPRI_NORMAL; skb->dev = neigh->dev; skb->arp = 1; @@ -208,7 +207,6 @@ } skb->protocol = htons(ETH_P_X25); - skb->priority = SOPRI_NORMAL; skb->dev = neigh->dev; skb->arp = 1; @@ -236,7 +234,6 @@ } skb->protocol = htons(ETH_P_X25); - skb->priority = SOPRI_NORMAL; skb->dev = neigh->dev; skb->arp = 1; diff -u --recursive 
--new-file v2.1.67/linux/net/x25/x25_in.c linux/net/x25/x25_in.c --- v2.1.67/linux/net/x25/x25_in.c Mon Jul 7 08:20:00 1997 +++ linux/net/x25/x25_in.c Sun Nov 30 14:00:40 1997 @@ -174,6 +174,7 @@ sk->protinfo.x25->vr = 0; sk->protinfo.x25->va = 0; sk->protinfo.x25->vl = 0; + x25_requeue_frames(sk); break; case X25_CLEAR_REQUEST: @@ -199,11 +200,9 @@ sk->protinfo.x25->vl = 0; sk->protinfo.x25->state = X25_STATE_4; } else { - if (sk->protinfo.x25->condition & X25_COND_PEER_RX_BUSY) { - sk->protinfo.x25->va = nr; - } else { - x25_check_iframes_acked(sk, nr); - } + x25_frames_acked(sk, nr); + if (frametype == X25_RNR) + x25_requeue_frames(sk); } break; @@ -221,11 +220,7 @@ sk->protinfo.x25->state = X25_STATE_4; break; } - if (sk->protinfo.x25->condition & X25_COND_PEER_RX_BUSY) { - sk->protinfo.x25->va = nr; - } else { - x25_check_iframes_acked(sk, nr); - } + x25_frames_acked(sk, nr); if (sk->protinfo.x25->condition & X25_COND_OWN_RX_BUSY) break; if (ns == sk->protinfo.x25->vr) { @@ -298,6 +293,7 @@ sk->protinfo.x25->vs = 0; sk->protinfo.x25->vl = 0; sk->protinfo.x25->state = X25_STATE_3; + x25_requeue_frames(sk); break; case X25_CLEAR_REQUEST: diff -u --recursive --new-file v2.1.67/linux/net/x25/x25_out.c linux/net/x25/x25_out.c --- v2.1.67/linux/net/x25/x25_out.c Mon Jul 7 08:20:00 1997 +++ linux/net/x25/x25_out.c Sun Nov 30 14:00:40 1997 @@ -126,8 +126,8 @@ void x25_kick(struct sock *sk) { - struct sk_buff *skb; - unsigned short end; + struct sk_buff *skb, *skbn; + unsigned short start, end; int modulus; if (sk->protinfo.x25->state != X25_STATE_3) @@ -149,11 +149,15 @@ return; modulus = (sk->protinfo.x25->neighbour->extended) ? X25_EMODULUS : X25_SMODULUS; + + start = (skb_peek(&sk->protinfo.x25->ack_queue) == NULL) ? 
sk->protinfo.x25->va : sk->protinfo.x25->vs; end = (sk->protinfo.x25->va + sk->protinfo.x25->facilities.winsize_out) % modulus; - if (sk->protinfo.x25->vs == end) + if (start == end) return; + sk->protinfo.x25->vs = start; + /* * Transmit data until either we're out of data to send or * the window is full. @@ -162,13 +166,25 @@ skb = skb_dequeue(&sk->write_queue); do { + if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { + skb_queue_head(&sk->write_queue, skb); + break; + } + + skb_set_owner_w(skbn, sk); + /* - * Transmit the frame. + * Transmit the frame copy. */ - x25_send_iframe(sk, skb); + x25_send_iframe(sk, skbn); sk->protinfo.x25->vs = (sk->protinfo.x25->vs + 1) % modulus; + /* + * Requeue the original data frame. + */ + skb_queue_tail(&sk->protinfo.x25->ack_queue, skb); + } while (sk->protinfo.x25->vs != end && (skb = skb_dequeue(&sk->write_queue)) != NULL); sk->protinfo.x25->vl = sk->protinfo.x25->vr; @@ -193,17 +209,6 @@ sk->protinfo.x25->condition &= ~X25_COND_ACK_PENDING; x25_stop_timer(sk); -} - -void x25_check_iframes_acked(struct sock *sk, unsigned short nr) -{ - if (sk->protinfo.x25->vs == nr) { - sk->protinfo.x25->va = nr; - } else { - if (sk->protinfo.x25->va != nr) { - sk->protinfo.x25->va = nr; - } - } } #endif diff -u --recursive --new-file v2.1.67/linux/net/x25/x25_subr.c linux/net/x25/x25_subr.c --- v2.1.67/linux/net/x25/x25_subr.c Mon Jul 7 08:20:00 1997 +++ linux/net/x25/x25_subr.c Sun Nov 30 14:00:40 1997 @@ -50,6 +50,9 @@ while ((skb = skb_dequeue(&sk->write_queue)) != NULL) kfree_skb(skb, FREE_WRITE); + while ((skb = skb_dequeue(&sk->protinfo.x25->ack_queue)) != NULL) + kfree_skb(skb, FREE_WRITE); + while ((skb = skb_dequeue(&sk->protinfo.x25->interrupt_in_queue)) != NULL) kfree_skb(skb, FREE_READ); @@ -58,6 +61,49 @@ while ((skb = skb_dequeue(&sk->protinfo.x25->fragment_queue)) != NULL) kfree_skb(skb, FREE_READ); +} + + +/* + * This routine purges the input queue of those frames that have been + * acknowledged. 
This replaces the boxes labelled "V(a) <- N(r)" on the + * SDL diagram. +*/ +void x25_frames_acked(struct sock *sk, unsigned short nr) +{ + struct sk_buff *skb; + int modulus; + + modulus = (sk->protinfo.x25->neighbour->extended) ? X25_EMODULUS : X25_SMODULUS; + + /* + * Remove all the ack-ed frames from the ack queue. + */ + if (sk->protinfo.x25->va != nr) { + while (skb_peek(&sk->protinfo.x25->ack_queue) != NULL && sk->protinfo.x25->va != nr) { + skb = skb_dequeue(&sk->protinfo.x25->ack_queue); + kfree_skb(skb, FREE_WRITE); + sk->protinfo.x25->va = (sk->protinfo.x25->va + 1) % modulus; + } + } +} + +void x25_requeue_frames(struct sock *sk) +{ + struct sk_buff *skb, *skb_prev = NULL; + + /* + * Requeue all the un-ack-ed frames on the output queue to be picked + * up by x25_kick. This arrangement handles the possibility of an empty + * output queue. + */ + while ((skb = skb_dequeue(&sk->protinfo.x25->ack_queue)) != NULL) { + if (skb_prev == NULL) + skb_queue_head(&sk->write_queue, skb); + else + skb_append(skb_prev, skb); + skb_prev = skb; + } } /*