Unhang TCP connections stuck in FIN_WAIT_2 state
dennis at rlgvax.UUCP
dennis at rlgvax.UUCP
Thu Feb 6 06:22:35 AEST 1986
"Fixtcp" is a shell script which is useful for getting rid of 4.2bsd TCP
connections hung in the FIN_WAIT_2 state.
Steps:
Save this file in some directory.
Remove the leading lines from this file so that "#! /bin/sh" is
the first line.
Type "sh file" where file is the name of this file.
cat fixtcp.mk, and follow those directions.
Enjoy,
-dennis
#--------------- CUT HERE ---------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
# _get_tcp_.c
# fixtcp
# fixtcp.mk
# This archive created: Wed Feb 5 15:16:26 EST 1986
#
if test -f _get_tcp_.c
then
echo shar: will not over-write existing file '_get_tcp_.c'
else
echo x - _get_tcp_.c
# ............ F I L E B E G .......... _get_tcp_.c
cat << '\SHAR_EOF' > _get_tcp_.c
/*
* dennis at rlgvax
* prints offsets of fields in TCP connection control block.
* called by fixtcp sh script
*/
#include <stdio.h>
#include <sys/types.h> /* u_char */
#include <netinet/tcp.h> /* tcp_seq typedef */
#include <netinet/tcp_timer.h> /* tcp timers */
#include <netinet/tcp_var.h> /* tcp connection control block */
#include <netinet/tcp_fsm.h> /* defines for tcp states */
/* use S3/S5 strrchr(), but on 4.x systems, remap to Berkeley rindex */
#ifdef BSD4
# define strrchr rindex
#endif
#define STR_SAME !strcmp
#define STR_DIFF strcmp
/* fw non-int functions */
char *basename();
/* external non-int functions */
extern char *strrchr();
/*
 * Print, in hex, one value that the fixtcp script needs, selected by argv[1]:
 *	state		byte offset of t_state within struct tcpcb
 *	2msl		byte offset of t_timer[TCPT_2MSL] within struct tcpcb
 *	FIN_WAIT2	value of TCPS_FIN_WAIT_2
 *	TIME_CLOSE	value of TCPS_TIME_WAIT
 * Any other argument, or a wrong argument count, prints a usage message on
 * stderr and exits 1.  Exits 0 after printing a value.
 */
main(argc, argv)
int argc;
char **argv;
{
	char *cmd;
	struct tcpcb *p = 0;	/* null base, so member addresses are offsets */

	cmd = basename(argv[0]);

	if (argc != 2)
	{
usage:
		fprintf(stderr, "usage: %s state|2msl|FIN_WAIT2|TIME_CLOSE\n", cmd);
		exit(1);
	}

	/*
	 * The offsets are computed as an explicit byte difference and printed
	 * as unsigned long, rather than handing a pointer to %x, which only
	 * works where pointers and ints have the same size.
	 */
	if (STR_SAME(argv[1], "state"))
		printf("0x%lx\n",
		    (unsigned long) ((char *) &p->t_state - (char *) p));
	else if (STR_SAME(argv[1], "2msl"))
		printf("0x%lx\n",
		    (unsigned long) ((char *) &p->t_timer[TCPT_2MSL] - (char *) p));
	else if (STR_SAME(argv[1], "FIN_WAIT2"))
		printf("0x%x\n", TCPS_FIN_WAIT_2);	/* state value */
	else if (STR_SAME(argv[1], "TIME_CLOSE"))
		printf("0x%x\n", TCPS_TIME_WAIT);	/* state value */
	else
		goto usage;

	exit(0);	/* give callers a defined success status */
}
/*
 * Return the last component of a path name.
 *
 * path is a NUL-terminated path name.  The result points into path itself
 * (nothing is copied): just past the rightmost '/', or at the start of path
 * when it contains no '/'.
 */
char *
basename(path)
char *path;
{
	char *cp;		/* rightmost '/' in path, if any */

	cp = strrchr(path, '/');
	if (cp == NULL)		/* no slash: the whole string is the name */
		return path;
	return cp + 1;		/* step over the slash itself */
}
\SHAR_EOF
# ............ F I L E E N D .......... _get_tcp_.c
fi # end of overwriting check
if test -f fixtcp
then
echo shar: will not over-write existing file 'fixtcp'
else
echo x - fixtcp
# ............ F I L E B E G .......... fixtcp
cat << '\SHAR_EOF' > fixtcp
# fixtcp
# dennis bednar jan 24 86 dennis at rlgvax.uucp
#
# Unhang tcp connections which are stuck in the FIN_WAIT2 state
# These connections can be seen by doing a 4.2 netstat -a command.
#
# Usage:
# invoke as "fixtcp" to display kernel stuff for connections.
# "fixtcp" by itself is HIGHLY RECOMMENDED for the first time!
#
# invoke as "fixtcp fix" to patch kernel memory - you must be root.
# Then do a netstat -a command, and it should have gone away.
#
# CCI only symptom:
# A symptom of this problem is that "startoftp" goes wild restarting
# the receive daemon, and you see a lot of rcvlog.pid files being
# created in the oftp spool directory.
#
# Symptom for everybody else:
# In general, a symptom of this problem is that a tcpopen passive
# will fail with the errno UNIX reason being "Address Already In Use".
#
#
# To correct OFTP problem (CCI only):
# su root
# killoftp; fixtcp fix; startoftp
#
# Internals of how this script works:
# Works by loading the 2 * msl timer (addr+16) in the Connection Control Block
# with a 1, which means it will time out in 1/2 second from now and
# enter the CLOSE state, and then the CCB will be freed (so you will not
# see it with netstat -a).
# The proper offset for the 2 * msl timer can be seen by examining
# /usr/include/netinet/tcp_var.h include file, plus other tcp*.h files
# in the same directory.
#
# relies on
# _get_tcp_ a.out file that returns the offset of various
# fields in a connection control block.
# There is a _get_tcp_.c file to create this.
# This was created to avoid problems of offsets
# being site-dependent, if your OS uses different
# offsets.
#
#
# don't print full path name of command in error messages
cmd=`basename $0`

# name of state to look for in the netstat command
# state=ESTABLISHED # debugging
state=FIN_WAIT_2 # really

# get the values of the offsets of the fields in the structure for adb
stateoff=`_get_tcp_ state` # probably 0x8
timer2msloff=`_get_tcp_ 2msl` # probably 0x10
FIN_WAIT2=`_get_tcp_ FIN_WAIT2` # probably 9
FIN_CLOSE=`_get_tcp_ TIME_CLOSE` # probably 10

# Refuse to go on if _get_tcp_ gave us nothing (not built, or not in PATH):
# an empty offset would turn the adb addresses built from it into garbage.
if [ -z "$stateoff" -o -z "$timer2msloff" -o -z "$FIN_WAIT2" ]
then
	echo "$cmd: cannot get offsets from _get_tcp_ (is it built and in your PATH?)" 1>&2
	exit 1
fi

# remove temp file if SIGHUP, SIGINT, SIGTERM
# (rm -f, so nothing is printed if the file has not been created yet)
trap "echo $cmd: interrupted; rm -f /tmp/fixtcp.$$; exit 1" 1 2 15

# get kernel address of TCP CCB's in FIN_WAIT2 and save in a temporary file.
# The address is taken to be everything before the first blank on each line.
# No p flag on the substitution: sed already prints every line once, so p
# would list each address twice.
netstat -A | grep $state | sed 's/ .*//' >/tmp/fixtcp.$$

# check if we got any addresses
if [ ! -s /tmp/fixtcp.$$ ]
then
	# file doesn't exist or is zero in length, therefore no addresses
	echo "$cmd: Sorry, no tcp connections stuck in $state state."
	rm -f /tmp/fixtcp.$$
	exit 0
fi

echo "Before: only connections in state $state"
netstat -a | grep $state
# cat /tmp/fixtcp.$$ # debug
# see if we want to patch kernel memory or just display it
if [ "$1" = "fix" ]
then
	# patch by writing a 1 into the 2 * msl timer of every address collected
	for addr in `cat /tmp/fixtcp.$$`
	do
		adb -w /vmunix /dev/kmem <<EOF
0x$addr+$timer2msloff/w 1
\$q
EOF
	done

	sleep 2 # wait for connection to clear

	# make sure it really got unstuck: the temp file is reused to hold
	# whatever netstat still reports in the stuck state
	netstat -a | grep $state >/tmp/fixtcp.$$
	if [ -s /tmp/fixtcp.$$ ] # file exists and size > 0
	then
		echo "$cmd: Sorry, TCP connections still hung!!"
		rm /tmp/fixtcp.$$
		exit 1
	else
		echo "$cmd: TCP connections in state $state have been unstuck."
	fi
else
	# just display the current state flag and current 2 * msl timer.
	# The offsets come from _get_tcp_ (stateoff, timer2msloff), the same
	# source the patch branch uses, instead of being hard-coded here.
	for addr in `cat /tmp/fixtcp.$$`
	do
		echo "The next two numbers displayed by adb should be $FIN_WAIT2 and 0."
		echo "The state flag value of $FIN_WAIT2 represents the FIN_WAIT_2 state."
		echo "The decimal 0 means the 2 * msl timer is off."
		adb /vmunix /dev/kmem <<EOF
0x$addr+$stateoff/d
0x$addr+$timer2msloff/d
\$q
EOF
	done
fi

# cleanup intermediate file
rm /tmp/fixtcp.$$

echo "After: only connections in state $state"
netstat -a | grep $state
exit 0
\SHAR_EOF
# ............ F I L E E N D .......... fixtcp
fi # end of overwriting check
if test -f fixtcp.mk
then
echo shar: will not over-write existing file 'fixtcp.mk'
else
echo x - fixtcp.mk
# ............ F I L E B E G .......... fixtcp.mk
cat << '\SHAR_EOF' > fixtcp.mk
#
# dennis at rlgvax 2/4/86
#
# fixtcp.mk Makefile, this file
# fixtcp shell script
# _get_tcp_.c C program
# _get_tcp_ a.out program called by fixtcp
# .fixtcp.mail header for mail
#
# directions, type
# make -f fixtcp.mk # to make necessary files
# edit fixtcp.mk and change INSTALLDIR
# make -f fixtcp.mk install
# cd $INSTALLDIR # directory where you really installed it
# fixtcp # to display tcp connections hung in finwait2
#
# # don't do this if you have none to unstick
# su root # required for adb write mode
# fixtcp fix # to actually unstick tcp connections
# change this at your site
INSTALLDIR = .

# Default target: build the helper program that fixtcp calls.
all: _get_tcp_

# Explicit build rule, so the helper does not depend on make's built-in rules.
# _get_tcp_.c remaps strrchr to rindex when BSD4 is defined; on a 4.x system
# that needs this, run:  make -f fixtcp.mk CFLAGS=-DBSD4
_get_tcp_: _get_tcp_.c
	$(CC) $(CFLAGS) $(LDFLAGS) -o _get_tcp_ _get_tcp_.c

# Remove what "all" builds.
clean:
	rm -f _get_tcp_

# Copy the helper and the script into INSTALLDIR.
# The cp errors are ignored on purpose (leading -) -- presumably because with
# the default INSTALLDIR of "." cp refuses to copy a file onto itself.
# fixtcp is then made executable, since the directions run it by name.
install: _get_tcp_
	-cp _get_tcp_ $(INSTALLDIR)
	-cp fixtcp $(INSTALLDIR)
	chmod +x $(INSTALLDIR)/fixtcp
# distribute the latest version to the world, private for dennis at rlgvax
# Stages the files in a fresh /tmp/dpb and appends a shar of them to the mail
# header file there.
dist:
	rm -rf /tmp/dpb
	mkdir /tmp/dpb
	cp fixtcp.mk /tmp/dpb
	cp _get_tcp_.c /tmp/dpb
	cp ../cmd/fixtcp /tmp/dpb
	cp .fixtcp.mail /tmp/dpb
	(cd /tmp/dpb && makeshar * >>.fixtcp.mail)
# "&&" above: never run makeshar in the wrong directory if the cd fails.
# please note that .fixtcp.mail was chosen so that makeshar *
# doesn't try to append to itself.
\SHAR_EOF
# ............ F I L E E N D .......... fixtcp.mk
fi # end of overwriting check
# end of shell archive
exit 0
--
-Dennis Bednar
{decvax,ihnp4,harpo,allegra}!seismo!rlgvax!dennis UUCP
More information about the Comp.sources.unix
mailing list