Signed-off-by: Servaas Vandenberghe diff -pru xdm-a/include/dm.h xdm/include/dm.h --- xdm-a/include/dm.h 2011-01-17 18:52:07.000000000 +0100 +++ xdm/include/dm.h 2011-02-07 17:47:05.000000000 +0100 @@ -177,7 +177,8 @@ struct display { pid_t serverPid; /* process id of server (-1 if none) */ FileState state; /* state during HUP processing */ int startTries; /* current start try */ - Time_t lastCrash; /* time of last crash */ + Time_t lastReserv; /* time of last reserver crash */ + int reservTries; /* current reserver try */ # ifdef XDMCP /* XDMCP state */ CARD32 sessionID; /* ID of active session */ @@ -197,6 +198,7 @@ struct display { int openRepeat; /* open attempts to make */ int openTimeout; /* abort open attempt timeout */ int startAttempts; /* number of attempts at starting */ + int reservAttempts; /* allowed start-IO error sequences */ int pingInterval; /* interval between XSync */ int pingTimeout; /* timeout for XSync */ int terminateServer;/* restart for each session */ Only in xdm/include: dm.h~ diff -pru xdm-a/man/xdm.man xdm/man/xdm.man --- xdm-a/man/xdm.man 2011-01-17 18:52:07.000000000 +0100 +++ xdm/man/xdm.man 2011-02-07 17:39:50.000000000 +0100 @@ -450,27 +450,31 @@ See the section .IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.openRepeat\fP" .IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.openTimeout\fP" .IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.startAttempts\fP" +.IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.reservAttempts\fP" These numeric resources control the behavior of .I xdm when attempting to open intransigent servers. 
\fBopenDelay\fP is the length of the -pause (in seconds) between successive attempts, \fBopenRepeat\fP is the +pause in seconds between successive attempts, \fBopenRepeat\fP is the number of attempts to make, \fBopenTimeout\fP is the amount of time to wait while actually attempting the open (i.e., the maximum time spent in the .IR connect (2) -system call) and \fBstartAttempts\fP is the number of times this entire process -is done before giving up on the server. After \fBopenRepeat\fP attempts have been made, -or if \fBopenTimeout\fP seconds elapse in any particular attempt, +system call) and \fBstartAttempts\fP is the number of times this +entire process is done before giving up on the server. +After \fBopenRepeat\fP attempts have been made, or +if \fBopenTimeout\fP seconds elapse in any particular attempt, .I xdm terminates and restarts the server, attempting to connect again. -This -process is repeated \fBstartAttempts\fP times, at which point the display is -declared dead and disabled. Although +This process is repeated \fBstartAttempts\fP times, at which point +the display is declared dead and disabled. Although this behavior may seem arbitrary, it has been empirically developed and -works quite well on most systems. The default values are -5 for \fBopenDelay\fP, 5 for \fBopenRepeat\fP, 30 for \fBopenTimeout\fP and -4 for \fBstartAttempts\fP. +works quite well on most systems. The bound \fBreservAttempts\fP +is the number of times a successful connect is allowed to be followed +by a fatal error. When this bound is reached, the display is disabled. +The default values are +\fBopenDelay\fP: 15, \fBopenRepeat\fP: 5, \fBopenTimeout\fP: 120, +\fBstartAttempts\fP: 4 and \fBreservAttempts\fP: 2. 
.IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.pingInterval\fP" .IP "\fBDisplayManager.\fP\fIDISPLAY\fP\fB.pingTimeout\fP" To discover when remote displays disappear, diff -pru xdm-a/xdm/dm.c xdm/xdm/dm.c --- xdm-a/xdm/dm.c 2011-01-29 04:39:43.000000000 +0100 +++ xdm/xdm/dm.c 2011-02-07 17:40:32.000000000 +0100 @@ -492,6 +492,7 @@ WaitForChild (void) break; case OBEYSESS_DISPLAY: d->startTries = 0; + d->reservTries = 0; Debug ("Display exited with OBEYSESS_DISPLAY\n"); if (d->displayType.lifetime != Permanent || d->status == zombie) @@ -533,11 +534,16 @@ WaitForChild (void) int crash; time(&now); - Debug("time %i %i\n", now, d->lastCrash); - crash = d->lastCrash && - ((now - d->lastCrash) < XDM_BROKEN_INTERVAL); + crash = d->lastReserv && + ((now - d->lastReserv) < XDM_BROKEN_INTERVAL); + Debug("time %i %i try %i of %i%s\n", now, d->lastReserv, + d->reservTries, d->reservAttempts, + crash ? " crash" : ""); - if (crash) { + if (!crash) + d->reservTries = 0; + + if (crash && ++d->reservTries >= d->reservAttempts) { char msg[] = "Server crash frequency too high:" " stopping display"; Debug("%s %s\n", msg, d->name); @@ -561,7 +567,7 @@ WaitForChild (void) } else { RestartDisplay(d, TRUE); } - d->lastCrash = now; + d->lastReserv = now; } break; case waitCompose (SIGTERM,0,0): diff -pru xdm-a/xdm/dpylist.c xdm/xdm/dpylist.c --- xdm-a/xdm/dpylist.c 2011-01-17 18:52:07.000000000 +0100 +++ xdm/xdm/dpylist.c 2011-02-06 08:54:42.000000000 +0100 @@ -241,7 +241,9 @@ NewDisplay (char *name, char *class) d->openTimeout = 0; d->startAttempts = 0; d->startTries = 0; - d->lastCrash = 0; + d->lastReserv = 0; + d->reservAttempts = 0; + d->reservTries = 0; d->terminateServer = 0; d->grabTimeout = 0; #ifdef XDMCP diff -pru xdm-a/xdm/resource.c xdm/xdm/resource.c --- xdm-a/xdm/resource.c 2011-01-17 18:52:07.000000000 +0100 +++ xdm/xdm/resource.c 2011-02-07 13:53:00.000000000 +0100 @@ -222,6 +222,8 @@ struct displayResource serverResources[] "120" }, { "startAttempts","StartAttempts",DM_INT, 
boffset(startAttempts), "4" }, +{ "reservAttempts","ReservAttempts",DM_INT, boffset(reservAttempts), + "2" }, { "pingInterval","PingInterval",DM_INT, boffset(pingInterval), "5" }, { "pingTimeout","PingTimeout", DM_INT, boffset(pingTimeout),