From: Alexander Sverdlin Date: Sun, 29 Mar 2015 01:42:56 +0100 Subject: [PATCH] systemd-bootchart: Prevent closing random file descriptors If the kernel has no CONFIG_SCHED_DEBUG option set, systemd-bootchart produces empty .svg file. The reason for this is very fragile file descriptor logic in log_sample() and main() (/* do some cleanup, close fd's */ block). There are many places where file descriptors are closed on failure (missing SCHED_DEBUG provokes it), but there are several problems with it: - following iterations in the loop see that the descriptor is non zero and do not open the corresponding file again; - "some cleanup" code closes already closed files and the descriptors are reused already, in particular for resulting .svg file; - static "vmstat" and "schedstat" variables in log_sample() made the situation even worse. These are the strace fragments: [...] close(7) = -1 EBADF (Bad file descriptor) close(-1) = -1 EBADF (Bad file descriptor) pread(7, 0xbea60a2c, 4095, 0) = -1 EBADF (Bad file descriptor) close(7) = -1 EBADF (Bad file descriptor) close(-1) = -1 EBADF (Bad file descriptor) pread(7, 0xbea60a2c, 4095, 0) = -1 EBADF (Bad file descriptor) close(7) = -1 EBADF (Bad file descriptor) close(-1) = -1 EBADF (Bad file descriptor) getdents64(4, /* 0 entries */, 32768) = 0 clock_gettime(CLOCK_MONOTONIC, {24, 783843501}) = 0 nanosleep({0, 5221792}, NULL) = 0 clock_gettime(CLOCK_MONOTONIC, {24, 789726835}) = 0 lseek(4, 0, SEEK_SET) = 0 pread(5, "nr_free_pages 52309\nnr_alloc_bat"..., 4095, 0) = 685 pread(6, "version 15\ntimestamp 4294939775\n"..., 4095, 0) = 86 getdents64(4, /* 99 entries */, 32768) = 2680 pread(7, 0xbea60a2c, 4095, 0) = -1 EBADF (Bad file descriptor) close(7) = -1 EBADF (Bad file descriptor) close(-1) = -1 EBADF (Bad file descriptor) pread(8, 0xbea60a2c, 4095, 0) = -1 EBADF (Bad file descriptor) close(8) = -1 EBADF (Bad file descriptor) close(-1) = -1 EBADF (Bad file descriptor) pread(9, 0xbea60a2c, 4095, 0) = -1 EBADF (Bad file descriptor) close(9) = -1 EBADF (Bad file descriptor) [...] where it obviously tries to close same and reused decriptors many times, also passing return code "-1" instead of descriptor... [...] close(7) = -1 EBADF (Bad file descriptor) close(-1) = -1 EBADF (Bad file descriptor) pipe2([7, 8], O_CLOEXEC) = 0 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xb6fd0068) = 192 close(8) = 0 fcntl64(7, F_SETFD, 0) = 0 fstat64(7, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb6fd2000 read(7, "[ 0.074507] calling vfp_init"..., 4096) = 4096 [...] read(7, "s)\n[ 6.228910] UBIFS: reserve"..., 4096) = 4096 read(7, "trary Executable File Formats Fi"..., 4096) = 1616 read(7, "", 4096) = 0 close(7) = 0 wait4(192, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 192 --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=192, si_uid=0, si_status=0, si_utime=9, si_stime=9} --- munmap(0xb6fd2000, 4096) = 0 fstat64(7, 0xbea62850) = -1 EBADF (Bad file descriptor) mmap2(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb6fce000 write(7, "/* because process exited will result in some other victim descriptor being closed later and will therefore disturb the whole collected statistics. --- src/bootchart/store.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/bootchart/store.c b/src/bootchart/store.c index dfa681f..8525f62 100644 --- a/src/bootchart/store.c +++ b/src/bootchart/store.c @@ -152,6 +152,7 @@ void log_sample(int sample, struct list_sample_data **ptr) { n = pread(vmstat, buf, sizeof(buf) - 1, 0); if (n <= 0) { close(vmstat); + vmstat = 0; return; } buf[n] = '\0'; @@ -184,6 +185,7 @@ vmstat_next: n = pread(schedstat, buf, sizeof(buf) - 1, 0); if (n <= 0) { close(schedstat); + schedstat = 0; return; } buf[n] = '\0'; @@ -286,6 +288,7 @@ schedstat_next: s = pread(ps->sched, buf, sizeof(buf) - 1, 0); if (s <= 0) { close(ps->sched); + ps->sched = 0; continue; } buf[s] = '\0'; @@ -387,8 +390,11 @@ schedstat_next: if (s <= 0) { /* clean up our file descriptors - assume that the process exited */ close(ps->schedstat); - if (ps->sched) + ps->schedstat = 0; + if (ps->sched) { close(ps->sched); + ps->sched = 0; + } //if (ps->smaps) // fclose(ps->smaps); continue; @@ -488,8 +494,11 @@ catch_rename: if (s <= 0) { /* clean up file descriptors */ close(ps->sched); - if (ps->schedstat) + ps->sched = 0; + if (ps->schedstat) { close(ps->schedstat); + ps->schedstat = 0; + } //if (ps->smaps) // fclose(ps->smaps); continue; -- 2.3.1