Yesterday, I was digging issue in our program product (just to make things clearer, product is about network automation). Problem was in maintenance Perl script which performs heavyweight calculation and is being run periodically. It seemed like script lived persistently without any wish to finish their work and die appropriately. The rest of the issue investigation's story is commercially valuable information and can't be disclosed. Hopefully, it's irrelevant to the main subject of the post.
This issue wasn't reproduced on the rest of available test environments. Hence there was no chance to re-run this issue under the Perl debugger. The things were given as is. No CPU consumption, no massive IO wait, no swapping hell, database worked fast. Script was just hanged. I decided to drill alive Perl interpreter's process with strace at least to define if there is deadlock/livelock/waiting for resource. Here, I'd like to say that original problem was investigated successfully. What really matters is what I found during my investigation. Here is edited output from strace (with my humble comments):
~# strace -p 8888
...// it's fine
open("/tmp/f", O_RDONLY|O_LARGEFILE) = 3
// what is it?
ioctl(3, SNDCTL_TMR_TIMEBASE or TCGETS, 0xbfa77468) = -1 ENOTTY (Inappropriate ioctl for device)
...
// it's quite predictable
close(3)
...
~#
What I was surprised by is how exactly Perl open function looks at the lowest level of userspace. What does ioctl( ... ) stand for here? Moreover, what does its flags mean in the context?
Just to compare behavior during opening file in another languages, I checked how Ruby works. Here is the results:
~# ruby -e '1000000.times { File.open( "/tmp/f", "r" ) { |f| f.close; } }' &
[1] 8069
~# strace -p 8069
...
// it's OK; O_LARGEFILE makes sense also
open("/tmp/f", O_RDONLY|O_LARGEFILE) = 5
// filling internal instance variables, I guess
fstat64(5, {st_mode=S_IFREG|0664, st_size=0, ...}) = 0
fstat64(5, {st_mode=S_IFREG|0664, st_size=0, ...}) = 0
// what is happening...?
ioctl(5, SNDCTL_TMR_TIMEBASE or TCGETS, 0xbf872138) = -1 ENOTTY (Inappropriate ioctl for device)
// OK; the rest is fine at least
close(5)
...
And I met it again. Since MRI Ruby source code is available on GitHub I decided to dig into File class implementation (Perl source could be chosen too but I'm used to neat UI features of GitHub). Here is what I found out with some inline comments:
Firstly, I tried to find instance
initialize( ... ) method:
io.c:
...
void
Init_IO(void)
{
//...
Init_File();
rb_define_method(rb_cFile, "initialize", rb_file_initialize, -1);
//...
}
...
Then:
io.c:
...
static VALUE
rb_file_initialize(int argc, VALUE *argv, VALUE io)
{
if (RFILE(io)->fptr) {
rb_raise(rb_eRuntimeError, "reinitializing File");
}
if (0 < argc && argc < 3) {
VALUE fd = rb_check_convert_type(argv[0], T_FIXNUM, "Fixnum", "to_int");
if (!NIL_P(fd)) {
argv[0] = fd;
return rb_io_initialize(argc, argv, io);
}
}
rb_open_file(argc, argv, io);
return io;
}
...
Then:
io.c:
...
static VALUE
rb_open_file(int argc, VALUE *argv, VALUE io)
{
VALUE fname;
int oflags, fmode;
convconfig_t convconfig;
mode_t perm;
rb_scan_open_args(argc, argv, &fname, &oflags, &fmode, &convconfig, &perm);
rb_file_open_generic(io, fname, oflags, fmode, &convconfig, perm);
return io;
}
...
Then:
io.c:
...
static VALUE
rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, convconfig_t *convconfig, mode_t perm)
{
rb_io_t *fptr;
convconfig_t cc;
if (!convconfig) {
/* Set to default encodings */
rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2);
cc.ecflags = 0;
cc.ecopts = Qnil;
convconfig = &cc;
}
validate_enc_binmode(&fmode, convconfig->ecflags,
convconfig->enc, convconfig->enc2);
MakeOpenFile(io, fptr);
fptr->mode = fmode;
fptr->encs = *convconfig;
fptr->pathv = rb_str_new_frozen(filename);
fptr->fd = rb_sysopen(fptr->pathv, oflags, perm);
io_check_tty(fptr); // Hmm...? Let's get back later...
if (fmode & FMODE_SETENC_BY_BOM) io_set_encoding_by_bom(io);
return io;
}
...
Then:
io.c:
...
static int
rb_sysopen(VALUE fname, int oflags, mode_t perm)
{
int fd;
struct sysopen_struct data;
data.fname = rb_str_encode_ospath(fname);
data.oflags = oflags;
data.perm = perm;
fd = rb_sysopen_internal(&data);
if (fd < 0) {
if (errno == EMFILE || errno == ENFILE) {
rb_gc();
fd = rb_sysopen_internal(&data);
}
if (fd < 0) {
rb_sys_fail_path(fname);
}
}
rb_update_max_fd(fd);
return fd;
}
...
Then:
io.c:
...
static inline int
rb_sysopen_internal(struct sysopen_struct *data)
{
int fd;
fd = (int)rb_thread_blocking_region(sysopen_func, data, RUBY_UBF_IO, 0);
if (0 <= fd)
rb_update_max_fd(fd);
return fd;
}
...
Huh. We need to go deeper... Then:
io.c:
...
static VALUE
sysopen_func(void *ptr)
{
const struct sysopen_struct *data = ptr;
const char *fname = RSTRING_PTR(data->fname);
return (VALUE)open(fname, data->oflags, data->perm);
}
...
That's it. Here is final point in this branch. If I was regular expression in depth-first search mode I would backtrack now. Since I'm human I returned to the point where I saw another interesting call:
io.c:
...
static VALUE
rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, convconfig_t *convconfig, mode_t perm)
{
rb_io_t *fptr;
convconfig_t cc;
if (!convconfig) {
/* Set to default encodings */
rb_io_ext_int_to_encs(NULL, NULL, &cc.enc, &cc.enc2);
cc.ecflags = 0;
cc.ecopts = Qnil;
convconfig = &cc;
}
validate_enc_binmode(&fmode, convconfig->ecflags,
convconfig->enc, convconfig->enc2);
MakeOpenFile(io, fptr);
fptr->mode = fmode;
fptr->encs = *convconfig;
fptr->pathv = rb_str_new_frozen(filename);
fptr->fd = rb_sysopen(fptr->pathv, oflags, perm);
io_check_tty(fptr); // Hmm...? Let's get back later...
if (fmode & FMODE_SETENC_BY_BOM) io_set_encoding_by_bom(io);
return io;
}
...
Then:
io.c:
...
static void
io_check_tty(rb_io_t *fptr)
{
if (isatty(fptr->fd))
fptr->mode |= FMODE_TTY|FMODE_DUPLEX;
}
...
Hmm. int isatty( int ) is POSIX function which tests whether passed descriptor is terminal or not. It looks like something at GLibc level... Hmm... Let's write brief test on pure C:
/tmp/test.c:
#include "~/include/stdio.h"
int main( void ) {
for( int i = 0; i < 10000000; ++i )
fclose( fopen( "/tmp/f", "r" ) );
}
Let's compile and run:
~$ gcc -std=c99 /tmp/test.c -o /tmp/test
~$ /tmp/test &
[1] 14939
~$ sudo strace -p 14939
...
// Nothing special...
open("/tmp/f", O_RDONLY) = 3
close(3) = 0
open("/tmp/f", O_RDONLY) = 3
close(3) = 0
...
OK. Then it's not about implementation of
open( ... ) POSIX call in Glibc. Let's turn our sight to another player:
/tmp/test.c:
#define _POSIX_SOURCE
#include "~/include/stdio.h"
#include "~/include/unistd.h"
int main( void ) {
FILE *f;
for( int i = 0; i < 10000000; ++i ) {
f = fopen( "/tmp/f", "r" );
isatty( fileno( f ) );
fclose( f );
}
}
Let's test our opaque guessing that there is something in isatty( ... ):
buff@badtop:~$ gcc -std=c99 /tmp/test.c -o /tmp/test
buff@badtop:~$ /tmp/test &
[1] 15046
buff@badtop:~$ sudo strace -p 15032
...
open("/tmp/f", O_RDONLY) = 3
// GOTCHA!
ioctl(3, SNDCTL_TMR_TIMEBASE or TCGETS, 0xbfac3788) = -1 ENOTTY (Inappropriate ioctl for device)
close(3) = 0
open("/tmp/f", O_RDONLY) = 3
ioctl(3, SNDCTL_TMR_TIMEBASE or TCGETS, 0xbfac3788) = -1 ENOTTY (Inappropriate ioctl for device)
close(3) = 0
...
That's it. It was isatty(...). And this is how it's exactly implemented in Linux. Not sure about other UNIXes. I found via Google a noticeable amount of discussions at forums related to this topic. Authors guessed that there were some problems on the system level after they had investigated behavior of their scripts via strace tool. Just to stop these discussions I'd like to say: there is no problem. It is just like isatty( ... ) POSIX API call implemented in Glibc. In mentioned case, file is regular file and doesn't support terminal ioctls. But really wondering what sound-related SNDCTL_TMR_TIMEBASE ioctl stands for. Probably, we need to go deeper into Ulrich Drepper castle.
PS: Don't be surprised by strange #include directives. Angled brackets don't work. Probably, I should have to learn HTML basics before publishing.