perl off_t_problem.pl [directory|files]...
# check files for being problematic,
or just all files in a give directory
# use --silent when running it non-interactivly.
#! /usr/bin/env perl
eval 'exec perl -S $0 ${1+"$@"}'
if 0;
use strict;
# options-hash: use as $o{optionname} to check for commandline options.
# my %o=( files => 1, needed => 1, libpath => 1, detected => 1, symbols => 1 );
my %o = ( symbols => 1 );
my $help = "perl off_t_problem.pl [directory|files]...\n"
." scans the given files (or all files in a directory) for its\n"
." dynamic dependencies. The binary and all its dependencies\n"
." are classified whether they have been compiled as largefile\n"
." or not - depending on the existance of symbols like plain\n"
." fopen()/lseek() or their 64bit cousins from the transitional\n"
." largefile-API named fopen64()/lseek64() instead. When two\n"
." executable objects have a mismatch then it gets reported!\n"
." debug options:\n"
." --files after parsing commandline, print the list of files\n"
." that will be checked for largefile mismatch\n"
." --needed after scanning dynamic imports of the given files\n"
." print the (long) list of dependencies recognized\n"
." which wil be scanned too for largefile mismatches\n"
." --libpath show the libpath that was used to resolve some of\n"
." of the dependencies if `ldd` was not available\n"
." --symbols print the number of dynamic symbols found in each\n"
." object while scanning them (default=ON).\n"
." --detected for each object that was scanned, print the\n"
." classification attribute -??- -32- -64- or 3264\n"
." (along with the dynamic symbols that made this\n"
." think it is of that largefile type)\n"
." --quiet suppress the list of classifications printed just\n"
." or usually before the list of largefile mismatches\n"
." --silent ...it does also silence some other hints usually\n"
." printed to the screen (--quit/--no-symbols/--smart)\n"
." --smart suppress largefile mismatch for a limited set of\n"
." known dependency libs from which only a known set\n"
." of algorithm functions is imported (i.e. 'zlib')\n"
." --nonclean for libraries that might be checked smart, show\n"
." the first symbol that was thought to be offending.\n"
." --noncleanall or actually print all the imported symbols from\n"
." mismatching libs that are not known to be good.\n";
# helper: move to column - the length of the input string is taken current
sub col36 # column and some spaces are printed to STDOUT
{
my $column = length $_[0];
return if 36 <= $column;
return " " x (36 - $column);
}
# ----------------------------------------------------------------------
my %X; my $file; # use as $X{$file}
# this is the implicit libpath, as if used by ld.so to resolve imports..
my @L = ( "/lib", "/usr/lib", "/usr/local/lib");
{ # fill the library path
my $F = "/etc/ld.so.conf";
if (open F, "<$F")
{
while (<F>) { chomp; push @L; }
close F;
}else{
print STDERR "could not open $F: $!\n";
}
}
{ # scan the argument list, options and files and dirs, fill %X file-hash ...
my $old = ""; # pushback of $arg
my $arg;
for $arg (@ARGV)
{
if ($old =~ /^-L/) { push @L, $arg; $old = ""; next; }
if ($arg =~ /^--?help/) { print $help; exit 0; }
if ($arg =~ /^--?(\w[\w-]*)=(.*)/) { $o{$1} = $2; next; }
if ($arg =~ /^--?no-([a-z].*)/) { $o{$1} = ""; next; }
if ($arg =~ /^--?([a-z].*)/) { $o{$1} = "*"; next; }
if ($arg =~ /^-L(.+)/) { push @L, $1; next; }
if ($arg =~ /^-L/) { $old = $arg; next; }
if ($arg =~ /^-[A-Z]/) { die "illegal option $arg"; }
$arg =~ s/\/$//; # chomp dirsep
# register the file in th %X hash - .dir says where from (debugging)
if (-f $arg)
{
next if -d $arg or ! -r $arg;
$X{$arg}{dir} = $arg;
$X{$arg}{dir} =~ s:/[^/]+$::;
next;
}
# when a directory was given, we scan all executables in it
if (not opendir (D, $arg)) #
{
print STDERR "could not open directory '$arg': $!\n";
next;
}
my $entry;
foreach $entry (readdir (D))
{
my $file = "$arg/$entry";
if (-l $file) { $file = readlink $file or next; # try to resolve..
$file = "$arg/$file" if $file !~ m:^/:; }
next if -d $file or ! -r $file;
my $type = `file $file 2>/dev/null`;
next if $type =~ /script/ or $type =~ /text/;
# the following call will skip symlinks to real files..
# next unless $type =~ /ELF/; # well, we'll see warnings later on..
$X{$file}{dir} = $arg;
}
closedir (DIR);
} # for @ARGV
}
if ($o{libpath}) { # debugging - print @L list if "--libpath" seen
for $file (@L) {
print STDERR "-L ",$file,"\n";
}
}
if ($o{files}) { # debugging - print %X files if "--files" seen
for $file (sort keys %X) {
print STDERR $file, " <> ", $X{$file}{dir}, "\n";
}
}
# some options imply other options...
$o{quiet} = 1 if $o{silent};
$o{smart} = 1 if $o{silent};
$o{symbols} = "" if $o{silent}; # yes, --symbols is ON by default
$o{nonclean} = "*" if $o{noncleanall};
# __________________ detect dynamic library imports _________________
# register library imports in $X{$file}{needed}{*}
for $file (sort keys %X)
{
print "." if not $o{quiet};
# `ldd` prints a nice list of import libs and how they resolve
my $header = "";
$header = `ldd $file 2>/dev/null` unless $o{noldd}; # "--noldd" option
$header =~ s{ ^\s+(\S+)\s+[=][>]\s+(\S+) }
{ $X{$file}{needed}{$1} = $2; "" }gmex;
next if exists $X{$file}{needed};
# when there was nothing seen by `ldd` then try again with objdump.
# however, "objdump -p" shows lib imports but not how they resolve...
$header = `objdump -p $file 2>/dev/null`;
$header =~ s{ ^\s+NEEDED\s+(\S+) }
{
$X{$file}{needed}{$1} = "" unless $1 eq "NEEDED"; ""
}gmex;
my $lib;
for $lib (keys %{$X{$file}{needed}})
{
next if length $X{$file}{needed}{$lib};
my $dir;
for $dir (@L) # walk -L libpath
{
if (-f "$dir/$lib")
{ $X{$file}{needed}{$lib} = "$dir/$lib"; last; }
}
}
} print "\n" if not $o{quiet};
if ($o{needed}) { # debugging - print imports if "--needed" was seen
for $file (sort keys %X) { my $lib;
for $lib (sort keys %{$X{$file}{needed}}) {
print STDERR $file, " - ";
print STDERR $lib, " => '",$X{$file}{needed}{$lib}, "'\n";
}
}
}
# _____________________ classify each object ___________________________
my %R; my $lib; # use as $R{$lib} - it's a cache storing classifications.
# compare with largefile specs at http://ftp.sas.com/standards/large.file
# differences detected by 64on32bits hints, about section 4 of the
# http://ftp.sas.com/standards/large.file/specs/api+.006.ps
my @base64 = ( "creat64", "open64", "ftw64", "nftw64", "fgetpos64",
"fopen64", "freopen64", "fseeko64", "fsetpos64",
"ftello64", "tmpfile64", "mmap64", "fstat64",
"lstat64", "stat64", "statvfs64", "fstatvfs64",
"lockf64", "lseek64", "ftruncate64", "truncate64",
"aio_read64", "aio_write64", "lio_listio64", "aio_erro64",
"aio_return64", "aio_cancel64", "aio_suspend64",
# these have been seen in the wild as well...
"mkstemp64", "tmpfile64", "readdir64",
"pread64", "pwrite64", "sendfile64" );
# this routine is run for all %X files and all their $X{$file}{needed}{*}
# dependencies - it stores the information into the %R cache for each one.
sub classifyRlib
{
my $lib = $_[0];
my $sym;
# read the dynamic symbol table (slow!) and register in $R{$lib}{sym}{*}
my $dynamicsymbols = `objdump -T $lib`;
$dynamicsymbols =~ s{ ^ (.*) \s+ ([\w_]\w+) \s*$ }
{ $R{$lib}{sym}{$2} = $1; "" }gmex;
if ($o{symbols} and exists $R{$lib}{sym}) {
print STDERR " .... ",$lib," ", col36($lib)," ";
print STDERR scalar %{$R{$lib}{sym}}, " \t(symbols)\n";
}
$R{$lib}{_64} = "";
$R{$lib}{_32} = "";
for $sym (@base64) # foreach known ..64 symbol from the largefile-API
{
$sym =~ s/64$//; next if exists $R{$lib}{sym}{$sym."32"};
$R{$lib}{_64} .= " ".$sym."64" if exists $R{$lib}{sym}{$sym."64"};
$R{$lib}{_32} .= " ".$sym.".." if exists $R{$lib}{sym}{$sym};
$R{$lib}{danger} .= " ".$sym."../".$sym."64"
if exists $R{$lib}{sym}{$sym} and exists $R{$lib}{sym}{$sym."64"};
}
return if length $R{$lib}{_32};
# secondly - if the library/binary is itself _64 and does also export
# functions in traditional dualmode-style (none/none64) then declare
# them _32 as well - effectivly classifying it as a 3264 dualmode object
for $sym (keys %{$R{$lib}{sym}})
{
next if $sym !~ /\w[\w_]+\w\w64$/; # foreach symbol like "\w+64"
next if $sym =~ /(_int|Int)64$/; # (with one exception)
$sym =~ s/64$//; # which has a cousin symbol
next if not exists $R{$lib}{sym}{$sym}; # without the "64" suffix.
next if $R{$lib}{sym}{$sym} =~ /[*]UND[*]/;
my $number=""; # sanity check: there is no other symbol with a
my $num; # number suffix, esp. no "${sym}32" or "${sym}65"
for $num (0..1024) # but we actually test every number up to 1024
{
next if $num eq "64";
next if not exists $R{$lib}{sym}{$sym.$num};
$number=$num; last;
}
next if length $number and exists $R{$lib}{sym}{$sym.$number};
# okay, this $lib looks like exporting 3264 dualmode symbols..
$R{$lib}{_32} = " " x length($R{$lib}{_64}) if ! length $R{$lib}{_32};
$R{$lib}{_64} .= " ".$sym."64" if exists $R{$lib}{sym}{$sym."64"};
$R{$lib}{_32} .= " ".$sym.".." if exists $R{$lib}{sym}{$sym};
}
}
# the function above was defined as "sub", now let's walk all the binaries
# and imported libraries, and classify whether they are _32 or _64 (or both)
for $file (keys %X)
{
classifyRlib ($file);
my $importlib;
foreach $importlib (keys %{$X{$file}{needed}})
{
$lib = $X{$file}{needed}{$importlib};
next if exists $R{$lib}; # already classified
classifyRlib ($lib);
}
} print STDERR "\n" if $o{symbols}; # (done with scanning/reading object files)
# helper: print the classifyRlib result of a given Rlib to STDOUT
sub printRlib
{
my $lib = $_[0];
if (length $R{$lib}{_32})
{
if (length $R{$lib}{_64})
{
print " 3264 ",$lib," ",col36($lib),$R{$lib}{_64},"\n";
print " 3264 ",$lib," ",col36($lib),$R{$lib}{_32},"\n";
}else{
print " -32- ",$lib," ",col36($lib),$R{$lib}{_32},"\n";
}
}
elsif (length $R{$lib}{_64})
{
{
print " -64- ",$lib," ",col36($lib),$R{$lib}{_64},"\n";
}
}else{
{
print " -??-",$lib,"\n";
}
}
}
sub Rtyp # helper - subset of above, only 4char classfy-code is returned
{
my $lib = $_[0];
if (length $R{$lib}{_32})
{
return "3264" if length $R{$lib}{_64};
return "-32-";
}
elsif (length $R{$lib}{_64})
{
return "-64-";
}else{
return "-??-";
}
}
if ($o{detected}) { # debugging - print classifyRlib results to
for $lib (sort keys %R) { # STDOUT if "--detected" was seen
next if $lib =~ m:.*/libc[.]so[.]\d+$:;
printRlib ($lib);
}
}
# _______________________ smart helper function _____________________
# some dependencies should not provoke a mismatch even that the
# libraries themselves do mismatch in their largefile mode - that is
# the case when only algorithm functions are imported that would not
# trigger access to any filedescriptor - `zlib` is a good example.
#
# implementation: for a known set of dependent libraries, we can check
# which symbols have been imported from it. We know about those imports
# of algorithms that are acceptable. If only these were seen, then the
# import dependency turns out to be notoffending, i.e. it is "(clean)".
my %goodimports = ( libz => [ "deflate\\w*", "inflate\\w*",
"compress\\w*", "uncompress\\w*",
"\\w+32", "zError", "zlibVersion"],
# only file-reference: poptReadConfigFile(...,name)
libpopt => [ "popt[A-Z](?:\\w(?!File))*" ],
libutil => [ "(open|fork)pty", "log(in|out|wtmp|in_tty)" ],
".." => [ "<>" ]);
sub notoffending
{
my $bin = $_[0];
my $lib = $_[1];
return 0 if not length $R{$bin}{_64};
return 0 if not length $R{$lib}{_32};
my $library = ""; my $known;
foreach $known (keys %goodimports)
{
next if "/$lib" !~ m:/${known}[.]so\b[^/]*$:;
$library = $known; last;
}
# return 0 if not length $library and not $o{nonclean};
$library = ".." if not length $library;
my $sym; my $offending = "";
foreach $sym (keys %{$R{$lib}{sym}})
{
next if $R{$lib}{sym}{$sym} =~ /[*]UND[*]/; # $lib imports(!!) it.
next if $sym =~ /^_\w+_*/; # compiler symbols / hidden symbols
next if $sym =~ /^\d/; # hmmm, does exist sometimes
next if $sym =~ /^[A-Z_]+[.]\w+/; # a dot in the middle, "GLIBC_2.1"
next if $sym =~ /^\s*$/; # empty, some extra info line
next if not exists $R{$bin}{sym}{$sym};
# the symbol is exported(!!) by $lib and it exists in $bin....
foreach $known (@{$goodimports{$library}})
{
if ($sym =~ /^${known}$/) # it's a known symbol
{ $sym = ""; last; } # clean it - it's not offending.
}
if (length $sym)
{ # we have an offending symbol.
$offending .= '"'.$sym.'" ';
last unless $o{noncleanall};
}
}
return 1 if not length $offending; # imports only known good symbols.
print "$bin ",col36($bin),"(64->>-32).." if $o{nonclean};
print $library,".. " if $o{nonclean};
print "(not clean?)\n" if $o{noncleanall};
print $offending, "\n" if $o{nonclean};
return 0; # found symbols not in the goodlist, return FALSE.
}
# ___________________ show largefile-mode mismatches __________________
# we walk the %X{file}s twice - we check out all the largefile mismatches
# and register them in the %offending hash. When done, then we print the
# Rlib classification of these, so that the reader can have an eyeball
# check if that is actually done right. Finally, go over the list for
# real and print the largefile mismatches - as an extension some of the
# largefile-mismatches are marked "(clean)" when the `notoffending`-helper
# functions knows that the $bin file does not import any symbol from its
# dependency $lib that could trigger some file access. So, even that there
# is a mismatch, it does not matter for there will be no non-largefile-mode
# access to the filesystem effectivly. using "--smart" or "--silent" will
# suppress these lines completely from output to the user screen.
my %offending;
my $T = "";
for $file (keys %X) # register the largefile mismatches
{ my $importlib;
for $importlib (keys %{$X{$file}{needed}})
{
$lib = $X{$file}{needed}{$importlib};
next if not length $R{$file}{_32} and not length $R{$file}{_64};
next if not length $R{$lib}{_32} and not length $R{$lib}{_32};
next if length $R{$file}{_64} and length $R{$lib}{_64};
next if length $R{$file}{_32} and length $R{$lib}{_32}
and not length $R{$file}{_64};
# okay: -64->>-64- 3264>>-64- 3264>>3264 and -32->>-32- -32->>3264
# else: mismatch: 3264>>-32- -64->>-32- and -32->>-64-
next if $o{smart} and notoffending ($file, $lib);
# $importlib = ""; $importlib=" (clean)" if notoffending ($file,$lib);
# print $file," ",col36($file),Rtyp($file),">>",Rtyp($lib)," ",$lib;
# print $importlib,"\n";
$offending{$lib} = ""; # register both, so that we'll see the
$offending{$file} = ""; # Rlib classification of both of them.
}
$offending{$file} = "" if exists $R{$file}{danger};
}
unless ($o{quiet} or $o{q}) # and here we print the Rlib classification
{ # unless however "--quiet" or "--silent" seen.
my $mismatch="";
for $lib (sort keys %offending)
{ $mismatch="1"; printRlib ($lib); }
if (not length $mismatch)
{ print "no largefile mismatch found :-)\n" unless $o{silent};
exit 0; # note: the last line of this script reads "exit 1" :-)
}
}
unless ($o{quiet} or $o{q}) # here we show all the miscompiled libraries
{
for $lib (sort keys %offending)
{
next if not exists $R{$lib}{danger};
print $lib,col36($lib)," DANGER",$R{$lib}{danger},"\n";
}
}
for $file (sort keys %X) # now show the largefile mismatches
{ my $importlib;
for $importlib (sort keys %{$X{$file}{needed}})
{
$lib = $X{$file}{needed}{$importlib};
next if not length $R{$file}{_32} and not length $R{$file}{_64};
next if not length $R{$lib}{_32} and not length $R{$lib}{_32};
next if length $R{$file}{_64} and length $R{$lib}{_64};
next if length $R{$file}{_32} and length $R{$lib}{_32}
and not length $R{$file}{_64};
# okay: -64->>-64- 3264>>-64- 3264>>3264 and -32->>-32- -32->>3264
# else: mismatch: 3264>>-32- -64->>-32- and -32->>-64-
next if $o{smart} and notoffending ($file, $lib);
$importlib = ""; $importlib=" (clean)" if notoffending ($file,$lib);
print $file," ",col36($file),Rtyp($file),">>",Rtyp($lib)," ",$lib;
print $importlib,"\n";
# $offending{$lib} = "";
# $offending{$file} = "";
}
}
exit 1; # there were some offending imports, or so it seems....