#!/usr/bin/perl -w
#
my $progname = 'ip2cn-server';
my $version  = '2008-10-06a';
#
# ip2cn-server -- 2008-08-05, last edit
#
# memory resident IP to country name (IP geolocation) server that
# communicates via inet domain sockets.
#
# Copyright (C) 2008 Grant Coady GPLv2
#
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details:
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
#
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Credits
#  Sockets code by Michael Chapman with
#  minor modifications to suit this application, detailed below.
#
use strict;
use Fcntl qw(:flock);
use Getopt::Std;
use IO::Select;
use IO::Socket::INET;
use POSIX;

#### command line options
my %option = ();
getopts("a:c:dhjp:qrtv", \%option);

my $inetaddr = $option{a} ||= 'localhost';
my $dot_conf = $option{c} ||= '/etc/ip2cn-server.conf';
my $daemon   = $option{d} ||= 0;
my $helpview = $option{h} ||= 0;
my $junkview = $option{j} ||= 0;
my $inetport = $option{p} ||= '4743';
my $logquery = $option{q} ||= 0;
my $logreply = $option{r} ||= 0;
my $timeload = $option{t} ||= 0;
my $verbose  = $option{v} ||= 0;

if ($helpview) {
    # The usage text names the real default conf file and the -c switch,
    # matching the getopts() string and $dot_conf default above.
    print <<"EOF";
$progname -- $version

IP to country name (IP geolocation) server daemon
Copyright (C) 2008 Grant Coady GPLv2

Usage:
  ip2cn-server [-c /path/to/conf-file] [-a LocalAddr] [-p LocalPort]
               [-d] [-h] [-j] [-q] [-r] [-t] [-v]

Example:
  ip2cn-server -d -j     # daemon server, junkview mode,
                         # default /etc/ip2cn-server.conf
Information:
  Documentation: perldoc $progname
  Home site:     http://bugsplatter.id.au/ip2cn/
EOF
    exit;
}

# verbose is implied when timing the load or running in the foreground
++$verbose if $timeload or not $daemon;

#### database
# Parallel arrays indexed 1 .. $ip2c_cn: block start, block end, country code.
my (@ip2c_lo, @ip2c_hi, @ip2c_cc);
my $ip2c_cn = 0;        # number of IP blocks loaded
my %cc_name = ();       # country code => country name
my $seqnum  = 0;        # running count of replies (verbose output only)

#### program control
# Unprivileged users keep pid and log files in /tmp, root uses /var.
my ($pidfile, $logfile) = POSIX::getuid()
    ? ('/tmp/ip2cn-server.pid',     '/tmp/ip2cn-server.log')        # user
    : ('/var/run/ip2cn-server.pid', '/var/log/ip2cn-server.log');   # root

#### logger state
# These must be initialised here, ahead of the main loop: the main loop
# never returns (it ends in do_die), so an assignment placed after it is
# never executed and do_warn() would compare against an undefined limit,
# killing the server on its very first warning.
my $log_fh;
my $warn_count = 0;
my $warn_limit = 24;

#### signal state, shared with run_inet_listener()
my $signal_graceful_exit = 0;
my $shutdown             = 0;
my $data_reload          = 0;

sub start_logger();
sub do_log($);
sub do_warn($);
sub do_die($);
sub load_database();
sub run_inet_listener();

#### startup
if (-r $pidfile and $verbose) {
    # NOTE(review): the original message text was lost when this file was
    # extracted; the wording below is a reconstruction -- confirm.
    print "$progname: found existing pidfile $pidfile\n";
}

# NOTE(review): this call was also lost in extraction; it is restored here
# because nothing else starts the logger and every do_*() writes to it.
# The exclusive log lock is what detects an already running server.
start_logger();
write_pidfile();

#### signal handling
$SIG{TERM} = \&do_exit;
$SIG{USR1} = $SIG{USR2} = \&data_reload_handler;
$SIG{PIPE} = 'IGNORE';      # handled by run_inet_listener()

daemon_mode() if $daemon;

# request database load after server starts listening socket
$signal_graceful_exit = $data_reload = 1;

#### main program loop
do {
    # while serving, INT/HUP ask for a graceful shutdown
    $SIG{INT} = $SIG{HUP} = \&graceful_shutdown;
    run_inet_listener();
    $signal_graceful_exit = 0;

    # while (re)loading data, INT/HUP exit immediately
    $SIG{INT} = $SIG{HUP} = \&exit_handler;
    if ($data_reload) {
        load_database();
        do_die("time data load") if $timeload;
        $data_reload = 0;
    }
} until $shutdown;
do_die("shutdown");

#### helpers

# Write this process id to $pidfile; dies via do_die() if it cannot.
sub write_pidfile {
    open my $fh, '>', $pidfile or do_die("open $pidfile: $!\n");
    print {$fh} "$$\n";
    close $fh;
    return;
}

# SIGTERM: ungraceful exit, only the pidfile is cleaned up.
sub do_exit {
    unlink $pidfile;
    exit;
}

# SIGINT/SIGHUP outside the sockets loop: log and exit at once.
sub exit_handler {
    do_die("signal: exit");
}

# SIGINT/SIGHUP inside the sockets loop: finish pending writes, then stop.
sub graceful_shutdown {
    do_log("signal: shutdown graceful");
    $signal_graceful_exit = $shutdown = 1;
}

# SIGUSR1/SIGUSR2: leave the sockets loop so the main loop reloads data.
sub data_reload_handler {
    do_log("signal: data reload");
    $signal_graceful_exit = $data_reload = 1;
}

# Detach from the terminal: fork, let the parent exit, silence the standard
# streams, start a new session and record the child's pid.
sub daemon_mode {
    do_log("daemonise");
    defined(my $pid = fork) or do_die("fork: $!");
    exit if $pid;                                   # parent

    # child
    chdir "/" or do_die("chdir '/': $!");
    $verbose = 0;
    open STDIN,  '<', '/dev/null' or do_die("reopen STDIN: $!");
    open STDOUT, '>', '/dev/null' or do_die("reopen STDOUT: $!");
    open STDERR, '>', '/dev/null' or do_die("reopen STDERR: $!");
    POSIX::setsid() or do_die("setsid: $!");
    write_pidfile();
    return;
}

#### logger

# Timestamp used as the first column of every log line.
sub datestamp {
    return POSIX::strftime("%F.%T", localtime(time));
}

# Open the logfile for append and take a non-blocking exclusive lock on it;
# failing to get the lock means another server instance holds it.
sub start_logger() {
    open $log_fh, '>>', $logfile or die "fail open $logfile $!\n";
    flock($log_fh, LOCK_EX | LOCK_NB)
        or die "$logfile lock: server is running\n";
    $log_fh->autoflush(1);
    do_log("logfile: $logfile");
}

# Log an informational message; also resets the consecutive warning count.
sub do_log($) {
    my $mesg = sprintf("%s\t -\t%s\n", datestamp(), shift);
    print {$log_fh} $mesg;
    print $mesg if $verbose;
    $warn_count = 0;
}

# Log a warning; more than $warn_limit warnings in a row is fatal.
sub do_warn($) {
    my $mesg = sprintf("%s\twarn\t%s\n", datestamp(), shift);
    print {$log_fh} $mesg;
    print $mesg if $verbose;
    do_die("excessive warnings") if ++$warn_count > $warn_limit;
}

# Log a final message, remove the pidfile and exit.
sub do_die($) {
    my $mesg = sprintf("%s\tdie\t%s\n", datestamp(), shift);
    if ($log_fh) {
        print {$log_fh} $mesg;
        close $log_fh;
    }
    print $mesg if $verbose;
    unlink $pidfile;
    exit;
}

#### query handler

# Split a 32-bit address into its four octets, most significant first.
sub _octets {
    my $n = shift;
    my @octet;
    for (1 .. 4) {
        unshift @octet, $n & 255;
        $n >>= 8;
    }
    return @octet;
}

# Answer one query line: "<address> [format]".
#   address  dotted IPv4 (missing trailing octets count as 0) or a decimal
#            number no larger than 0xffffffff
#   format   optional reply format, see the reply formatter below
# Returns the newline terminated reply string.
sub do_query($) {
    my $request = shift;
    my ($q, $rf) = split /\s+/, $request, 2;
    $q = '' unless defined $q;

    my ($addr, $cc, $cn, $da, $lo, $hi, $na, $nz);
    my $bad = 0;

    #### parse the address
    if ($q !~ /\A[0-9.]+\z/) {
        $bad = 1;
    }
    elsif ($q =~ /\./) {
        # Reject empty octets ("1..2", "1.2.3.") and more than four parts
        # ("1.2.3.4.5"); these used to slip through as odd addresses.
        my @octet = split /\./, $q, -1;
        if (@octet > 4 or grep { !length($_) || $_ > 255 } @octet) {
            $bad = 1;
        }
        else {
            push @octet, 0 while @octet < 4;
            $addr = 0;
            $addr = $addr * 256 + $_ for @octet;
        }
    }
    else {
        $bad = 1 if $q > 0xffffffff;
        $addr = $q;
    }

    #### look the address up
    if ($bad) {
        $da = $q = $na = $lo = $hi = $cc = '?';
        $cn = 'bad query';
    }
    else {
        $da = $addr;

        # Binary search for the last block whose start is <= $addr.
        my ($low, $high) = (1, $ip2c_cn);
        while ($high - $low > 1) {
            my $mid = ($low + $high) >> 1;
            if ($ip2c_lo[$mid] < $addr) { $low = $mid } else { $high = $mid }
        }
        --$high if $high >= 1 and $ip2c_lo[$high] > $addr;

        # An address is assigned only if it really lies inside the block
        # found; this also covers an empty table and addresses below the
        # first block, which used to be reported as block 1.
        if (   $high < 1
            or $ip2c_lo[$high] > $addr
            or $ip2c_hi[$high] < $addr)
        {
            $na = $lo = $hi = '-';
            $cc = '--';
            $cn = 'unassigned';
        }
        else {
            $cc = $ip2c_cc[$high];
            $cn = $cc_name{$cc};
            $cn = '' unless defined $cn;    # code without a name entry

            if ($junkview) {
                $lo = $ip2c_lo[$high];
                $hi = $ip2c_hi[$high];

                # Narrow a /4 mask until the CIDR block holding $addr
                # fits inside [$lo, $hi].
                my $bits = 4;
                my $mask = 0x0fffffff;
                while (($addr & ~$mask) < $lo) { ++$bits; $mask >>= 1 }
                while (($addr |  $mask) > $hi) { ++$bits; $mask >>= 1 }

                my @net = _octets($addr & ~$mask);
                $nz = sprintf("%03d.%03d.%03d.%03d/%02d", @net, $bits);
                $na = sprintf("%d.%d.%d.%d/%d", @net, $bits);
            }

            # regenerate query terms for clean format conversion
            $q = sprintf("%d.%d.%d.%d", _octets($da));
        }
    }

    #### reply formatter
    my %rt;
    {
        # Placeholders ('?', '-') and unset junkview fields are formatted
        # on purpose; keep their output without flooding STDERR.
        no warnings qw(numeric uninitialized);
        %rt = (
            't' => "\t",                    # tab
            'a' => $na,                     # netaddr, CIDR *
            'b' => sprintf("%-18s", $na),   # ditto, fixed width *
            'c' => $cc,                     # country code
            'n' => $cn,                     # country name
            'd' => $da,                     # query addr, decimal
            'f' => sprintf("%10u ", $da),   # ditto, fixed width
            's' => sprintf("%010u", $lo),   # start addr, decimal *
            'e' => sprintf("%010u", $hi),   # end addr, decimal *
            'q' => $q,                      # query dotquad addr
            'p' => sprintf("%-15s", $q),    # ditto, fixed width
            'z' => $nz                      # netaddr, CIDR, zero fill *
        );
    }
    # * these values have no meaning when server not in junkview mode
    @rt{qw(a b s e z)} = ('-') x 5 unless $junkview;

    # Default reply, also used for the reply log.  Taken before the loop
    # below blanks used fields, so the log keeps the query address.
    my $summary = "$rt{'p'}$cc:$cn";

    $rf ||= 'p c:n';
    $rf = lc $rf;
    # NOTE(review): `$%` inside this pattern looks like it interpolates
    # Perl's $% variable rather than matching literal '$' and '%' -- the
    # pattern is kept exactly as it was; confirm the intent.
    $rf =~ s![0-9g-moru-y`~@#$%^&*/?]+!!g;
    if    ($rf eq '-') { $rf = 'c:n' }          # sf4sf
    elsif ($rf eq '+') { $rf = 'a:c :n:s:e' }   # junkview
    elsif ($cc eq '?') { $rf = '(n)' }          # bad query

    my $r      = '';
    my $fields = 0;
    for my $t (split //, $rf) {
        if (defined $rt{$t}) {
            $r .= $rt{$t};
            # each field prints once; only the tab may repeat (test the
            # key, the tab's value is "\t" and never equals 't')
            $rt{$t} = '' unless $t eq 't';
            ++$fields;
        }
        else {
            $r .= $t;                           # literal separator
        }
    }
    $r = $fields ? "$r\n" : "$summary\n";

    printf "%9d %s", ++$seqnum, $r if $verbose;
    do_log("reply: $summary") if $logreply;
    return $r;
}

#### database loader

# Read $path under a shared lock and pass every data line (chomped) to
# $handler; blank lines, '#' comments and 'junkview...' lines are skipped.
sub _each_data_line {
    my ($path, $handler) = @_;
    do_log("read: $path");
    open my $fh, '<', $path or do_die("$path $!");
    flock($fh, LOCK_SH) or do_warn("$path: no shared lock: $!");
    while (my $line = <$fh>) {
        next if $line =~ /^$/;
        next if $line =~ /^#/;
        next if $line =~ /^junkview/;
        chomp $line;
        $handler->($line);
    }
    close $fh;
    return;
}

# Read the conf file, then (re)load the IP block index and country names.
sub load_database() {
    my %path;       # datapath, ndxdata0, ndxdata1, namedata

    # NOTE(review): the listening socket is created before the first call
    # to this sub, so inetaddr/inetport set here appear to affect only the
    # "listen:" log line, not the bound address -- confirm.
    _each_data_line($dot_conf, sub {
        my ($key, $value) = split /\s+/, $_[0], 3;
        $key = '' unless defined $key;
        if ($key =~ /\A(?:datapath|ndxdata0|ndxdata1|namedata)\z/) {
            $path{$key} = $value;
        }
        # may set these if not already set on command line
        elsif ($key eq "inetaddr") {
            $inetaddr = $value if $inetaddr eq 'localhost';
        }
        elsif ($key eq "inetport") {
            $inetport = $value if $inetport eq '4743';
        }
        elsif ($key eq "logquery") { $logquery = $value unless $logquery }
        elsif ($key eq "logreply") { $logreply = $value unless $logreply }
        elsif ($key eq "datamode") { $junkview = $value unless $junkview }
    });

    my $ndxkey = $junkview ? 'ndxdata1' : 'ndxdata0';
    for my $key ('datapath', $ndxkey, 'namedata') {
        defined $path{$key} or do_die("$dot_conf: no '$key' setting");
    }
    my $indexfile = "$path{datapath}/$path{$ndxkey}";
    my $namesfile = "$path{datapath}/$path{namedata}";

    # index: "<start> <end> <country code>" per line, stored from index 1
    $ip2c_cn = 0;
    @ip2c_lo = ();
    @ip2c_hi = ();
    @ip2c_cc = ();
    _each_data_line($indexfile, sub {
        ++$ip2c_cn;
        ($ip2c_lo[$ip2c_cn], $ip2c_hi[$ip2c_cn], $ip2c_cc[$ip2c_cn])
            = split /\s+/, $_[0];
    });

    # names: "<country code>:<country name>" per line
    %cc_name = ();
    _each_data_line($namesfile, sub {
        my ($cc, $name) = split /:/, $_[0];
        $cc_name{$cc} = $name;
    });
    return;
}

#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-
# inet domain sockets interface
#
# ``````````````````````````````
# Sockets code by Michael Chapman thanks!
#
# Changelog -- things Grant altered
# ``````````````````````````````````
# 2008-08-05
#   modify to use inet domain instead of unix domain
#   remove @ARGV handling
#   insert 'do_query' call in data read handling
# 2008-08-06
#   bumped 'Listen' from '1' to '5' (tcp default)
#   ignore empty queries, they make the server do odd things
#   change die and warn to function calls do_die() do_warn() for logging
#   add $shutdown handling, convert to subroutine
# 2008-08-07
#   add $signal_graceful_exit graceful exit handling
# 2008-08-16
#   add query logging option
# 2008-10-06
#   add ReuseAddr => 1 to stop "fail create listen socket: Address already
#   in use" error on restart
#

# Each client is tracked as an array ref: [ handle, read buffer, write buffer ]
use constant FH   => 0;
use constant RBUF => 1;
use constant WBUF => 2;

my $readers;                # IO::Select set polled for input
my $writers;                # IO::Select set polled for output
my $listener;               # listening socket
my $listener_started = 0;

# Read what is available from client $s, answer every complete line in its
# read buffer and queue the replies in its write buffer.
# Returns 1 to keep the client, nothing on EOF or read error.
sub do_read {
    my $s = shift;
    my $read = $s->[FH]->sysread(my $data, 4096);
    defined $read or do_warn("no read socket: $!");
    $read or return;

    $s->[RBUF] .= $data;
    while ($s->[RBUF] =~ s/^(.*)\n//) {         # read by line
        my $line = $1;
        # Skip empty lines only.  Testing the line for truth also dropped
        # the valid query "0", leaving that client waiting for a reply.
        next unless length $line;
        do_log("query: $line") if $logquery;
        $s->[WBUF] .= do_query($line);
    }
    return 1;
}

# Send as much of client $s's write buffer as the socket accepts.
# Returns 1 to keep the client, nothing when nothing could be written.
sub do_write {
    my $s = shift;
    my $written = $s->[FH]->syswrite($s->[WBUF]);
    # a vanished client (EPIPE) is normal and not worth a warning
    defined $written or $! == EPIPE or do_warn("no write socket: $!");
    $written or return;
    substr $s->[WBUF], 0, $written, '';
    return 1;
}

#### server sockets interface

# Create the listening socket on first call, then serve clients until a
# signal handler sets $signal_graceful_exit and pending replies are sent.
sub run_inet_listener() {
    unless ($listener_started) {
        $listener = IO::Socket::INET->new(
            LocalAddr => $inetaddr,
            LocalPort => $inetport,
            Proto     => 'tcp',
            Type      => SOCK_STREAM,
            Listen    => 5,
            ReuseAddr => 1,
        ) or do_die("fail create listen socket: $!");
        $readers = IO::Select->new;
        $writers = IO::Select->new;
        $readers->add($listener);
        ++$listener_started;
        return if $signal_graceful_exit;        # startup dataload
    }
    do_log("listen: $inetaddr:$inetport");

    # block, waiting for sockets ready for I/O or signalled exit
    while (my ($r, $w) = IO::Select::select($readers, $writers)
            or $signal_graceful_exit) {

        # handle new connections or ready-for-reading sockets
        foreach my $s (@$r) {
            # no new connections or input if doing graceful exit
            last if $signal_graceful_exit;
            if ($s == $listener) {              # new connections
                my $new = $listener->accept
                    or do_die("client not accepted: $!");
                $readers->add([$new, '', '']);  # FH,RBUF,WBUF
            }
            elsif (do_read($s)) {               # ready-for-reading
                $writers->add($s) if length $s->[WBUF];
            }
            else {                              # client gone
                $readers->remove($s);
                $writers->remove($s);
            }
        }

        # handle ready-for-writing sockets or graceful exit
        my $active_writers = 0;
        foreach my $s (@$w) {
            ++$active_writers;
            if (do_write($s)) {
                $writers->remove($s) unless length $s->[WBUF];
            }
            else {
                $readers->remove($s);
                $writers->remove($s);
            }
        }
        return if $signal_graceful_exit and not $active_writers;
    }
    do_warn("odd, fell out of server loop");
    return;
}

do_die("very odd, got past server loop");

#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Documentation
# ``````````````

=head1 NAME

ip2cn-server - IP to country name (IP geolocation) server

=head1 SYNOPSIS

 ip2cn-server [-c /path/to/conf] [-a LocalAddr] [-p LocalPort] \
              [-d] [-h] [-j] [-t] [-v]

=head1 DESCRIPTION

Memory resident IP to country name (IP geolocation) server.
Copyright (C) 2008 Grant Coady GPLv2 This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as published by the Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: http://www.gnu.org/licenses/old-licenses/gpl-2.0.html =head2 Why? The size of database files required for IP geolocation imposes a significant load time penalty for applications using that data. To speed up these applications this server keeps one copy of the database files in memory and responds to client requests for IP to country name lookups quickly. =head2 How it works On startup the server loads the database into memory and starts listening for socket connections. Clients make a connection to the server and send a query, the server responds with a reply. The connection may be for one or many queries. Casual clients do a connect-query-disconnect while a tail logging client might keep a connection open for the life of the process. An optional reply format field may be appended to the queries (see below). The server communicates with the client applications via inet domain sockets and can have simultaneous open connections to several applications. There's no need for exclusive access locking as was the case with the previous ip2c-server. The server also runs a logfile and catches signals for database reload and graceful shutdown. Clients disconnect cleanly when the server is shut down. (Clients written in gawk terminate if the server disconnects). =head2 Advantages The main advantage of using this server is that ad-hoc queries no longer have to wait for the database to load into memory, this suits shell and CGI applications that need to perform single lookups on demand. 
Applications may share the IP to country database in an efficient manner. =head2 Geolocation accuracy The database used with this server is derived from the top level Internet registries only, so IP blocks are resolved to the country where the registry is located. In Europe there is some cross-border uncertainty as well as use of the EU location. This server is only as accurate as the data collected from the public registries: apnic, afrinic, arin, lacnic, ripe, iana, and iso for the country codes and names. =head2 Old ip2c-server clients Existing users of the old server, junkview and sf4sf, have been converted to use this server. They also fall back to loading the database files directly if the server is not running. New casual client scripts are detailed below, 'ccfind' is rewritten to suit the server. =head1 OPTIONS -c Configuration file Default: '/etc/ip2cn-server.conf' Location of the configuration file server reads to discover from which directory to read the database files. -a LocalAddr Default: 'localhost' Address of server machine, if you use the machine's name rather than 'localhost', the server may receive queries from the network. -p LocalPort Default: '4743' Define which port the server listens on for new connections. -d Daemon mode Default: off Set this to have the server switch to daemon mode, specify this when starting the server from your initscripts. -h Help Default: off Display help text, a reminder to use perldoc ip2cn-server. -j Junkview mode Default: off When set, server uses database file 'ip2c-data' for junkview operation, otherwise the server uses the smaller, lower resolution 'ip2c-index' database file. The default database, ip2c-index, has adjacent IP blocks for the same country merged for faster load time and operation. -t Time database loading Default: off. When set, the server exits after loading the database files. -v Verbose Default: it depends When set the server reports operation. 
It is switched off when server switches to daemon mode, it is switched on for non-daemon mode. Use -v to watch server load files before entering daemon mode. =head1 NOTES =head2 Testing as user For testing: ip2cn-server -v; for server use: ip2cn-server -d [-j] [-v] the server will turn off -v option when switching to daemon mode. =head2 Activity logging The server appends to a logfile while running, either: /var/log/ip2cn-server.log (run as root), or /tmp/ip2cn-server.log (run as user) An exclusive write lock is held on the logfile. =head2 PID file The server writes a pidfile for process management, this file is written to: /var/run/ip2cn-server.pid (run as root), or /tmp/ip2cn-server.pid (run as user) =head2 Database tables A tarball of data files for IP to country names lookup to suit this server is available from (usually updated daily): ftp://bugsplatter.id.au/junkview/ip2c-database.tar.lzma 172k ftp://bugsplatter.id.au/junkview/ip2c-database.tar.bz2 272k ftp://bugsplatter.id.au/junkview/ip2c-database.tar.gz 280k Note that the database tarball above is suitable only for IP to country lookup, it cannot be used to discover individual IP block allocation as adjacent blocks with the same country have been merged to reduce the database size from 93k records to 37k records. If you want the full resolution database files, see the database update scripts at http://bugsplatter.id.au/junkview/ which grab and process the IP to country database files available from http://software77.net/ and supply the '-j' command line option to the ip2cn-server. A shared reading lock (flock 1) is held while reading database files to prevent them being written while open for read. Update scripts should also use flock when rewriting any database files. =head2 Database country names The database uses the two-letter ISO codes for countries, and, in the most part the ISO country names except where they've been trimmed for readability. 
The following non-iso names are added to the names lookup: AP Asia Pacific CS Serbia and Montenegro EU European Union ZZ IETF Reserved or Private ISO country codes not found in the registry IP block allocations are removed as they serve no purpose here. =head2 Database ER diagram +----------------+ | ip2c-index | +----------------+ |*Record number* | +--------------+ | IP block start | | ip2c-names | | IP block end | +--------------+ | Country code |---|*Country code*| +----------------+ | Country name | +--------------+ =head2 Database reload The server will reload database files on receipt of the SIGUSR1 signal, send it like this: # /etc/rc.d/rc.ip2cn-server reload which executes this command: # kill -SIGUSR1 $(cat /var/run/ip2cn-server.pid) =head2 Signal handling During startup or database load, the SIGINT or SIGHUP will trigger immediate shutdown. While the server is running the sockets interface, SIGINT or SIGHUP will trigger a graceful shutdown of the sockets interface, ignoring new connections and queries. This action suspends the server's clients while the server is busy reloading the database. This graceful shutdown also prevents the clients getting a broken connection for a shutdown request. They exit cleanly when the server exits as they are waiting for the reply from server. SIGINT, SIGHUP trigger graceful shutdown. SIGUSR1, SIGUSR2 trigger a database reload. SIGTERM triggers an ungraceful exit as the system is going down. See: http://bugsplatter.id.au/ip2cn/rc.ip2cn-server for an example of ip2cn-server graceful stop using signals. =head2 Query handling Query is numeric or dotquad IPv4 IP address, reply takes various forms depending on an optional query format field detailed below. 
For casual queries try: gawk client (3.1.5 or later): #!/usr/bin/gawk -f BEGIN { service = "/inet/tcp/0/localhost/4743" } { print $0 |& service service |& getline print } $ echo 64.233.167.99 |./client 64.233.167.99 US:United States shell wrapper : #!/bin/bash echo $1 | gawk ' BEGIN { service = "/inet/tcp/0/localhost/4743" } { print $0 |& service service |& getline print }' 2>/dev/null $ ./client1 64.233.167.99 64.233.167.99 US:United States =head2 Reply formatting At the moment there are two different users of this server, each expecting its own reply format, so they now append a single character format specifier. For casual use there's a default format that returns the query, country code and country name. The reply format may be specified by appending a format field to the query from this list. 't' tab character 'c' country code, two letters 'n' country name, text - length varies 'd' decimal (numeric) query address 'f' decimal query address formatted: "%10u " (fixed width) 'p' query dotquad address formatted "%-15s" (fixed width) 'q' query dotquad address Space or punctuation characters are inserted into the reply as field separators. Examples: the predefined reply formats are: 'p c:n' default '-' 'c:n' for sf4sf '+' 'a:c :n:s:e' for junkview When using ip2cn-server with the -j option, more information is available: 'a' network block address containing query IP 'b' network block address formatted: "%-18s" (fixed width) 's' numeric block start address 'e' numeric block end address 'z' network block address formatted with zero fill for sorting =head2 Examples $ echo 64.233.167.99 |./client # default 64.233.167.99 US:United States $ echo 64.233.167.99 -|./client # sf4sf US:United States $ echo 64.233.167.99 +|./client # junkview 64.233.160.0/19:US :United States:1089052672:1089060863 $ echo 64.233.167.99 'pw c -> n'|./client # freeform 64.233.167.99 64.233.160.0/19 US -> United States Note use of single quotes to stop the shell interpreting the format field. 
=head1 SEE ALSO =head2 Related projects junkview http://bugsplatter.id.au/junkview/ awk and bash scripts behind the junkshow page junkshow http://bugsplatter.id.au/junkshow/ display last 24 hours activity from the firewall sf4sf http://bugsplatter.id.au/firewall/ firewall log monitor, a pretty printer with geolocation option. cc2ip http://bugsplatter.id.au/cc2ip/ country code or name to IP blocks converter to make blacklist or whitelist firewall rules. =head1 AUTHOR Copyright (C) 2008 Grant Coady GPLv2 This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as published by the Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details: http://www.gnu.org/licenses/old-licenses/gpl-2.0.html Home site: http://bugsplatter.id.au/ip2cn/ =head2 Credits Sockets code by Michael Chapman with minor modifications to suit this application: to handle empty queries, and to wrap into a subroutine with graceful shutdown option. Binary search algorithm from Tim Bray's site, more information here: http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary =cut