#!/bin/perl #====================================================================== # # HttpSniffer $Revision: 2.14.1 $ # # Copyright (c) 1998-2001 Tim Meadowcroft . # All rights reserved. # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. # #====================================================================== # $Id: HttpSniffer.pl 2.14 2001/03/18 21:22:54 Tim Exp Tim $ # Recent Changes: # $Log: HttpSniffer.pl $ # Revision 2.14 2001/03/18 21:22:54 Tim # Added -b/-body arsg to show HTTP body too (thanks Lars) # Added require 5.005 and removed 5.004 compatibility # Added the long considered performance tweak (aiding data throughput) # Tidied up some random bits of code # # Revision 2.13 2001/03/16 00:02:47 Tim # Added options such as skiprtype and skiprcode to let you filter results. # Also tidied up options handling generally. # # Revision 2.12 2000/04/14 16:55:09 MeadowT # Fixed use of Time::HiRes (wasn't importing into other packages) and # brought POD up to date with newer command line args. # # Revision 2.11 2000/02/17 09:28:49 MeadowT # Ooops, 2.10 had a missing ")" ! # # Revision 2.10 2000/02/16 19:49:21 MeadowT # Look for "Connection: close" header from HTTP 1.1 servers (eg Netscape 3.6) # Fix a bug in code that's never used ;^) in closing sockets on error # # Revision 2.9 2000/02/03 19:04:01 MeadowT # Rejigged internals to time various key times (at user request) # Combined with options for simplified logging, it means this can now # be used to get useful timing figures for web sites (see -t and -d args) # # Revision 2.8 2000/01/27 15:37:56 MeadowT # Cleaned up some of the progress messages. # Responses now show the first line of the appropriate Request. # # Revision 2.7 2000/01/25 18:22:57 MeadowT # Added -v arg to make verbose extra comments optional # Added a require 5.004 clause to ensure up-to-date Perl (sorry) # Tidied up creation of listener to produce better diagnostics/error message # Add optional "Proto" arg to making sockets to avoid bug on some platforms # Renamed a data member to avoid ambiguity clash # # Revision 2.6 1999/04/27 13:10:17 MeadowT # Added RCS keywords and brough RCS and release versions in step # # 2.5 - Accept buggy headers with LF terminated lines (should be CRLF) #====================================================================== %::opts = ( p => 8080, r => "localhost:80", ignore => [], # list of headers to ignore.... skiptype => [], # skip reporting these file types skiprcode => [], # skip reporting if the response code is one of these l => '', # logfile h => 0, v => 0, # verbose flag - 0 is non-verbose t => 0, # show timing details d => 999, # display the headers b => [ 'post' ], # when to display the HTTP body ); my $Usage =<] [B<-p> port] [B<-r> realhost[:realport]] [B<-l> logfile] =head1 DESCRIPTION Not so quick and dirty program to listen in to HTTP conversations. This acts as an HTTP tunnel, forwarding all HTTP requests it receives to the specified realhost, logging the header details of the conversation in each direction. This program comes from my attempts to use cookies from ASP scripts, and my frustration at not being able to see what headers were really being sent in each direction. It's now been extended to be a more general program and help produce timing figures. =head1 USAGE perl -w HttpSniffer.pl [options] Typical uses =over 4 =item Trace Conversations between a Browser and a Server on one machine Start the program with no arguments, point your browser at http://localhost to speak to the server directly, or at http://localhost:8080 to debug via the program. =item Trace Conversations between a Browser and a Remote Server Start the program with the > argument to specify the real server. Point your browser at http://ErealserverE to speak to real server directly, or at http://localhost:8080 to debug via the program. =item Trace Conversations between a Browser and All Servers using a Proxy If you have a web proxy available (a firewall, or maybe an ISP web-cache) then you can run the sniffer to forward all queries to that proxy, and re-configure the browser to use the sniffer as a proxy. Then use your browser as normal, and the sniffer will trace all HTTP conversations that are directed through the proxy. =item Profile the responses of a server to various requests Use the -t argument to report timing figures for respones. Use "-d 0" or "-d 1" to reduce the output of headers. Use "-ignore" to tailor the reporting of headers (selected header fields are not reported). Optionally use "-together" to display requests and replies together rather than interleaved in strictly "time received" order. Use the "-skiptype" and "-skiprcode" arguments to avoid reporting items such as the retrieval of images, or authentication handshakes (although these can be well worth watching). =back =head1 ARGUMENTS HttpSniffer accepts command line arguments using Getopt::Long, so args can be specified using "-arg" or "--arg" format, and names can be abbreviated. The following arguments are defined. =over 4 =item -verbose Verbose mode - with this flag some extra I messages are logged. Without it, just the header details are logged. =item -port portnumber The port number to listen to on this machine (ie the fake HTTP server port). You can't use a port that is already in use on this machine. Default is "8080". =item -remote host[:port] The machine and optional port number that the program tunnels to. All HTTP requests will be forwarded to this machine. Default is "localhost:80" (ie the default webserver on this machine). =item -log logfile Details of headers are written to this file or STDOUT if not specified. =item -timing Displays timing stats togther with each response. We show the total time a request took (from issuing the request until the entire reply was received) and also break this down into Latency (the gap from issuing the request until the first bytes of reply were received) and Processing (the gap from receiving the first bytes of reply until the whole reply was read). =item -display num Control what's displayed from each header. 0 - don't display the HTTP header, just the summary info 1 - just display the first line of each HTTP header =item -together Display requests with replies (instead of as they are made). This makes the output easier to read, but doesn't show the inetrleaving of requests and replies as accurately as the normal view. =item -ignore istr When reporting HTTP headers, we don't bother reporting header fields where the name includes istr. This is often used to drop "noise headers" such as ETag which can clutter up the output. =item -skiprcode rcode Don't report replies where the HTTP Response code is rcode. This is used to avoid filling up the output with authentication handshakes (401 responses), but equally you could use it to ignore "normal" responses (rcode 200) and just watch for the exceptional cases. See the RFC's for more details if you don't know what HTTP Response codes are. This option is more useful (and less confusing) if used with -together. =item -skiptype rtype Don't report replies where the URI requested is of type rtype (eg in Ehttp://server/path/file.XYZ?arg1=123&arg2=456E then the rtype is "XYZ"). This can be used to avoid reporting, say, requests for images and other "scaffolding" rather than "content" resources. This option is more useful (and less confusing) if used with -together. =back =head1 AUTHOR Tim Meadowcroft - Etim@schmerg.comE. Visit http://www.schmerg.com for the latest version. Helpful suggestions, extra work, handy features and bug reports by a collection of friendly locals - not named here as they probably dislike junk mail, but anyone who'd like an explicit credit can contact me to get their names up in POD. =head1 SEE ALSO See RFC 2068 for details of HTTP 1.1 (especially sections 4.1 to 4.5). http://www.faqs.org/rfcs/rfc2068.html is a good spot to view it. =head1 COPYRIGHT Copyright (c) 1998-2001 Tim Meadowcroft . All rights reserved. This program is distributed under the Artistic License. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut # **** NOTEs **** # - I presume the IO:Socket automatically makes an autoflush socket, # but some older versions didn't. If you have problems, check for a # newer version of IO:Socket. # # - Note on sockets - when a socket has been closed by the other end, # select->can_read indicates there's data on the socket, but a read # from the socket returns 0 bytes - this is how we recognise a socket needs # closing. This program uses blocking sockets, but avoids blocking. # # - I buffer up reading an entire HTTP header and body - this sniffer is # written for debugging convenience not speed, later versions may change. # Similarly the code is verbose and explicit not speedy and minimal. # # - I read the Content data out of headers, but almost no more except # for matching replies to HEAD requests (which must have no content) # # - This is NOT a proxy, it doesn't read the request and forward it to # the server specified in the request field. It's a tunnel, it forwards # all its requests to the same server (same machine, different port). # Turning it into a proxy wouldn't be hard, but I didn't want a proxy... # Note that RFC2068 says proxies must attach VIA headers and do other stuff # that tunnels don't have to. # #====================================================================== use strict; use IO; use IO::Socket; use IO::Select; use Getopt::Long; # I'm using Perl 5.005 features, if you are running an earlier Perl you # can probably get this to run by rewriting to avoid following 5.005 'isms # EXPR foreach EXPR; (rewrite as "foreach (EXPR) { EXPR }") # /[^[:isprint]]/; (rewite to avoid use of regex POSIX char classes) # substr() with 4 args (rewrite to old 3-param version of substr()) # I think that's the lot, sorry, but since I use a newer Perl I find it # hard to guarantee back-compatibility... require 5.005; BEGIN { # If we can use Time::HiRes time() will be a float to 6 decimal places. # Otherwise time() returns an integral number of seconds. $::UseTimeHiRes = 0; if (eval "require Time::HiRes") { Time::HiRes->import("time"); $::UseTimeHiRes = 1; } } $::HttpEol = "\015\012"; # HTTP end of line marker (see RFC 2068) $::Timeout = 10; # default timeout period # # Format this time in an appropriate way. # sub FTime { my $t = $_[0]; return $::UseTimeHiRes ? sprintf("%.3f", $t) : $t; } # # RemoveLine($data, [ $eol ] ) # Removes a line from the first param, as terminated by eol 2nd optional param # and returns it (with the eol string), or undef if not found. # Note that the first parameter has the line removed on success. # sub RemoveLine { my ($data, $eol) = @_; $eol = "\n" unless defined($eol); my $spos = index($data, $eol); return RemoveBytes($_[0], $spos+length($eol)); } # RemoveHttpLine( $data ) returns the first HttpLine removed from $data # # An HTTP header MUST be terminated with CR and LF (see 3.7.1 of RFC2068): # "... a bare CR or LF MUST NOT be substituted for CRLF within any of the # HTTP control structures (such as header fields and multipart boundaries)" # but sometimes some servers use just LF. # In this case, plump it out and fix it. # sub RemoveHttpLine { my $CR = chr(13); my $LF = chr(10); my $CRLF = $CR . $LF; my $pos = index($_[0], $LF); if ($pos >= 0 and ($pos == 0 or substr($_[0],$pos-1,2) ne $CRLF)) { # replace the LF with CRLF... substr($_[0],$pos,1) = $CRLF; } return RemoveLine($_[0], $CRLF); } # # RemoveBytes($data,$n) # Removes first $n bytes from $data and returns them, or undef if not enough # sub RemoveBytes { my ($data, $n) = @_; return (defined($data) and $n >= 0 and length($data) >= $n) ? substr($_[0],0,$n,"") : undef; } #---------------------------------------------------------------------- # # Splits $data into a list of the HTTP header and what's left. # Returns less than 2 defined values on error. # HTTP headers are a series of lines terminated by a blank line. # sub RemoveHttpHeader { my($data, $eof, $reqh) = @_; my $header = ""; my $line; while (defined($line = RemoveHttpLine($data))) { # **** Note from RFC 2068 section 4.1 **** # In the interest of robustness, servers SHOULD ignore any empty # line(s) received where a Request-Line is expected. In other words, if # the server is reading the protocol stream at the beginning of a # message and receives a CRLF first, it should ignore the CRLF. # This is the only spot where I change the bytes passing through... # latest logic looks at the opening bytes of a header to tell type # $header .= $line unless $line eq $::HttpEol and length($header) == 0; last if $line eq $::HttpEol and length($header) > 0; } return ((defined($line) ? $header : undef), $data); } # # Given a block of HTTP data (described by the passed $header which has been # removed), read and return the body and whats left (either part is undef # on error). If the block being parsed is a reply, $reqh is the request header # that provoked the message (replies to HEAD requests never return a body). # The header tells us what type of body follows, either a block of data, or # a series of size-prefixed chunks. # sub RemoveHttpBody { my($data, $eof, $header, $reqh) = @_; my $body = undef; # **** Note from RFC 2068 section 4.3 **** # The presence of a message-body in a request is signaled by the # inclusion of a Content-Length or Transfer-Encoding header field in the # request's message-headers. # # Sometimes we get replies like this # HTTP/1.0 200 OK # Server: Microsoft-PWS/2.0 # Date: Fri, 13 Nov 1998 17:31:45 GMT # Content-type: text/html # Set-Cookie: ASPSESSIONID=UZFWFNFOZVSRWONB; path=/FWeb # Cache-control: private # where we have body but no proper indication of length. In these cases # we presume HTTP 1.0 behaviour and read body until the server closes # the socket (but only if we see "content-" headers, to avoid # closing GET requests etc., or get explicit "Connection: close" header). # # Note order of checks, ignore Content-Length if Transfer-Encoding given. # # Also... # For response messages, whether or not a message-body is included with # a message is dependent on both the request method and the response # status code (section 6.1.1). All responses to the HEAD request method # MUST NOT include a message-body, even though the presence of entity- # header fields might lead one to believe they do. # my $clength = 0; # assume exactly 0 bytes of body.... if ($header =~ /^HTTP/ and defined($reqh) and $reqh =~ /^HEAD/) { $clength = 0; } # replies to HEAD queries have no body ... elsif ($header =~ /^\s*Transfer-Encoding\s*:\s*chunked/im) { $clength = "chunked"; } elsif ($header =~ /^\s*Content-Length\s*:\s*(\d+)/im) { $clength = $1; } elsif ($header =~ /^\s*Connection:\s+close/im and defined($reqh)) { $clength = -1; } # Netscape Server and others abusing "Connection:" # to mean explicit "body is ended by eof..." elsif ($header =~ m!^HTTP/1.0!) { $clength = -1; } # explicit "body is ended by eof..." elsif ($header =~ /^\s*Content-/im) { # We have some content, but no length specified in any way. # Guess this is HTTP 1.0 or earlier and marked by eof on the socket. $clength = -1; } if ($clength eq "chunked") { # Chunked transfer - need to work out the length as we go.... # Read multiple chunks, followed by a 0 chunk. # These may be followed by more headers... my $line; $body = ""; while (defined($body) and defined($line = RemoveHttpLine($data))) { $body .= $line; substr($line, -length($::HttpEol)) = ""; if ($line !~ /^\s*([0-9a-fA-F]+)\s*(;.*)?$/) { # bad chunk line or incomplete $body = undef; } else { # read the chunksize, ignore optional extensions my $chunksize = hex( $1 ); last if $chunksize == 0; # each chunk is followed by an HttpEol my $c = RemoveBytes($data, $chunksize + length($::HttpEol)); $body = defined($c) ? $body.$c : undef; } } $body = undef unless defined($line) and defined($data); # Read footer lines up to and including an empty line... # while (defined($body) and defined($line = RemoveHttpLine($data))) { $body .= $line; last if $line eq $::HttpEol; } $body = undef unless defined($line) and defined($data); } elsif ($clength >= 0) { # **** Note from RFC 2068 section 4.1 **** # Note: certain buggy HTTP/1.0 client implementations generate an # extra CRLF's after a POST request. To restate what is explicitly # forbidden by the BNF, an HTTP/1.1 client must not preface or follow # a request with an extra CRLF. # **** My Note: IE4 does this !! **** # We try to look for the extra couple of bytes, but if we don't find # them and they come later, we'll be OK 'cos we skip leading CR/LF # in headers anyway if (defined($body = RemoveBytes($data, $clength))) { $body .= RemoveBytes($data, length($::HttpEol)) if $header =~ /^POST/ and $data =~ /^$::HttpEol/; } } else { # We think this is ended by the socket closing, so ignore it until # we get an eof marker ($body, $data) = ($data, "") if $eof; } return ($body, $data); } #---------------------------------------------------------------------- # # Given a block of data which is known to start with an HTTP header, # this tries to split it into header, body and what's left at the end. # Returns these 3 items, some may be undef if parsing is incomplete. # $eof tells us if this is the real socket eof (used for HTTP 1.0 replies). # $req is the most outstanding Request header, HTTP responses may # consult this to see what was asked for. # sub SplitHttp { my ($data, $eof, $reqh) = @_; my ($head, $body, $next); ($head,$data) = RemoveHttpHeader($data, $eof, $reqh) if defined($data); ($body,$next) = RemoveHttpBody($data, $eof, $head, $reqh) if defined($head) and defined($data); return ($head, $body, $next); } #---------------------------------------------------------------------- # # A socket $reader has a read event (from select), usually data to be sent # to $writer. # Note that this is the same routine for both directions (browser --> server # and vice-versa). # For now we read and write the whole reply, or push it back into the buffer # for when more data arrives, but later we might want to do this over # multiple calls (eg read and write a header or partial body). # # $rqRequests is a queue of outstanding requests for this conversation held # as HttpHeader objects. # New requests get added to the end, responses clear a request from the head. # # Returns 0 if this socket is now dead and should be closed. # sub HandleSocketData { my($reader, $writer, $rqRequests, $mprefix, $log) = @_; # Read all the available bytes for this socket - we get a read event # with 0 new bytes on real EOF and the socket is closing. my $numNewBytes = $reader->FillReadBuffer(); my $isEof = ($numNewBytes == 0); # HTTP 1.0 replies are terminated by closing the socket, so # still parse any queued data... my $data = $reader->ReadBufferedData(); if (length($data) > 0) { # Look for a complete message - if found, interpret and send, # else push it back and wait for more. # Note that to improve throughput we immediately write the new # bytes we just read.... $writer->Write(substr($data, -$numNewBytes)) if $numNewBytes > 0; my($header,$body, $next) = SplitHttp($data, $isEof, $rqRequests->[0]); if (defined($header) and ($header =~ /^(Secure-)?HTTP/i)) { # we've got at least a partial message, note the times... $rqRequests->[0]->ResponseRecvTime(); } while (defined($header) and defined($body) and defined($next)) { # fix up the queue of outstanding requests my $logmsg = ""; my $hh = HttpHeader->new($header); my $rqh; if ($hh->IsRequest()) { # Got a new request, add it to the end of the list... $rqh = $hh; $rqh->BodySize( length($body) ); push(@{$rqRequests}, $rqh); my $rt = FTime( $rqh->RequestSendTime() ); my $rq = $rqh->FirstLine; my $numq = int(@{$rqRequests}); my $qmsg = $numq > 1 ? " $numq requests queued" : ""; $logmsg .= "$mprefix ==== ($rt) Request <$rq>$qmsg\n"; } else { # Got a reply - drop the matching request unless it's a # special "100 Continue" interim reply. my($hc,$hm) = ($hh->ReplyStatusCode(), $hh->ReplyStatusText()); $rqh = $rqRequests->[0]; shift @{$rqRequests} unless $hc == 100; my $rt = FTime($rqh->ResponseSendTime()); # Total Response time is how long it took to get a full reply # Latency is the gap between when we sent the request and # when we received the first bytes of reply. # Processing is the gap between when we received the start of # the reply, and when we received the full reply. my $t = FTime($rqh->ResponseSendTime - $rqh->RequestSendTime); my $l = FTime($rqh->ResponseRecvTime - $rqh->RequestSendTime); my $p = FTime($rqh->ResponseSendTime - $rqh->ResponseRecvTime); my $tMsg = "Response time $t (Latency $l, Processing $p)"; my $rq = $rqh->FirstLine; my $numq = int(@{$rqRequests}); my $qmsg = $numq ? " $numq requests queued" : ""; my $hs = (($::opts{d} == 0) ? "$hc $hm" : $hc); $logmsg .= "$mprefix ==== ($rt) Response $hs to <$rq>$qmsg\n"; $logmsg .= "$mprefix ==== $tMsg\n" if $::opts{t}; } if ($::opts{d} > 0) { # build up the message for the HTTP header lines HEADER: foreach (split(/$::HttpEol/, $header)) { # If we've been given a list of header fields not to report # then don't report them.... foreach my $ignorestr (@{$::opts{ignore}}) { next HEADER if index(lc($_),lc($ignorestr)) >= 0; } $logmsg .= "$mprefix $_\n"; last if $::opts{d} == 1; } } $logmsg .= "$mprefix ==== Body ".length($body)." bytes\n"; if (exists $::opts{b}->{$hh->RequestMethod()}) { # print the body of POST requests.... (to show form parameters) my($f,@b) = split("&", $body); $logmsg .= "$mprefix Body => $f\n"; $logmsg .= "$mprefix Body => &$_\n" foreach @b; } elsif (exists $::opts{b}->{ALL}) { # print some of the other bodies if asked to do so my $n = $::opts{b}->{ALL}; $body =~ s/[^[:print:]]/\./gm; $body = substr($body, 0, int $n) if $n; $logmsg .= "$mprefix Body $n => $body\n"; } $logmsg .= "\n"; my $reportIt = 1; if ($hh->IsReply()) { # look to see if we skip reporting this my $rcode = $hh->ReplyStatusCode(); $reportIt = not grep($_ == $rcode, @{$::opts{skiprcode}}); } if ($reportIt and @{$::opts{skiptype}} > 0) { # skip replies or request relating to specified file types my($ft) = ($rqh->RequestUri() =~ m!\.([^\.\?]+)(\?.*)?$!); $reportIt = not grep(lc($_) eq lc($ft), @{$::opts{skiptype}}); } # we've now handled a complete message - log the details.... if ($reportIt and $::opts{'='}) { # report requests and replies together... if ($hh->IsRequest()) { $hh->Summary( $logmsg ); # store message to report later } else { $log->print( $rqh->Summary() . $logmsg ); } } elsif ($reportIt) { # just print everything as it comes... $log->print($logmsg); } # push back the data for the next item, and try to read another... $reader->PushBytes($next); $data = $reader->ReadBufferedData(); ($header,$body, $next) = SplitHttp($data,$isEof,$rqRequests->[0]); } # push back any remaining data.... $reader->PushBytes( $data ); } return ! $isEof; } #---------------------------------------------------------------------- # # Main program, see usage message.... # #if (! getopts("d:p:l:r:hvt=", \%::opts) or @ARGV > 0 or exists($::opts{h})) if (GetOptions('display=i' => \$::opts{d}, 'b!' => sub { push(@{$::opts{b}}, "all") }, 'body=s' => $::opts{b}, 'port=i' => \$::opts{p}, 'log=s' => \$::opts{l}, 'remote=s' => \$::opts{r}, 'ignore=s' => \@{$::opts{ignore}}, 'skiptype=s' => \@{$::opts{skiptype}}, 'skiprcode=s' => \@{$::opts{skiprcode}}, 'help!' => \$::opts{h}, 'verbose!' => \$::opts{v}, 'timing!' => \$::opts{t}, 'together!' => \$::opts{'='}, ) == 0 or @ARGV > 0 or $::opts{h} > 0) { print STDERR "Unknown arg $ARGV[0]\n\n" if @ARGV > 0; die $Usage; } # If args accept multiple items, expand comma seperated items unless quoted # Note that double quotes are already removed by Getopt::Long... # foreach (keys %::opts) { @{$::opts{$_}} = map(/^'(.*)'$/ ? $1 : split(/,/, $_), @{$::opts{$_}}) if (ref($::opts{$_}) eq "ARRAY" and @{$::opts{$_}} > 0); } my($rhost,$rport) = ($::opts{r} =~ m!^(?:http://)?([^:]+):?(\d*)$!i); $rport = 80 unless $rport; $rhost = "localhost" unless defined($rhost); my $log = ($::opts{l} ne "") ? new IO::File($::opts{l}, "w") : *STDOUT; $log->autoflush; my $listener = IO::Socket::INET->new(LocalPort => $::opts{p}, Type => SOCK_STREAM, Reuse => 1, Proto => "tcp", Listen => 10, TimeOut => $main::Timeout); die "Can't setup listener on port $::opts{p} - it's probably in use already\n" unless $listener; my $select = new IO::Select ( $listener ) or die "Can't setup selecter\n"; # # Report that we're started and ready to roll.... # if ($::opts{v}) { $log->print("HttpSniffer started with options:\n"); foreach (keys %::opts) { my $v = ref($::opts{$_}) ? join("\n\t\t", @{$::opts{$_}}) : $::opts{$_}; $log->print("\t$_ = $v\n") if (defined($v) and $v ne ""); } $log->print("Listening socket made on socket ".$listener->fileno."\n"); } # convert the "body" args of a list of XXXnnn to a hash of XXX => nnn $::opts{b} = { map((uc =~ /^(\D+)(\d*)$/), @{$::opts{b}}) }; STDERR->print("HttpSniffer waiting for clients on port $::opts{p}...\n"); STDERR->flush(); my %clients; # the client socket for a given socket my %servers; # the server socket for a given socket my %requests; # queue of outstanding request headers from each client while (my @ready = $select->can_read) { my @readyhandles = map { $_->fileno } @ready; my @allhandles = map { $_->fileno } $select->handles; $log->print("#### Sockets ".join(",", @readyhandles)." of " .join(",",@allhandles)." need checking ####\n") if $::opts{v}; # don't close sockets inside this loop, or we'd have to # remove them from @ready too, so make a list of what to close # my(%closeList); foreach my $fh (@ready) { if ($fh == $listener) { # Accept (create) a new socket - it's a new client. # Make a new partner socket to the real host. my $c = BufferedSocket->new($listener->accept) or die "Can't accept new client\n"; $c->Name( sprintf("C%02d", $c->fileno) ); my $ss = IO::Socket::INET->new(PeerAddr => $rhost, PeerPort => $rport, Proto => 'tcp', Timeout => $main::Timeout) or die "Can't connect to server $rhost:$rport\n"; my $s = BufferedSocket->new($ss); $s->Name( sprintf("S%02d", $s->fileno) ); $log->print("#### Adding new client ".$c->Name. " ( from ".$c->peerhost.")". " and server ".$s->Name."\n") if $::opts{v}; # Add them both to the select list.... ($clients{$c->fileno}, $servers{$c->fileno}) = ($c, $s); ($clients{$s->fileno}, $servers{$s->fileno}) = ($c, $s); $requests{$c->fileno} = []; $select->add($c->Socket, $s->Socket); } else { # Data socket, work out the client and server sockets my $c = $clients{$fh->fileno}; my $s = $servers{$fh->fileno}; my $readfrom = $fh->fileno == $c->fileno ? $c : $s; my($cn,$sn) = ($c->Name, $s->Name); # we write to the one of the pair that we don't read from my $writeto = ($readfrom == $c) ? $s : $c; my $mprefix = ($readfrom == $c) ? " --> $cn --> $sn" : " <-- $cn <-- $sn"; if (! HandleSocketData($readfrom, $writeto, $requests{$c->fileno}, $mprefix, $log)) { $closeList{$c->fileno} = $c; $closeList{$s->fileno} = $s; } } } # Mark for closing any socket pairs with errors (rarely if ever happens) # Note that later versions ask us to call has_exception() rather than # has_error(), as far as I know this is from version 1.14 onwards my @errs = ($IO::Select::VERSION >= 1.14) ? $select->has_exception(0) : $select->has_error(0); foreach (@errs) { $closeList{$_->fileno} = $clients{$_->fileno}; $closeList{$_->fileno} = $servers{$_->fileno}; } # Now close all the sockets that need it foreach (keys %closeList) { $log->print("Removing dead socket $_\n") if $::opts{v}; $select->remove( $closeList{$_}->Socket ); delete $clients{$_}; delete $servers{$_}; delete $requests{$_}; $closeList{$_}->Socket->close; $closeList{$_} = undef; } } die "Done and die\n"; # never really gets here... just for completeness package BufferedSocket; #---------------------------------------------------------------------- # # A BufferedSocket reads data from a socket and returns it a char at a time. # Data can be pushed back onto a buffered socket, but more importantly we can # read up to a certain string, or read a number of bytes. # #---------------------------------------------------------------------- BEGIN { Time::HiRes->import("time") if $::UseTimeHiRes; } # Takes one param, the socket to use # Makes no object if no valid socket is passed. sub new { shift; my $rawsocket = shift; return undef unless $rawsocket; my $self = bless {}; $self->{_socket} = $rawsocket; $self->{_data} = ""; return $self; } # Basic inline type methods - return the raw socket object etc. sub Socket { $_[0]->{_socket}; } sub fileno { $_[0]->{_socket}->fileno; } sub NumBytesAvailable { length($_[0]->{_data}); } sub Write { syswrite( $_[0]->{_socket}, $_[1], length($_[1]) ); } sub peerhost { $_[0]->{_socket}->peerhost(); } sub sockport { $_[0]->{_socket}->sockport(); } # To make reporting easier, each object can be given a name - it means # nothing to the object itself. # Called with a parameter it sets the name, always return the current name... # sub Name { my $self = shift; $self->{_name} = $_[0] if @_; return $self->{_name}; } # # Reads as many bytes as possible from the socket without blocking and # return the number of bytes read from the socket. # To do this, we read until the first time we get less bytes than we asked for # sub FillReadBuffer { my $self = shift; my $numIn = (@_ == 0) ? 4096 : shift; # chunk size ... my ($nr, $total) = (0,0); my $buf; do { $nr = sysread($self->{_socket}, $buf, $numIn); if (defined($nr) and $nr > 0) { $total += $nr; $self->{_data} .= $buf; } } while (defined $nr and $nr == $numIn); return $total; } # # Return the current n chars, or up to end of buffered data. # If numbytes isn't specified (or is < 0), reads whatever is in the cache. # sub ReadBufferedData { my ($self, $n) = @_; $n = length($self->{_data}) unless (defined($n) and $n >= 0); return substr($self->{_data}, 0, $n, ""); } # # Push back the specified bytes into the socket read buffer # sub PushBytes { my ($self, $str) = @_; $self->{_data} = $str . $self->{_data} if defined($str); } package HttpHeader; #---------------------------------------------------------------------- # An HttpHeader object holds the details of an HTTP header - this is used to # match request and response details - eg HEAD requests always produce replies # without any data. # They are created with the header string. #---------------------------------------------------------------------- my $startTime; BEGIN { Time::HiRes->import("time") if $::UseTimeHiRes; $startTime = time(); } sub new { my($proto,$header) = @_; return undef unless defined($header); my $class = (ref($proto) or $proto); return bless( { _header => $header }, $class ); } # utility routine to set and store named time (elapsed time) sub Time { my($s,$n) = @_; $s->{$n} = time - $startTime unless exists $s->{$n}; $s->{$n}; } # get the header sub Header { $_[0]->{_header} } sub FirstLine { my $h = $_[0]->{_header}; return substr($h,0,index($h,$::HttpEol)); } # we can store away the size of the body and a summary if we want.... sub Summary { $_[0]->{_summary} = $_[1] if @_ > 1; return $_[0]->{_summary}; } sub BodySize { $_[0]->{_bodysize} = $_[1] if @_ > 1; return $_->[0]->{_bodysize}; } # Is this a request or a response - and what are the key details ? # Request-Lines are of the form # Method SP Request-URI SP HTTP-Version CRLF # whereas Reply-Lines are of the form # HTTP-Version SP Status-Code SP Reason-Phrase CRLF # HTTP-Version was originally of the form # "HTTP" "/" 1*DIGIT "." 1*DIGIT # but now we allow for HTTPS too # "Secure-HTTP" "/" 1*DIGIT "." 1*DIGIT # sub IsRequest { return $_[0]->FirstLine() !~ /^(Secure-)?HTTP/i; } sub IsReply { return ! $_[0]->IsRequest(); } sub _FirstField { return (split(/\s+/, $_[0]->FirstLine()))[$_[1]]; } sub RequestMethod { return $_[0]->_FirstField(0) if $_[0]->IsRequest(); } sub RequestUri { return $_[0]->_FirstField(1) if $_[0]->IsRequest(); } sub RequestHttpVer { return $_[0]->_FirstField(2) if $_[0]->IsRequest(); } sub ReplyHttpVer { return $_[0]->_FirstField(0) if $_[0]->IsReply(); } sub ReplyStatusCode { return $_[0]->_FirstField(1) if $_[0]->IsReply(); } sub ReplyStatusText { return $_[0]->_FirstField(2) if $_[0]->IsReply(); } # get some times... sub RequestSendTime { $_[0]->Time("_qst") } # sent request to server sub ResponseRecvTime { $_[0]->Time("_rrt") } # got start of reply sub ResponseSendTime { $_[0]->Time("_rst") } # got complete reply