#!/usr/bin/perl require 5; my $VERSION = '2.3.0.0041'; my $LogFile = 'log.txt'; my $MaxLogSize = 0; my @IgnoreHosts = ('66.87.155.95'); my %Maps = ( 'http://xav.com/' => 'http://www.xav.com/', 'http://ftp.xav.com/' => 'http://www.xav.com/', ); my $AllowDebug = 0; my $resolve_dns_names = 1; my $use_ssi_detect = 1; my $domain = 'http://' . &query_env('SERVER_NAME','localhost'); my $header = "Content-type: text/html\015\012\015\012"; my $TimeOffsetInHours = 0; my $NoLogHead = 0; %::private = (); $::private{'PRINT_HTTP_STATUS_HEADER'} = 0; my %FORM = (); &WebFormL(\%FORM); my $Export = 0; if (($0 =~ m!^(.+)(\\|/)!) and ($0 !~ m!safeperl\d*!i)) { chdir($1); } my $mode = $FORM{'mode'} || ''; # $ref is the full URL of the referring file. If not given, will query HTTP_REFERER my $ref = $FORM{'ref'} || $ENV{'HTTP_REFERER'} || ''; # $to is the full URL of the file being visited. If not given, will be pulled from various environment variables my $to = $FORM{'to'} || ''; if ($mode eq 'img') { $to = &query_env('HTTP_REFERER'); } my $nexturl = $FORM{'nexturl'} || ''; my $qs = &query_env('QUERY_STRING'); DetectMode: { # is the mode explicitly set? last if (($mode eq 'img') or ($mode eq 'redir')); # SSI call: if ($use_ssi_detect) { if ($ENV{'DOCUMENT_URI'}) { $mode = 'ssi' unless ($mode); unless ($to) { $to = $domain . $ENV{'DOCUMENT_URI'}; if ($ENV{'REDIRECT_QUERY_STRING'}) { $to .= '?' . $ENV{'REDIRECT_QUERY_STRING'}; } } last; } # Alternate SSI call (via REQUEST_URI not DOCUMENT_URI) if ($ENV{'REQUEST_URI'} and ($qs eq '')) { $mode = 'ssi' unless ($mode); unless ($to) { $to = $domain . $ENV{'REQUEST_URI'}; } last; } # Alt SSI call on Windows/IIS if ((&query_env('SERVER_SOFTWARE') =~ m!iis!i) and ($ENV{'PATH_INFO'} ne $ENV{'SCRIPT_NAME'})) { $mode = 'ssi' unless ($mode); unless ($to) { $to = $domain . $ENV{'SCRIPT_NAME'}; } last; } } # trans image logging: if ($qs =~ m!^(\w+)\.gif(\&ref=)?(.*)$!i) { $mode = 'img' unless ($mode); $ref = $3 if ($3); $to = &query_env('HTTP_REFERER'); last; } # redirect if (($qs) and ($qs ne 'debugme')) { $mode = 'redir' unless ($mode); $nexturl = $qs unless ($nexturl); $Export = 1; last; } if (lc($qs) eq 'debugme') { $mode = 'debug'; last; } } if ($mode eq 'redir') { $to = $nexturl; } # provide output the user first, independent of logging action: if ($mode eq 'ssi') { print "$header\n \n"; } elsif ($mode eq 'img') { &Print_Image; } elsif ($mode eq 'redir') { # 0040 strip vertical whitespace for security $nexturl =~ s!\r|\n|\015|\012!!sg; print "HTTP/1.0 301 Moved\015\012" if ($::private{'PRINT_HTTP_STATUS_HEADER'}); print "Location: $nexturl\015\012\015\012"; } elsif ($mode eq 'debug') { &SpawnDebugger; } else { # we should never get here, this is just a valid HTTP response # in case of mis-configuration or whatever: print "HTTP/1.0 200 OK\015\012" if ($::private{'PRINT_HTTP_STATUS_HEADER'}); print $header; print "

$0 - working okay - no logging command received - use ?debugme query string for more info.

"; } # decide whether or not to log this visit: my $err = ''; Err: { last Err if ($mode eq 'debug'); last Err if (&query_env('HTTP_COOKIE') =~ m!axs_no_log=1!); last Err if (($NoLogHead) and (&query_env('REQUEST_METHOD') eq 'HEAD')); my ($vhost, $vaddr) = &resolve_host($resolve_dns_names); my $ighost = ''; foreach $ighost (@IgnoreHosts) { $ighost = quotemeta($ighost); next unless ($ighost); last Err if ($vhost =~ m!$ighost!); last Err if ($vaddr =~ m!$ighost!); } # Note: you can filter on other things as well. If you want to ignore people # arriving from a certain site, like Yahoo, you can write the following (note # that HTTP_REFERER is used instead of REMOTE_HOST): # # @ignore = ('yahoo.com', 'av.yahoo.com'); # foreach (@ignore) { # exit if ($ENV{'HTTP_REFERER'} =~ m!$_!); # } # don't fill up the file system: my $LogSize = -s $LogFile || 0; last Err if (($MaxLogSize) and ($MaxLogSize < $LogSize)); # cleanse the data: my ($clean_url, $host, $port, $path, $is_valid) = &parse_url($ref); if ($is_valid) { $ref = $clean_url; } ($clean_url, $host, $port, $path, $is_valid) = &parse_url($to); if ($is_valid) { $to = $clean_url; } # Apply the mappings: foreach (keys %Maps) { $to =~ s!$_!$Maps{$_}!ig; $ref =~ s!$_!$Maps{$_}!ig; } &log_visit($vhost,$vaddr,$ref,$to); last Err; } sub Print_Image { print "HTTP/1.0 200 OK\015\012" if ($::private{'PRINT_HTTP_STATUS_HEADER'}); print "Pragma: no-cache\015\012"; print "Expires: Saturday, February 15, 1997 10:10:10 GMT\015\012"; print "Content-Type: image/gif\015\012\015\012"; binmode(STDOUT); foreach (71,73,70,56,57,97,1,0,1,0,128,255,0,192,192,192,0,0,0,33,249,4,1,0,0,0,0,44,0,0,0,0,1,0,1,0,0,1,1,50,0,59) { print pack('C',$_); } } # ___________________________________________________________________________ # This runs a filesystem test against $LogFile and dumps a ton of (hopefully) # useful information to the screen: sub SpawnDebugger { print "HTTP/1.0 200 OK\015\012" if ($::private{'PRINT_HTTP_STATUS_HEADER'}); print "Content-Type: text/html\015\012\015\012"; unless ($AllowDebug) { print '

Error: no output available because $AllowDebug = 0 in this script.

'; return 0; } my $filesys_test = ''; my $filesys_ok = 0; TEST: { if (-e $LogFile) { my ($LogSize,$LastModT) = (stat($LogFile))[7,9]; $LastModT = scalar localtime($LastModT); $filesys_test .= "

The log file, $LogFile, exists with size $LogSize bytes. It was last modified on $LastModT. "; if (open(FILE,">>$LogFile")) { binmode(FILE); close(FILE); $filesys_test .= "The log file is writable.

The filesystem test passed!

"; $filesys_ok = 1; } else { $filesys_test .= <<"EOM"; However, the log file is not writable. The filesystem returned "$!" when this script tried to write to it. You need to change the file permissions to make it script-writable.

The filesystem test failed.

EOM last TEST; } } elsif (open(FILE,">>$LogFile")) { binmode(FILE); close(FILE); $filesys_test .= <<"EOM";

The log file, $LogFile, did not exist when this script started. However, this script attempted to create it for you, and the server responded that this was successful. So everything should be fine now. Reload this web page, and hopefully you will see a message that the file system test has passed. If it does not pass, and instead you get an error or you get this message again, then you will have to manually create the log file and set it's permissions.

The filesystem test needs to be run again. (reload this page)

EOM last TEST; } else { $filesys_test .= <<"EOM";

The log file, $LogFile, doesn't exist. You need to create one and give it writable permissions. Alternately, the log file may exist but the \$LogFile variable might not point to the correct location, in which case you will need to change your variable.

The filesystem test failed.

EOM last TEST; } } my $homelink = ''; my @ext = ('pl', 'cgi'); if ($0 =~ m!\.cgi$!) { @ext = ('cgi','pl'); } foreach (@ext) { my $file = 'ax-admin.' . $_; if (-e $file) { $homelink = qq!

Click here to return to $file.

\n!; last; } } my $cookie = &he($ENV{'HTTP_COOKIE'} || ''); my $cookie_info = ''; if ($cookie =~ m!axs_no_log=1!) { $cookie_info = "

Your visits will NOT be logged because the 'axs_no_log=1' cookie was detected.

\n"; } else { $cookie_info = "

Your visits will be logged, because the 'axs_no_log=1' cookie was NOT detected.

\n"; } my $ignore_host_info = ''; IgnoreHostInfo: { if (not @IgnoreHosts) { $ignore_host_info .= "

The \@IgnoreHosts array is empty. No logging overrides will occur due to IP address or hostname.

\n"; last; } my ($vhost, $vaddr) = &resolve_host($resolve_dns_names); $ignore_host_info .= "

The \@IgnoreHosts array contains:
\n"; my $b_ignored = 0; foreach (@IgnoreHosts) { $ignore_host_info .= "   '$_'"; if ($_) { my $qm = quotemeta($_); if ($vhost =~ m!$qm!) { $ignore_host_info .= " logging disabled for you because $vhost matches\n"; $b_ignored = 1; } elsif ($vaddr =~ m!$qm!) { $ignore_host_info .= " logging disabled for you because $vaddr matches\n"; $b_ignored = 1; } } $ignore_host_info .= "
\n"; } $ignore_host_info .= "

"; if ($b_ignored) { $ignore_host_info .= "

Your client address ($vhost/$vaddr) will cause your visits to not be logged.

\n"; } else { $ignore_host_info .= "

Your client address ($vhost/$vaddr) does not match any of these entries. Logging will not be disabled based on \@IgnoreHosts values.

\n"; } last; } my $env_info = ''; foreach (sort keys %ENV) { my ($name, $value) = &he( $_, substr($ENV{$_},0,60) ); $env_info .= qq!$name:$value
\n!; } my $axpath = 'http://' . ( $ENV{'HTTP_HOST'} || $ENV{'SERVER_NAME'} || '' ) . $ENV{'SCRIPT_NAME'}; print <<"EOM"; Debug Page and Usage Instructions $homelink

Review the AXS help file if you need more help.

Filesystem Test:

$filesys_test

Usage Instructions:

  1. Add this "AXS tracking code" to any HTML pages that you want to have tracked. This text should be placed within the <body> section of the document, towards the bottom of the page. You can place the text almost anywhere, so feel free to move it around if it causes problems with your layout:

    Note that this text only works on normal HTML pages, not in frameset documents.

  2. After entering that HTML code on your pages, transfer the pages up to the server. Then clear your browser cache and visit the pages. Use your browser's "view-source" command to look at the HTML source code. Confirm that the above Javascript appears in your pages *exactly* as it appears above. Make sure that the line breaks appear in the right places.

    This is important because some HTML editor programs will corrupt the text that you try to insert into your pages. You are responsible for entering the Javascript logging code correctly and for verifying that it appears correctly. If you do not do this, then the product will not work.

  3. Code your off-site links (links to pages/files that don't already contain the AXS tracking code) like this:

    <a href="$ENV{'SCRIPT_NAME'}?http://yahoo.com/">http://yahoo.com/</a>

    Here is an example link.

If any of your HTML pages reside on a different website than AXS, then you should use:

$axpath

instead of:

$ENV{'SCRIPT_NAME'}

in the examples above.

Standard Debugging Information:

This is AXS Logging Module version $VERSION in debug mode.
The file name of this script is $0.
This script is executing under Perl version $].
The critical file system variable is \$LogFile = "$LogFile";.
\$MaxLogSize = $MaxLogSize; (bytes)

Webmaster Logging Override

You can disable the logging of your own visits by having the "axs_no_log=1" cookie, or by having your IP address or hostname present in the \@IgnoreHosts array.

See this help file for more information about not tracking your own visits.

Cookie Override

Your browser sent the following cookie header:

HTTP_COOKIE: $cookie
$cookie_info

IP or Hostname Override

$ignore_host_info

Environment Variables:

$env_info


EOM } # Trim - thanks to William Boudreau for & fix sub Trim { local $_ = $_[0] ? $_[0] : ''; s!^[\r\n\s]+!!o; s![\r\n\s]+$!!o; return $_; } #changed 0033 -- no longer mapping // => / within the query string portion of the URL # fixed Google image search backtracking sub clean_path { my $path = &Trim($_[0]); # strip pound signs and all that follows (links internal to a page) $path =~ s!\#.*$!!; my ($base, $question, $query) = ($path, '', ''); if ($path =~ m!^(.*?)(\?)(.*)$!s) { ($base, $question, $query) = ($1, $2, $3); } local $_ = $base; # map /%7E to /~ (common source of duplicate URL's) s!\/\%7E!\/\~!ig; # map "/./" to "/" s!/+\./+!/!g; # map trailing "/." to "/" s!/+\.$!/!g; # nuke all leading "/../" entries (meaningless for us) # map /../foo => /foo while (s!^/+\.\./+!/!) {} # map "folder/../" => "/" # map "bar/folder/../" => "bar//" while (s!([^/]+)/+\.\./+!/!) {} # map "/folder/.." => "/" s!/+([^/]+)/+\.\.$!/!; # collapse back-to-back slashes in the path s!/+!/!g; return $_ . $question . $query; } sub parse_url { local $_ = $_[0] || ''; my ($clean_url, $host, $port, $path, $is_valid) = ('', '', 80, '/', 0); # add trailing slash if none present $_ .= '/' if (m!^http://([^/]+)$!i); if (m!^http://([\w|\.|\-]+)\:?(\d*)/(.*)$!i) { ($host, $port, $path, $is_valid) = (lc($1), $2, &clean_path("/$3"), 1); $port = 80 unless $port; if ($port == 80) { $clean_url = "http://$host$path"; } else { $clean_url = "http://$host:$port$path"; } } return ($clean_url, $host, $port, $path, $is_valid); } =item WebFormL Usage: &WebFormL( \%FORM ); Returns a by-reference hash of all name-value pairs submitted to the CGI script. updated: 8/21/2001 Dependencies: &url_decode &query_env =cut sub WebFormL { my ($p_hash) = @_; my @Pairs = (); if (&query_env('QUERY_STRING')) { @Pairs = split(m!\&!, &query_env('QUERY_STRING')); } else { @Pairs = @ARGV; } local $_; foreach (@Pairs) { next unless (m!^(.*?)=(.*)$!s); my ($name, $value) = (&url_decode($1), &url_decode($2)); if ($$p_hash{$name}) { $$p_hash{$name} .= ",$value"; } else { $$p_hash{$name} = $value; } } } sub url_decode { local $_ = defined($_[0]) ? $_[0] : ''; tr!+! !; s!\%([a-fA-F0-9][a-fA-F0-9])!pack('C', hex($1))!eg; return $_; } =item query_env Usage: my $remote_host = &query_env('REMOTE_HOST'); Abstraction layer for the %ENV hash. Why abstract? Here's why: 1. adds safety for -T taint checks 2. always returns '' if undef; prevent -w warnings =cut sub query_env { my ($name,$default) = @_; if (($ENV{$name}) and ($ENV{$name} =~ m!^(.*)$!s)) { return $1; } elsif (defined($default)) { return $default; } else { return ''; } } =item resolve_host Usage: my ($host,$addr) = &resolve_host($resolve_dns_names); Returns either the FQDN and IP address of the visitor, based on the variables $ENV{'REMOTE_HOST'}, $ENV{'REMOTE_ADDR'}, and $resolve_dns_names. =cut sub resolve_host { my ($resolve_dns_names) = @_; # This code converts un-resolved hostnames to their text versions, then makes # the names lowercase, and then aborts logging if this hostname is forbidden: my ($host, $addr) = (&query_env('REMOTE_HOST'), &query_env('REMOTE_ADDR')); if (($host eq '') or ($host =~ m!^\d+\.\d+\.\d+\.\d+$!)) { if (($resolve_dns_names) and ($addr =~ m!^(\d+)\.(\d+)\.(\d+)\.(\d+)$!)) { $host = (gethostbyaddr(pack('C4',$1,$2,$3,$4),2))[0]; } } $host = lc($host) || $addr; return ($host,$addr); } sub log_visit { my ($host,$addr,$ref,$to) = @_; my $logline = '|'; $ref = url_decode($ref); foreach ($host,$addr,$ref,$to,&query_env('HTTP_USER_AGENT')) { # strip delimiters: s!\||\015|\012!!sg; $logline .= $_.'|'; } foreach ((localtime(time + (3600*$TimeOffsetInHours)))[0..7]) { $logline .= $_.'|'; } $logline .= 'export|' if ($Export); $logline .= "\n"; # Make sure the record is strictly valid before writing to the log: exit unless ($logline =~ m!^\|([^\|]+)\|([^\|]+)\|([^\|]*)\|([^\|]*)\|([^\|]*)\|\d+\|\d+\|\d+\|\d+\|\d+\|\d+\|\d+\|\d+\|(export\|)?$!); if (open(LOG,">>$LogFile")) { binmode(LOG); print LOG $logline; close(LOG); } } sub he { my @out = @_; local $_; foreach (@out) { $_ = '' if (not defined($_)); s!\&!\&!g; s!\>!\>!g; s!\ 0)) { return @out; } else { return $out[0]; } } 1;