/usr/bin/make_combined_log2 is in libapache2-mod-log-sql 1.100-16.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
# $Id: make_combined_log.pl,v 1.2 2004/02/12 23:32:55 urkle Exp $
#
# make_combined_log.pl
#
# Usage: make_combined_log <days> <virtual host>
#
# This perl script extracts the httpd access data from a MySQL database
# and formats it properly for parsing by 3rd-party log analysis tools.
#
# The script is intended to be run from cron. Its command-line arguments tell
# it how many days' worth of access records to extract, and which virtual_host
# you are interested in (because many people log several virthosts to one MySQL
# db.) This permits you to run it daily, weekly, every 9 days -- whatever you
# decide.
#
# Note: By "days" I mean "chunks of 24 hours prior to the moment this script is
# run." So if you run it at 4:34 p.m. on the 12th, it will go back through 4:34
# p.m. on the 11th.
#
# Known issues:
# * Earlier versions of this script assumed that every request was a GET.
# This version rebuilds the request line from the request_method,
# request_uri and request_protocol columns instead, so the log table must
# contain all three columns. Storing the full HTTP request line in the
# database is still not needed (and would cost a LOT of space quickly).
#
# * Because this is somewhat of a quick hack it doesn't do the most robust
# error checking in the world. Run it by hand to confirm your usage before
# putting it in crontab.
# Unbuffer STDOUT so each log line is written as soon as it is printed.
$| = 1;
use DBI;

# Remove the # in front of this line when you have
# edited the variables below.
#$has_edited_source = 1;

#
# Set up the proper variables to permit database access
#
$serverName = "your.dbhost.com";   # database host
$serverPort = "3306";              # database TCP port
$serverUser = "someuser";          # database account
$serverPass = "somepass";          # password for that account
$serverTbl = "acc_log_tbl";        # table holding the access records
$serverDb = "apache";              # database containing that table

# Refuse to run until the placeholder settings above have been edited.
# Diagnostics go to STDERR: STDOUT carries the generated log, so anything
# printed there would end up inside the log file when run from cron.
if (!defined($has_edited_source)) {
    print STDERR "Please edit this file and configure it first.\n";
    print STDERR "This program is $0\n";
    exit 1;
}

# Exactly two arguments are required: <days> <virtual host>.
if (@ARGV != 2) {
    print STDERR "Usage $0 days virtualhost\n";
    exit 1;
}
$days = $ARGV[0];
$virthost = $ARGV[1];

# <days> is used in date arithmetic. A non-numeric value would silently be
# treated as 0 and produce an empty log, so reject it up front. Fractions
# of a day (e.g. 0.5) are accepted.
if ($days !~ /\A(?:\d+(?:\.\d*)?|\.\d+)\z/) {
    print STDERR "Usage $0 days virtualhost\n";
    print STDERR "days must be a non-negative number, got '$days'\n";
    exit 1;
}
#
# Other constants
#
# UTC offsets printed in each timestamp: $st_tz while standard time is in
# effect, $dt_tz during daylight-saving time. They are fixed strings, so
# edit them to match the time zone of the machine running this script.
$st_tz = "-0800";
$dt_tz = "-0700";

# Only records at or after $start (now minus <days> * 24 hours) are fetched.
$now = time();
$start = $now - (86400 * $days);

#
# Connect and fetch the records
#
$dbh = DBI->connect("DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",$serverUser,$serverPass);
if (not $dbh) {
    die "Unable to connect to the database. Please check your connection variables. (Bad password? Incorrect perms?) " . ($DBI::errstr || "");
}

# The virtual host comes straight from the command line, so it is passed as
# a bound parameter rather than interpolated into the SQL text. The table
# name cannot be a placeholder; it comes from the configuration section.
$records = $dbh->prepare(
    "select remote_host,remote_user,request_uri,time_stamp,status,bytes_sent,"
  . "referer,agent,request_method,request_protocol "
  . "from `$serverTbl` "
  . "where virtual_host = ? and time_stamp >= ? "
  . "order by time_stamp"
);
if (not $records) {
    die "Unable to prepare the query: " . $dbh->errstr;
}

# execute() returns false on failure (for example when the table does not
# exist). An empty result set is not an error and simply produces no output.
if (not $records->execute($virthost, $start)) {
    die "No such table or the select failed: " . $records->errstr;
}
#Right
#ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
#ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
#Bad
#ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
#ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
#
# Pull out the data row by row and format it
#
# Month abbreviations indexed by localtime()'s zero-based month number.
my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

# Emit one combined-format line per row:
#   host ident user [dd/Mon/yyyy:hh:mm:ss zone] "method uri protocol" status bytes "referer" "agent"
while (my @row = $records->fetchrow_array) {
    # Column order follows the select list; index 3 is the epoch time_stamp.
    my $stamp = defined($row[3]) ? $row[3] : 0;
    my ($sec, $min, $hour, $mday, $mon, $year, undef, undef, $isdst) = localtime($stamp);

    # NULL or empty columns are written as "-", the log format's marker for
    # a missing value, so every line keeps the same number of fields.
    my ($host, $user, $uri, undef, $status, $bytes, $referer, $agent, $method, $protocol)
        = map { (defined($_) && length($_)) ? $_ : "-" } @row[0 .. 9];

    # Zero-padded date and time plus the fixed offset for standard or
    # daylight-saving time, whichever localtime() reports for this stamp.
    my $timestamp = sprintf(
        "%02d/%s/%04d:%02d:%02d:%02d %s",
        $mday, $month_names[$mon], $year + 1900,
        $hour, $min, $sec,
        ($isdst ? $dt_tz : $st_tz)
    );

    # The ident field is not fetched from the database, so it is always "-".
    # The authenticated user goes in the third field, where the combined
    # format puts it.
    print "$host - $user [$timestamp] "
        . "\"$method $uri $protocol\" $status $bytes \"$referer\" \"$agent\"\n";
}

# fetchrow_array returns an empty list both at end of data and on error;
# tell the two apart so a truncated log is not mistaken for a complete one.
if ($records->err) {
    die "Fetching records stopped early: " . $records->errstr;
}

#
# Done
#
$records->finish;
$dbh->disconnect;