# /usr/share/irssi/scripts/urlplot.pl is in irssi-scripts 20131030.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
use strict;
#use warnings; # Not a default module in perl 5.005
# Package globals are declared with "use vars" (rather than "our") in
# keeping with the perl 5.005 compatibility note above.
use vars qw($VERSION %IRSSI);
# Version of this script.
$VERSION = '1.2';
# Descriptive metadata about the script (author, contact, name, summary,
# license, homepage, last change date).
# NOTE(review): presumably read by irssi's script loader -- confirm.
%IRSSI = (
authors => 'bwolf',
contact => 'bwolf@geekmind.org',
name => 'urlplot',
description => 'URL grabber with HTML generation and cmd execution',
license => 'BSD',
url => 'http://www.geekmind.net',
changed => 'Sun Jun 16 14:00:13 CEST 2002'
);
# To read the documentation you may use one of the following commands:
#
# pod2man urlplot.pl | nroff -man | more
# pod2text urlplot.pl | more
# pod2man urlplot.pl | troff -man -Tps -t > urlplot.ps
=head1 NAME
urlplot
=head1 SYNOPSIS
All URL loggers suck. This one just sucks less.
=head1 DESCRIPTION
urlplot watches your channels for URLs and creates nice HTML logfiles of it.
Actually it parses normal text and topic changes for URLs. Internally it uses
two caches to prevent flooding and logging of duplicate URLs. As an additional
feature urlplot can create CSV datafiles. Logfiles can be created for all
channels and for separate channels. Logging can be allowed and denied on a per
channel/nick basis. A lockfile is used to protect the caches and logfiles from
accessing them by multiple irssi instances. A command allows you to send a
logged URL to your webbrowser of choice.
The format of the CSV logfiles is as follows:
date nick channel url
=head1 GETTING STARTED
Copy urlplot.pl into F<$HOME/.irssi/scripts> and create the necessary
directories with C<mkdir -p> F<$HOME/.irssi/urlplot/urls>.
Look for the settings C<url_log_basedir> and C<url_db_basedir> if you want to
change the directories urlplot will populate with files.
Follow the documentation and configure urlplot to fit your needs.
=head1 COMMANDS
=head2 /url <integer>
Executes the command C<url_command> with a URL from the cache as its
argument. The number selects the URL by its index as shown by C</url -list>.
If no number has been specified, the most recently logged URL is used.
=head2 /url -list
Displays a list of all logged URLs.
=head2 /url -clearcache
Clears the cache databases.
=head2 /url -showlog
Executes C<url_command> with C<url_navigate> as its argument. It can be used
to display the main logfile in your favourite webbrowser.
=head1 SETTINGS
=head2 Pathnames
Please note that you can't use $HOME or any environment variables in the
settings because irssi/urlplot isn't a shell ;)
=head2 /set url_command <string>
Command to be executed to display an URL (see /url). The command string should
contain the sequence C<__URL__> which will be replaced by a certain URL.
The default runs C<sensible-browser> on the URL and discards its output.
An alternative for mozilla is:
C< mozilla -remote "openURL(__URL__)" E<gt> /dev/null 2E<gt>&1 || \ >
C< mozilla "__URL__"& >
This will send a certain URL to mozilla or it will start mozilla if it is not
already there. The string can be anything. For example I use the following:
C< ssh host /home/user/bin/mozopenurl "'__URL__'" >/dev/null 2>&1 &>
where mozopenurl is a shell script that contains similar code as the mozilla
-remote example above.
=head2 /set url_cache_max <integer>
Specifies the maximum count of items which will be held in the persistent URL
caches. A value of zero disables automatic cache resizing (round-robin). The
default is to keep the last 90 URLs.
=head2 /set url_log_basedir <path>
Specifies the logging base directory used to create the log files beneath it.
The default isF< $HOME/.irssi/urlplot/urls/>. You have to create directories
by yourself:C< mkdir -p>F< $HOME/.irssi/urlplot/urls>.
=head2 /set url_log_file_name <relative-filename>
Defines the filename of the full logfile. It will be passed to
I<strftime(3)>. This can be useful to create logfiles with a timestamp.
The file will be created relative to C<url_log_basedir>. The default
is F<ircurls.html>.
=head2 /set url_chan_prefix <string>
Defines the filename prefix for channel logfiles. The leadingC< # >of the
channel name will be replaced by this prefix. It will be passed to
I<strftime(3)>. The file will be created relative toC< url_log_basedir>. The
default isF< chan_>.
=head2 /set url_chan_logging <bool>
Enables or disable channel logging globally.
The default isC< ON>.
=head2 /set url_log_csv_file_name <relative-filename>
Defines the filename of the full CSV logfile. It will be passed to
I<strftime(3)>. The file will be created relative toC< url_log_basedir>. The
default isF< ircurls.csv>.
=head2 /set url_log_csv_file_max_size <integer>
Defines the maximum size of the full CSV logfile. If it reaches the specified
maximum size in bytes it will be simply resized to zero. The default isC< 30*1024>
bytes.
=head2 /set url_log_csv_separator <string>
Defines the separator used as a delimiter for the fields of the CSV files.
The default is C<|>.
=head2 /set url_csv_logging <bool>
Conditionally turns on or off CSV logging for the full logfile. The default
isC< OFF>.
=head2 /set url_csv_chan_logging <bool>
Conditionally turns on or off CSV logging of the channel logfiles. The default isC< OFF>.
=head2 /set url_time_format <string>
Specifies the time format that will be passed toI< strftime(3)> to produce an
ASCII representation of the time/date when an URL was grabbed. It will be used
in the logfiles. The default isC< %Y:%m:%d - %H:%M:%S>.
=head2 /set url_log_file_max_size <integer>
Defines the maximum size of the full logfile and the channel logfile. If it
reaches the specified maximum size in bytes it will be simply resized to zero.
The default isC< 30*1024> bytes.
=head2 /set url_log_file_autoreload_time <integer>
Interval in seconds used for the HTML logfile header. The logfile reloads
itself every N seconds. The default is C<120> seconds.
=head2 /set url_db_basedir <path>
Specifies the database base directory where two database files and a lockfile
will be created. The default isF< $HOME/.irssi/urlplot>. You have to create
the directory by yourself.
=head2 /set url_db_cache_a_filename <relative-filename>
Defines the filename of the index URL database. The file will be created
relative toC< url_db_basedir>. The default isF< a_cache>.
=head2 /set url_db_cache_h_filename <relative-filename>
Defines the filename of the hash URL database. The file will be created
relative toC< url_db_basedir>. The default isF< h_cache>.
=head2 /set url_db_lock_filename <relative-filename>
Defines the filename of the lockfile used to lock all logfiles and the cache
databases. It will be created relative toC< url_db_basedir>. The default
isF< lockfile>.
=head2 /set url_policy_default <allow|deny>
Specifies the default policy that will be used to decide if logging is
permitted for a certain nick or channel. This can be either C<allow>
or C<deny>. If you set this to C<deny> you will have to allow explicitly those
channels and nicks for which logging should be permitted. In contrast if you
set it to allow, you can deny logging for certain nicks and channels.
The keysC< url_policy_chans> andC< url_policy_nicks> control the allow, deny
behaviour depending onC< url_policy_default>. The default isC< allow> which
permits logging of all channels and nicks.
=head2 /set url_policy_chans <string>
Specifies those channels for which logging is permitted or denied. Multiple
channels may be specified by using C<,> C<;> C<:> or a space to separate the
items.
=head2 /set url_policy_nicks <string>
SeeC< url_policy_chans> and replace the word channel by nick.
=head2 /set url_navigate <string>
Specifies the URL that C</url -showlog> passes to C<url_command>. It can be
used to display the main logfile in your favourite webbrowser. Because you may
pass this URL to your webbrowser at any time, it will not be passed to
strftime. Thus you can only specify a static file here.
=head1 AUTHOR
Marcus Geiger <bwolf@geekmind.org>
=cut
use integer;
use Irssi;
use POSIX qw(strftime);
use Fcntl qw(:DEFAULT :flock);
use DB_File;
# Regexps
# Pattern fragments, kept as plain strings, that scan_url interpolates
# into its regular expressions.
#   URL_SCHEME_REGEX - alternation of the URL schemes that are recognised
#   URL_GUESS_REGEX  - host prefixes used to guess a scheme when none is given
#   URL_BASE_REGEX   - character class of everything accepted inside a URL
use constant URL_SCHEME_REGEX => '(http|ftp|https|news|irc)';
use constant URL_GUESS_REGEX  => '(www|ftp)';
use constant URL_BASE_REGEX   => '[a-z0-9_\-+\\/:?%.&!~;,=\#<>]';
# Other
# BACKWARD_SEEK_BYTES: how far before end-of-file position_log_file starts
#                      reading when it looks for the marker line.
# LOG_FILE_MARKER:     HTML comment that marks where new table rows are
#                      inserted into an existing logfile.
use constant BACKWARD_SEEK_BYTES => 130;
use constant LOG_FILE_MARKER     => '<!-- bottom-line -->';
# Keys for settings
# Names of the irssi settings used by this script. Every name is defined
# exactly once here so lookups and registrations cannot drift apart.
use constant KEY_URL_COMMAND                  => 'url_command';
use constant KEY_URL_CACHE_MAX                => 'url_cache_max';
use constant KEY_URL_LOG_BASEDIR              => 'url_log_basedir';
use constant KEY_URL_LOG_FILE_NAME            => 'url_log_file_name';
use constant KEY_URL_CHAN_PREFIX              => 'url_chan_prefix';
use constant KEY_URL_CHAN_LOGGING             => 'url_chan_logging';
use constant KEY_URL_LOG_CSV_FILE_NAME        => 'url_log_csv_file_name';
use constant KEY_URL_LOG_CSV_FILE_MAX_SIZE    => 'url_log_csv_file_max_size';
use constant KEY_URL_LOG_CSV_SEPARATOR        => 'url_log_csv_separator';
use constant KEY_URL_CSV_LOGGING              => 'url_csv_logging';
use constant KEY_URL_CSV_CHAN_LOGGING         => 'url_csv_chan_logging';
use constant KEY_URL_TIME_FORMAT              => 'url_time_format';
use constant KEY_URL_LOG_FILE_MAX_SIZE        => 'url_log_file_max_size';
use constant KEY_URL_LOG_FILE_AUTORELOAD_TIME => 'url_log_file_autoreload_time';
use constant KEY_URL_DB_BASEDIR               => 'url_db_basedir';
use constant KEY_URL_DB_CACHE_A_FILENAME      => 'url_db_cache_a_filename';
use constant KEY_URL_DB_CACHE_H_FILENAME      => 'url_db_cache_h_filename';
use constant KEY_URL_DB_LOCK_FILENAME         => 'url_db_lock_filename';
use constant KEY_URL_POLICY_DEFAULT           => 'url_policy_default';
use constant KEY_URL_POLICY_CHANS             => 'url_policy_chans';
use constant KEY_URL_POLICY_NICKS             => 'url_policy_nicks';
use constant KEY_URL_NAVIGATE                 => 'url_navigate';
# Defaults
# Default shell command template for the url_command setting; every
# occurrence of __URL__ is replaced by the URL before the command is run.
#
# The placeholder is single-quoted on purpose: URL_BASE_REGEX accepts shell
# metacharacters such as ';', '&', '<' and '>' but no single quote, so inside
# single quotes a grabbed URL is always passed to the browser as one literal
# argument instead of being interpreted by the shell.
sub DEF_URL_COMMAND() {
    q{sensible-browser '__URL__' > /dev/null 2>&1} }
# Default values for the settings. The empty string is used as the "off"
# value and '1' as the "on" value of boolean settings. Relative paths are
# turned into absolute ones with mk_home when the settings are registered.
use constant DEF_URL_CACHE_MAX                => 90;
use constant DEF_URL_LOG_FILE_AUTORELOAD_TIME => 120;
use constant DEF_URL_TIME_FORMAT              => '%Y:%m:%d - %H:%M:%S';
use constant DEF_URL_DO_FILE_RESIZE           => '0';
use constant DEF_URL_LOG_FILE_MAX_SIZE        => 1024 * 30;
use constant DEF_URL_LOG_BASEDIR              => '.irssi/urlplot/urls/';
use constant DEF_URL_LOG_FILE_NAME            => 'ircurls.html';
use constant DEF_URL_CHAN_PREFIX              => 'chan_';
use constant DEF_URL_CHAN_LOGGING             => '1';
use constant DEF_URL_LOG_CSV_FILE_NAME        => 'ircurls.csv';
use constant DEF_URL_LOG_CSV_FILE_MAX_SIZE    => 1024 * 30;
use constant DEF_URL_LOG_CSV_SEPARATOR        => '|';
use constant DEF_URL_CSV_LOGGING              => '';
use constant DEF_URL_CSV_CHAN_LOGGING         => '';
use constant DEF_URL_DB_BASEDIR               => '.irssi/urlplot/';
use constant DEF_URL_DB_CACHE_A_FILENAME      => 'a_cache';
use constant DEF_URL_DB_CACHE_H_FILENAME      => 'h_cache';
use constant DEF_URL_DB_LOCK_FILENAME         => 'lockfile';
use constant DEF_URL_POLICY_DEFAULT           => 'allow';
use constant DEF_URL_POLICY_CHANS             => '';
use constant DEF_URL_POLICY_NICKS             => '';
use constant DEF_URL_NAVIGATE                 => '.irssi/urlplot/urls/ircurls.html';
# Write the XHTML header of the full (all channels) logfile.
#
# Arguments:
#   1. filehandle to print to
#   2. number of seconds after which the page reloads itself
#
# The output stops after the table heading row; rows and the closing markup
# are written by other subs. The heredoc content is flush left because its
# text is written to the file verbatim.
sub print_full_log_file_template {
    my $out     = shift;
    my $seconds = shift;
    print $out <<HEADER;
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>IRC-URLs</title>
<meta http-equiv="cache-control" content="no-cache" />
<meta http-equiv="refresh" content="$seconds;" />
<style type="text/css">
<!--
.small { font-size: small; }
.xsmall { font-size: x-small; }
-->
</style>
</head>
<body>
<h1>IRC-URLs</h1>
<p class="xsmall">
Visit <a href="http://www.geekmind.net">geekmind.net</a>
</p>
<p>This page reloads itself every $seconds seconds.</p>
<p>
<a name="top" />
<a class="small" href="#bottom">Page bottom</a>
<br />
<br />
</p>
<table rules="rows" frame="void" width="100%" cellpadding="5">
<tr align="left">
<th><b>Date/Time</b></th>
<th><b>Nick</b></th>
<th><b>Channel/Nick</b></th>
<th><b>URL</b></th>
</tr>
HEADER
}
# Write the XHTML header of a per-channel logfile.
#
# Arguments:
#   1. filehandle to print to
#   2. number of seconds after which the page reloads itself
#   3. channel name shown in the title and heading
#   4. link target of the "Complete" listing (the full logfile)
#
# The output stops after the table heading row. The heredoc content is flush
# left because its text is written to the file verbatim.
sub print_chan_log_file_template {
    my $out       = shift;
    my $seconds   = shift;
    my $chan_name = shift;
    my $full_link = shift;
    print $out <<HEADER;
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>IRC-URLs of $chan_name</title>
<meta http-equiv="cache-control" content="no-cache" />
<meta http-equiv="refresh" content="$seconds;" />
<style type="text/css">
<!--
.small { font-size: small; }
.xsmall { font-size: x-small; }
-->
</style>
</head>
<body>
<h1>IRC-URLs of $chan_name</h1>
<p class="xsmall">
Visit <a href="http://www.geekmind.net">geekmind.net</a>
</p>
<p>This page reloads itself every $seconds seconds.</p>
<p><a href="$full_link">Complete</a> listing.</p>
<p>
<a name="top" />
<a class="small" href="#bottom">Page bottom</a>
<br />
<br />
</p>
<table rules="rows" frame="void" width="100%" cellpadding="5">
<tr align="left">
<th><b>Date/Time</b></th>
<th><b>Nick</b></th>
<th><b>URL</b></th>
</tr>
HEADER
}
# Return the text that terminates every HTML logfile: the marker line
# (LOG_FILE_MARKER) followed by the markup that closes the table, body and
# document. The heredoc content is flush left because it is emitted verbatim.
sub LOG_FILE_TAIL () {
    my $closing_markup = <<'EOT';
</table>
<p>
<a class="small" href="#top">Page top</a>
<a name="bottom" />
</p>
</body>
</html>
EOT
    return LOG_FILE_MARKER() . "\n" . $closing_markup;
}
# Escape the characters that are special in XHTML text and attribute values.
# An undefined argument is treated as the empty string.
sub _html_escape {
    my $text = shift;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;    # must come first so later entities stay intact
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Write one table row to a per-channel logfile, followed by the file tail.
#
# Arguments: filehandle, date text, nick, channel (unused here), URL.
#
# The nick and URL are escaped before they are written because
# URL_BASE_REGEX lets '&', '<' and '>' through. The date is written as-is;
# it is built locally by log_url and may already contain markup.
sub print_chan_log_file_entry {
    my ($fh, $date, $nick, $channel, $url) = @_;
    my $h_nick = _html_escape($nick);
    my $h_url  = _html_escape($url);
    print $fh <<EOURL;
<tr>
<td>$date</td>
<td><em>$h_nick</em></td>
<td><a href="$h_url">$h_url</a></td>
</tr>
EOURL
    print $fh LOG_FILE_TAIL();
}

# Write one table row to the full logfile, followed by the file tail.
#
# Arguments: filehandle, date text, nick, channel, link target of the
# channel's own logfile, URL.
#
# Nick, channel, channel link and URL are escaped; the date is written
# as-is (see print_chan_log_file_entry).
sub print_full_log_file_entry {
    my ($fh, $date, $nick, $channel, $chan_log, $url) = @_;
    my $h_nick     = _html_escape($nick);
    my $h_channel  = _html_escape($channel);
    my $h_chan_log = _html_escape($chan_log);
    my $h_url      = _html_escape($url);
    print $fh <<EOURL;
<tr>
<td>$date</td>
<td><em>$h_nick</em></td>
<td><a href="$h_chan_log">$h_channel</a></td>
<td><a href="$h_url">$h_url</a></td>
</tr>
EOURL
    print $fh LOG_FILE_TAIL();
}
# Report an error: the arguments are joined with spaces, prefixed with the
# script name and handed to Irssi::print without an explicit message level.
sub p_error {
    my $message = "urlplot: @_";
    Irssi::print($message);
}
# Print ordinary output: the arguments are joined with spaces and handed to
# Irssi::print with the MSGS and NOHILIGHT message levels combined.
sub p_normal {
    my $text = "@_";
    Irssi::print($text, MSGLEVEL_MSGS+MSGLEVEL_NOHILIGHT);
}
# Extract the first URL from a line of text.
#
# Argument: the raw message or topic text.
# Returns:  the URL as a string, or undef when the text contains none.
#
# A URL with an explicit scheme (URL_SCHEME_REGEX) is returned unchanged.
# Otherwise a host starting with "www." or "ftp." (URL_GUESS_REGEX) gets
# "http://" or "ftp://" prepended. Both patterns match case-insensitively,
# so the guessed prefix is lower-cased before it is compared; otherwise
# "WWW.example.org" would match the pattern and still yield undef.
sub scan_url {
    my $rawtext = shift;
    return $1
        if $rawtext =~ m|(@{[ URL_SCHEME_REGEX ]}://@{[ URL_BASE_REGEX ]}+)|io;
    # The URL misses a scheme, try to be smart. Group 1 is the whole match,
    # group 2 the parenthesised prefix from URL_GUESS_REGEX.
    if ($rawtext =~ m|(@{[ URL_GUESS_REGEX ]}\.@{[ URL_BASE_REGEX ]}+)|io) {
        my ($guess, $prefix) = ($1, lc $2);
        return "http://$guess" if $prefix eq 'www';
        return "ftp://$guess"  if $prefix eq 'ftp';
    }
    return undef;
}
# Open (creating it if necessary) the lockfile inside the database directory
# and take an exclusive, non-blocking lock on it.
#
# Reads the settings url_db_basedir and url_db_lock_filename.
# Returns a copy of the locked filehandle's typeglob; do_locked closes it
# when it is done.
# Dies when a setting is empty, the directory is missing or unreadable, the
# lockfile cannot be opened, or the lock cannot be obtained (LOCK_NB is set,
# so a held lock is reported instead of waited for).
sub aquire_lock {
my $db_base = Irssi::settings_get_str(KEY_URL_DB_BASEDIR)
|| die "missing setting for @{[ KEY_URL_DB_BASEDIR ]}";
my $lockfile = Irssi::settings_get_str(KEY_URL_DB_LOCK_FILENAME)
|| die "missing setting for @{[ KEY_URL_DB_LOCK_FILENAME ]}";
# Localize the glob so the handle does not replace a LOCK_F of the caller.
local *LOCK_F;
my $fh;
# Make sure the directory ends in a slash before the filename is appended.
$db_base .= '/' if $db_base !~ m#/$#;
$lockfile = "${db_base}${lockfile}";
die "directory $db_base doesn't exist or isn't readable"
unless -d $db_base and -r $db_base;
sysopen(LOCK_F, $lockfile, O_RDONLY | O_CREAT)
|| die "can't open/create lockfile $lockfile: $!";
flock(LOCK_F, LOCK_EX | LOCK_NB)
|| die "can't exclusively lock $lockfile: $!";
# Can't pass back localized typeglob reference
# so a copy of the glob is returned instead.
$fh = *LOCK_F;
return $fh;
}
# Tie the two persistent URL caches to their database files.
#
# Reads the settings url_db_basedir, url_db_cache_a_filename and
# url_db_cache_h_filename (in that order).
# Returns a two-element list: a reference to the tied array (record-number
# database, keeps the URLs in order) and a reference to the tied hash.
# Dies when a setting is empty, the directory is missing or unreadable, or
# one of the files cannot be tied.
sub open_caches {
    my %value_of;
    for my $key (KEY_URL_DB_BASEDIR(),
                 KEY_URL_DB_CACHE_A_FILENAME(),
                 KEY_URL_DB_CACHE_H_FILENAME()) {
        $value_of{$key} = Irssi::settings_get_str($key)
            || die "missing setting for $key";
    }

    my $dir = $value_of{KEY_URL_DB_BASEDIR()};
    $dir .= '/' unless $dir =~ m#/$#;
    my $array_file = $dir . $value_of{KEY_URL_DB_CACHE_A_FILENAME()};
    my $hash_file  = $dir . $value_of{KEY_URL_DB_CACHE_H_FILENAME()};

    unless (-d $dir and -r $dir) {
        die "directory $dir doesn't exist or isn't readable";
    }

    my (@cache, %cache);
    tie(@cache, 'DB_File', $array_file, O_RDWR | O_CREAT, 0666, $DB_RECNO)
        or die "can't tie urlcache db $array_file: $!";
    tie(%cache, 'DB_File', $hash_file, O_RDWR | O_CREAT, 0666)
        or die "can't tie urlcache db $hash_file: $!";
    return (\@cache, \%cache);
}
# Create (or overwrite) a per-channel logfile that holds only the page
# header and the closing tail.
#
# Arguments: link target of the full logfile, path of the file to create,
# channel name. The reload interval is read from the setting
# url_log_file_autoreload_time. Dies when the file cannot be created.
sub create_chan_template {
    my ($full_log, $file, $channel) = @_;
    my $interval = Irssi::settings_get_int(KEY_URL_LOG_FILE_AUTORELOAD_TIME());
    local *FH;
    my $out = \*FH;
    unless (open(FH, "> $file")) {
        die "can't create logfile $file: $!";
    }
    print_chan_log_file_template($out, $interval, $channel, $full_log);
    print $out LOG_FILE_TAIL();
    close($out);
}
# Create (or overwrite) the full logfile so that it holds only the page
# header and the closing tail.
#
# Argument: path of the file to create. The reload interval is read from
# the setting url_log_file_autoreload_time. Dies when the file cannot be
# created.
sub create_full_template {
    my ($file) = @_;
    my $interval = Irssi::settings_get_int(KEY_URL_LOG_FILE_AUTORELOAD_TIME());
    local *FH;
    my $out = \*FH;
    unless (open(FH, "> $file")) {
        die "can't create logfile $file: $!";
    }
    print_full_log_file_template($out, $interval);
    print $out LOG_FILE_TAIL();
    close($out);
}
# Create an empty CSV logfile, truncating it when it already exists.
#
# Argument: path of the file. Dies when the file cannot be created.
#
# The glob is localized like in the other file helpers so that the
# package-wide FH handle of a caller is neither reopened nor closed here.
sub create_csv_file {
    my $file = shift;
    local *FH;
    open(FH, "> $file")
        || die "can't create $file: $!";
    close FH;
}
# Append one record to a CSV logfile.
#
# Arguments: path of the CSV file, followed by the field values. The fields
# are joined with the separator from the setting url_log_csv_separator and
# written as a single line. Dies when the file cannot be opened.
sub log_csv {
    my ($csv_log, @fields) = @_;
    my $separator = Irssi::settings_get_str(KEY_URL_LOG_CSV_SEPARATOR());
    my $record    = join($separator, @fields);
    local *FH;
    open(FH, ">> $csv_log") or die "can't open $csv_log: $!";
    print FH $record . "\n";
    close FH;
}
# Open an existing HTML logfile for read/write and position it at the start
# of the marker line (LOG_FILE_MARKER), so that whatever the caller prints
# next overwrites the marker and the closing markup behind it.
#
# Argument: path of the logfile.
# Returns:  a copy of the opened filehandle's typeglob.
# Dies when the file cannot be opened, sought or read, or when the marker is
# not found in the part of the file that was read.
sub position_log_file {
my $file = shift;
my ($fh, $pos, $buf, @lines, $off, $got_it);
# Localize the glob so the handle does not replace a FH of the caller.
local *FH;
my $hint = "Conside manual removal of this file";
sysopen(FH, $file, O_RDWR)
|| die "can't open $file: $!";
# Whence 2 seeks relative to end-of-file, so $pos becomes the file size.
$pos = sysseek(FH, 0, 2)
|| die "can't seek to EOF in $file. ${hint}: $!";
# Step back from the end and read from there; the marker is expected in
# this last part of the file.
$pos -= BACKWARD_SEEK_BYTES;
sysseek(FH, $pos, 0)
|| die "can't seek backwards to $pos in $file. ${hint}: $!";
sysread(FH, $buf, 2048)
|| die "can't read rest of $file. ${hint}: $!";
# $off counts the bytes of the lines passed so far (+1 per line for the
# newline removed by split).
$off = 0;
@lines = split /\n/, $buf;
for (@lines) {
$off += length;
$off += 1;
chomp;
next if /^$/;
if (/@{[ LOG_FILE_MARKER ]}/io) {
$got_it = 1;
# Undo the addition for the marker line itself, so $off ends up at the
# first byte of that line.
$off -= length;
$off -= 1;
last;
}
}
die "Can't locate @{[ LOG_FILE_MARKER ]} in $file. ${hint}"
unless $got_it;
# Absolute file position of the marker line.
$pos += $off;
sysseek(FH, $pos, 0)
|| die "Can't seek to $pos in $file. ${hint}: $!";
# Can't pass back localized typeglob reference
# so a copy of the glob is returned instead.
$fh = *FH;
return $fh;
}
# Write one grabbed URL to all enabled logfiles.
#
# Arguments:
#   $nick    - nick that posted the URL
#   $channel - channel the URL was seen in (url_message passes the own nick
#              when the message has no channel)
#   $url     - the URL
#
# Depending on the settings this appends to the per-channel HTML log, the
# full HTML log (always), the full CSV log and the per-channel CSV log.
# A logfile is (re)created from its template when it is not readable or has
# grown beyond its configured maximum size; a maximum of zero disables the
# size check.
#
# Dies when a required setting is empty, the log directory is missing or
# unreadable, or one of the files cannot be created or positioned.
sub log_url {
    my ($nick, $channel, $url) = @_;

    my $log_base = Irssi::settings_get_str(KEY_URL_LOG_BASEDIR)
        || die "missing setting for @{[ KEY_URL_LOG_BASEDIR ]}";
    my $fullfile = Irssi::settings_get_str(KEY_URL_LOG_FILE_NAME)
        || die "missing setting for @{[ KEY_URL_LOG_FILE_NAME ]}";
    my $csvfile = Irssi::settings_get_str(KEY_URL_LOG_CSV_FILE_NAME)
        || die "missing setting for @{[ KEY_URL_LOG_CSV_FILE_NAME ]}";
    my $csv_max = Irssi::settings_get_int(KEY_URL_LOG_CSV_FILE_MAX_SIZE);
    my $csv_logging = Irssi::settings_get_bool(KEY_URL_CSV_LOGGING);
    my $csv_chan_logging = Irssi::settings_get_bool(KEY_URL_CSV_CHAN_LOGGING);
    my $time_fmt = Irssi::settings_get_str(KEY_URL_TIME_FORMAT)
        || die "missing setting for @{[ KEY_URL_TIME_FORMAT ]}";
    my $max = Irssi::settings_get_int(KEY_URL_LOG_FILE_MAX_SIZE);
    my $chan_prefix = Irssi::settings_get_str(KEY_URL_CHAN_PREFIX)
        || die "missing setting for @{[ KEY_URL_CHAN_PREFIX ]}";
    my $chan_logging = Irssi::settings_get_bool(KEY_URL_CHAN_LOGGING);
    my @curr_time = localtime(time());

    $log_base .= '/' if $log_base !~ m#/$#;
    die "directory $log_base doesn't exist or isn't readable"
        unless -d $log_base and -r $log_base;

    # Make channel filename. Slashes in the channel name are replaced first
    # so the name cannot point outside the log directory; the configured
    # prefix is inserted afterwards and is left untouched.
    my $prefix = POSIX::strftime($chan_prefix, @curr_time);
    my $chan_fname = lc $channel;
    $chan_fname =~ tr{/}{_};
    $chan_fname =~ s/^#/$prefix/;
    my $chan_log = "${log_base}${chan_fname}.html";

    # Make full filename
    my $full_fname = POSIX::strftime($fullfile, @curr_time);
    my $full_log = $log_base . $full_fname;

    # Replace spaces in the date string by '&nbsp;' to prevent line breaks
    # inside the date column.
    my $date = POSIX::strftime($time_fmt, @curr_time);
    my $html_date = $date;
    $html_date =~ s/ /&nbsp;/g;

    my $fh;

    # Channel logging
    if ($chan_logging) {
        create_chan_template($full_fname, $chan_log, $channel)
            if not -r $chan_log or ($max > 0 and (stat($chan_log))[7] > $max);
        $fh = position_log_file($chan_log);
        print_chan_log_file_entry($fh, $html_date, $nick, $channel, $url);
        close $fh;
    }

    # Full logging
    create_full_template($full_log)
        if not -r $full_log or ($max > 0 and (stat($full_log))[7] > $max);
    $fh = position_log_file($full_log);
    print_full_log_file_entry($fh, $html_date, $nick, $channel,
                              "${chan_fname}.html", $url);
    close $fh;

    # CSV logging. The size is checked against the CSV limit, not against
    # the limit of the HTML logfiles.
    if ($csv_logging) {
        my $log = $log_base . POSIX::strftime($csvfile, @curr_time);
        create_csv_file($log)
            if not -r $log or ($csv_max > 0 and (stat($log))[7] > $csv_max);
        log_csv($log, $date, $nick, $channel, $url);
    }

    # CSV channel logging
    if ($csv_chan_logging) {
        my $log = "${log_base}${chan_fname}.csv";
        create_csv_file($log)
            if not -r $log or ($csv_max > 0 and (stat($log))[7] > $csv_max);
        log_csv($log, $date, $nick, $channel, $url);
    }
}
# Turn a path relative to the home directory into an absolute one by
# putting the value of the HOME environment variable and a slash in front.
sub mk_home($) {
    my ($relative) = @_;
    return join('/', $ENV{HOME}, $relative);
}
# Decide whether a URL posted by $nick in $chan_or_nick may be logged.
#
# Reads the settings url_policy_default, url_policy_chans and
# url_policy_nicks; the two lists are split on ',', ';', ':' and space.
#
# Returns 1 (log) or 0 (don't log):
#   policy 'deny'  - 1 only when the channel or the nick is listed
#   policy 'allow' - 0 only when the channel or the nick is listed
# For any other policy value an error is printed and undef is returned.
# Dies when url_policy_default is empty.
sub logging_permited {
    my ($nick, $chan_or_nick) = @_;
    my $policy = Irssi::settings_get_str(KEY_URL_POLICY_DEFAULT())
        || die "missing setting for " . KEY_URL_POLICY_DEFAULT();
    my @listed_chans = split /[,;: ]/, Irssi::settings_get_str(KEY_URL_POLICY_CHANS());
    my @listed_nicks = split /[,;: ]/, Irssi::settings_get_str(KEY_URL_POLICY_NICKS());

    unless ($policy eq 'deny' or $policy eq 'allow') {
        p_error("setting " . KEY_URL_POLICY_DEFAULT()
                . " can be either 'allow' or 'deny'");
        return undef;
    }

    # True when the channel or the nick appears in its policy list.
    my $listed = scalar(grep { $_ eq $chan_or_nick } @listed_chans)
              || scalar(grep { $_ eq $nick } @listed_nicks);

    if ($policy eq 'deny') {
        # logging must be explicitly permitted
        return $listed ? 1 : 0;
    }
    # logging must be explicitly denied
    return $listed ? 0 : 1;
}
# Run a callback while the lockfile is held.
#
# Arguments: a code reference, followed by the arguments to call it with.
# When the lock cannot be acquired the error is printed and the callback is
# not run. An exception thrown by the callback is printed, not propagated;
# the lock handle is closed afterwards in either case.
# Dies when no callback is given.
sub do_locked {
    my ($callback, @args) = @_;
    die "missing function argument " . caller() unless $callback;

    my $lock_handle;
    eval { $lock_handle = aquire_lock() };
    if ($@) {
        p_error("$@");
        return;
    }

    eval { $callback->(@args) };
    if ($@) {
        p_error("$@");
    }
    eval { close $lock_handle };
}
# Run a callback with both URL caches opened.
#
# Arguments: a code reference, followed by extra arguments. The callback is
# called with the array cache reference, the hash cache reference and then
# the extra arguments. When the caches cannot be opened the error is printed
# and the callback is not run. An exception thrown by the callback is
# printed, not propagated; both caches are untied afterwards.
# Dies when no callback is given.
sub do_with_caches {
    my ($callback, @args) = @_;
    die "missing function argument " . caller() unless $callback;

    my ($urls, $seen);
    eval { ($urls, $seen) = open_caches() };
    if ($@) {
        p_error("$@");
        if (defined $seen) {
            eval { untie %$seen };
        }
        if (defined $urls) {
            eval { untie @$urls };
        }
        return;
    }

    eval { $callback->($urls, $seen, @args) };
    if ($@) {
        p_error("$@");
    }
    eval { untie %$seen };
    eval { untie @$urls };
}
# Record a URL in the caches and log it, unless it is already cached.
#
# Arguments: array cache reference, hash cache reference, nick, channel or
# nick the URL was seen in, URL.
#
# A new URL is appended to the array cache and marked in the hash cache.
# When the setting url_cache_max is greater than zero and the cache has
# grown beyond it, the oldest URL is removed from both caches.
sub url_msg_log {
    my ($cache_a, $cache_h, $nick, $chan_or_nick, $url) = @_;
    my $limit = Irssi::settings_get_int(KEY_URL_CACHE_MAX());

    # Duplicate: neither cached again nor logged again.
    return if exists $cache_h->{$url};

    # Size the array cache will have once the URL has been added.
    my $new_size = scalar(@$cache_a) + 1;
    $cache_h->{$url} = '1';
    # push the URL to the end of the file seems to work better on
    # some systems in contrast to unshift.
    push @$cache_a, $url;

    if ($limit > 0 && $new_size > $limit) {
        my $oldest = shift @$cache_a;
        delete $cache_h->{$oldest};
    }
    log_url($nick, $chan_or_nick, $url);
}
# Handler for the 'message topic' signal: forwards the new topic text to
# url_message with the arguments in the order url_message expects.
sub url_topic {
    my $server  = shift;
    my $channel = shift;
    my ($topic, $nick, $hostmask) = @_;
    url_message($server, $topic, $nick, $hostmask, $channel);
}
# Handler for the 'message public' and 'message private' signals (and, via
# url_topic, for topic changes): looks for a URL in the text and logs it
# when the policy permits.
#
# When no channel is given, the nick stored in the server record is used in
# its place. The caches and logfiles are only touched while the lock is held.
sub url_message {
    my ($server, $rawtext, $nick, $hostmask, $channel) = @_;

    my $url = scan_url($rawtext);
    return unless defined $url;

    my $target = defined $channel ? $channel : $server->{nick};
    my $permit = logging_permited($nick, $target);
    return unless (defined($permit) && $permit);

    do_locked(\&do_with_caches, \&url_msg_log, $nick, $target, $url);
}
# Implementation of "/url -list": prints the number of cached URLs followed
# by one line per URL, each prefixed with its zero-based, two-digit index.
#
# Arguments: array cache reference, hash cache reference (unused).
sub url_cmd_show {
    my ($cache_a, $cache_h) = @_;
    my $total = scalar(@$cache_a);
    p_normal("urlplot: total of " . $total . " URLs");
    for my $index (0 .. $total - 1) {
        p_normal(sprintf("%02d - %s", $index, $cache_a->[$index]));
    }
}
# Implementation of "/url -clearcache": empties both URL caches.
#
# Arguments: array cache reference, hash cache reference.
sub url_cmd_clearcaches {
    my ($urls, $seen) = @_;
    @{$urls} = ();
    %{$seen} = ();
}
# Run the configured browser command for a URL.
#
# Argument: the URL. Every occurrence of __URL__ in the setting url_command
# is replaced by it and the result is run with system().
# Dies when no URL is given, the setting is empty, or the setting contains
# no __URL__ placeholder.
sub url_cmd_real_navigate {
    my ($url) = @_;
    die 'no URLs captured so far' unless $url;
    my $url_cmd = Irssi::settings_get_str(KEY_URL_COMMAND)
        || die "missing setting for @{[ KEY_URL_COMMAND ]}";
    # Name the real setting in the message so the user knows what to change.
    unless ($url_cmd =~ s/__URL__/$url/g) {
        die "setting @{[ KEY_URL_COMMAND ]} doesn't contain an URL placeholder '__URL__'";
    }
    # NOTE(review): the single-string form of system() runs the command
    # through the shell, and the URL may come from other IRC users. It is
    # only safe when the placeholder is quoted inside the configured
    # command; an unquoted placeholder lets characters such as ';' and '&'
    # in the URL start further commands.
    system($url_cmd);
}
# Implementation of "/url [n]": opens the n-th cached URL in the browser.
#
# Arguments: array cache reference, hash cache reference (unused), index.
# An undefined index selects the last (most recently added) URL.
# Dies when the index is not below the number of cached URLs or when the
# selected entry is empty.
sub url_cmd_navigate {
    my ($cache_a, $cache_h, $n) = @_;
    my $count = scalar @$cache_a;

    if (!defined $n) {
        if ($count > 0) {
            $n = $count - 1;
        }
        else {
            $n = $count;
        }
    }
    die "no such URL; I've only $count" unless $n < $count;

    my $picked = $cache_a->[$n];
    die 'no URLs captured so far' unless $picked;
    url_cmd_real_navigate($picked);
}
# Handler for the /url command.
#
# Arguments (as passed by irssi): the command arguments as one string, the
# server and the window item; only the string is used.
#
#   -list        print all cached URLs
#   -clearcache  empty the caches
#   -showlog     open the URL from the setting url_navigate in the browser
#   <digits>     open the cached URL with that index
#   (nothing)    open the most recently cached URL
#
# Anything else prints a usage message. The argument string is matched
# directly instead of being copied into the global $_, so the caller's $_
# is left alone. Errors on the -showlog path are reported with p_error like
# those of the other paths.
sub url_command {
    my ($data, $server, $witem) = @_;
    my $args = defined $data ? $data : '';

    if ($args =~ /^-list/) {
        do_locked(\&do_with_caches, \&url_cmd_show);
    } elsif ($args =~ /^-clearcache/) {
        do_locked(\&do_with_caches, \&url_cmd_clearcaches);
    } elsif ($args =~ /^-showlog/) {
        eval {
            my $nav_url = Irssi::settings_get_str(KEY_URL_NAVIGATE)
                || die "missing setting for @{[ KEY_URL_NAVIGATE ]}";
            url_cmd_real_navigate($nav_url);
        };
        p_error("$@") if $@;
    } elsif ($args =~ /^(\d+)/) {
        # Copy the capture right away; $1 would change with later matches.
        my $n = $1;
        do_locked(\&do_with_caches, \&url_cmd_navigate, $n);
    } elsif ($args =~ /^$/) {
        do_locked(\&do_with_caches, \&url_cmd_navigate, undef);
    } else {
        p_error("usage for /url [-list|-showlog|-clearcache|<digit>]");
    }
}
# Hook the URL scanner into channel messages, private messages and topic
# changes.
Irssi::signal_add_last('message public', 'url_message');
Irssi::signal_add_last('message private', 'url_message');
Irssi::signal_add_last('message topic', 'url_topic');
# Bind the /url command.
Irssi::command_bind('url', 'url_command');
# Register every setting in the 'misc' section together with its default.
# Directory and file defaults are made absolute with mk_home.
Irssi::settings_add_str('misc', KEY_URL_COMMAND, DEF_URL_COMMAND);
Irssi::settings_add_int('misc', KEY_URL_CACHE_MAX, DEF_URL_CACHE_MAX);
Irssi::settings_add_str('misc', KEY_URL_LOG_BASEDIR, mk_home(DEF_URL_LOG_BASEDIR));
Irssi::settings_add_str('misc', KEY_URL_LOG_FILE_NAME, DEF_URL_LOG_FILE_NAME);
Irssi::settings_add_str('misc', KEY_URL_CHAN_PREFIX, DEF_URL_CHAN_PREFIX);
Irssi::settings_add_bool('misc', KEY_URL_CHAN_LOGGING, DEF_URL_CHAN_LOGGING);
Irssi::settings_add_str('misc', KEY_URL_LOG_CSV_FILE_NAME, DEF_URL_LOG_CSV_FILE_NAME);
Irssi::settings_add_int('misc', KEY_URL_LOG_CSV_FILE_MAX_SIZE, DEF_URL_LOG_CSV_FILE_MAX_SIZE);
Irssi::settings_add_str('misc', KEY_URL_LOG_CSV_SEPARATOR, DEF_URL_LOG_CSV_SEPARATOR);
Irssi::settings_add_bool('misc', KEY_URL_CSV_LOGGING, DEF_URL_CSV_LOGGING);
Irssi::settings_add_bool('misc', KEY_URL_CSV_CHAN_LOGGING, DEF_URL_CSV_CHAN_LOGGING);
Irssi::settings_add_str('misc', KEY_URL_TIME_FORMAT, DEF_URL_TIME_FORMAT);
Irssi::settings_add_int('misc', KEY_URL_LOG_FILE_MAX_SIZE, DEF_URL_LOG_FILE_MAX_SIZE);
Irssi::settings_add_int('misc', KEY_URL_LOG_FILE_AUTORELOAD_TIME,
DEF_URL_LOG_FILE_AUTORELOAD_TIME);
Irssi::settings_add_str('misc', KEY_URL_DB_BASEDIR, mk_home(DEF_URL_DB_BASEDIR));
Irssi::settings_add_str('misc', KEY_URL_DB_CACHE_A_FILENAME, DEF_URL_DB_CACHE_A_FILENAME);
Irssi::settings_add_str('misc', KEY_URL_DB_CACHE_H_FILENAME, DEF_URL_DB_CACHE_H_FILENAME);
Irssi::settings_add_str('misc', KEY_URL_DB_LOCK_FILENAME, DEF_URL_DB_LOCK_FILENAME);
Irssi::settings_add_str('misc', KEY_URL_POLICY_DEFAULT, DEF_URL_POLICY_DEFAULT);
Irssi::settings_add_str('misc', KEY_URL_POLICY_CHANS, DEF_URL_POLICY_CHANS);
Irssi::settings_add_str('misc', KEY_URL_POLICY_NICKS, DEF_URL_POLICY_NICKS);
# url_navigate holds a URL, so the absolute path gets a file:// prefix.
Irssi::settings_add_str('misc', KEY_URL_NAVIGATE, 'file://' . mk_home(DEF_URL_NAVIGATE));
#
# $Log$
#