#!/usr/bin/perl

#
# WebEscorter version 1.2.0
#      for Ajax form client
#

# INCLUDE MODULES
use strict;
use warnings;
use CGI;
use Encode qw/from_to/;

# Settings
#   $file        - path of the Apache access log that gets scanned.
#   $entry_limit - result limiter: once this many entries have been reported,
#                  the scan prints a notice and stops.
my ($file, $entry_limit) = (
    '/var/log/apache2/access.log',
    '1000',
);

# PREPARE
# Turn on autoflush for the currently selected output handle.
$| = 1;
# Raw query string as handed over by the web server (undef outside CGI).
my $querystr = $ENV{QUERY_STRING};

# The response is plain UTF-8 text; the header goes out before anything else.
print "Content-Type: text/plain; charset=UTF-8\n\n";

# GET QUERYS
# CGI->new instead of the indirect-object form "new CGI", which depends on
# how the parser happens to resolve the bareword.
my $q = CGI->new;
my $addr = $q->param('addr');    # address filter (may be undef or empty)
my $date = $q->param('date');    # date filter, expected as yyyy/m/d (may be undef or empty)
# 'disp' can occur several times. Newer CGI.pm releases warn when param() is
# called in list context and provide multi_param() for that purpose; fall
# back to param() on releases that do not have it.
my @switch = $q->can('multi_param')
    ? $q->multi_param('disp')
    : $q->param('disp');

# Check the display options.
# Every flag is 0 or 1 (never undef), so the later "$flag == 1" tests do not
# raise "uninitialized value" warnings for options that were not selected.
my ($perfect_matching,$url_enabled,$word_enabled,$lookup_enabled,$debug_enabled)
    = _display_flags(@switch);

# Translate the list of 'disp' switch values into five 0/1 flags.
#   "0" => exact address matching      "1" => show URLs
#   "2" => show search words           "3" => reverse-lookup host names
#   "4" => debug output (HTTP status)
# Undefined or unknown switch values are ignored.
# Returns the flags in the order listed above.
sub _display_flags {
    my @switches = @_;
    my @flags = (0) x 5;
    for my $disp_sw (@switches) {
        next unless defined $disp_sw;
        # Only the exact strings "0" .. "4" select an option.
        next unless $disp_sw =~ /\A[0-4]\z/;
        $flags[$disp_sw] = 1;
    }
    return @flags;
}

# Address filter preprocessing.
# After this statement $addr is either ".+" (the marker for "no address
# filter") or the text the log's host field is compared against.
$addr = _address_filter_for($addr);

# Normalise the address typed by the user.
#   undef / ""                  -> ".+" (no filtering)
#   only digits and dots        -> returned unchanged: it already is an IPv4
#                                  address or a fragment of one, so no resolver
#                                  call is made (a resolver may expand a short
#                                  form such as "192.168" into a different
#                                  address)
#   no digit and no dot at all  -> returned unchanged
#   anything else (host names)  -> resolved with gethostbyname and returned as
#                                  a dotted quad; when resolution fails the
#                                  text is returned as typed instead of
#                                  degrading into "0.0.0.0"
sub _address_filter_for {
    my ($typed) = @_;

    return ".+" if !defined $typed or length($typed) == 0;
    return $typed if $typed =~ /\A[\d.]+\z/;
    return $typed if $typed !~ /[\d.]/;

    # Only the first 64 characters are handed to the resolver.
    my $packed = (gethostbyname(substr($typed, 0, 64)))[4];
    return $typed unless defined $packed and length($packed) == 4;

    return sprintf("%u.%u.%u.%u", unpack("C4", $packed));
}

# Date filter preprocessing.
# $datequery ends up as ".+" (the marker for "no date filter") or as the
# day in the form dd/Mon/yyyy, which is what gets compared with the date
# field of each log entry.
my $datequery;
if (!defined $date or length($date) == 0) {
    $datequery = ".+";
}
else {
    my ($log_date, $bad_field) = _log_date_for($date);
    if (defined $bad_field) {
        # Report the problem and stop: scanning with a half-built date would
        # only produce warnings and a filter that cannot match anything.
        invalid_param($bad_field);
        exit;
    }
    $datequery = $log_date;
}

# Convert a "yyyy/m/d" date into "dd/Mon/yyyy".
# Returns ($log_date, undef) on success, or (undef, $name) where $name is
# the label of the first invalid piece: "date" (year), "second" (month) or
# "third" (day).
#   - the year must be exactly four digits;
#   - the month must be 1-12, with or without a leading zero;
#   - the day must be 1-31 and is zero-padded to two digits, because the
#     access log writes days as "05", not "5".
sub _log_date_for {
    my ($typed) = @_;
    my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

    my ($first, $second, $third) = split(/\//, $typed);

    return (undef, "date")
        if !defined $first or $first !~ /\A\d{4}\z/;
    return (undef, "second")
        if !defined $second or $second !~ /\A\d{1,2}\z/
        or $second < 1 or $second > 12;
    return (undef, "third")
        if !defined $third or $third !~ /\A\d{1,2}\z/
        or $third < 1 or $third > 31;

    my $log_date = sprintf('%02d/%s/%s', $third, $month_names[$second - 1], $first);
    return ($log_date, undef);
}

# ANALYZE CODE
# Scan the access log line by line, keep the proxy requests that pass the
# date and address filters, and print one row per reported entry:
#   "<date> <time>\t<host>\t[<tag>] <text>\n"
if (! -e $file) {
    print 'Log file does not exist';
}
elsif (! -r $file) {
    print 'Cannot read log file';
}
else {
    # Three-argument open with a lexical handle, so the path can never be
    # read as an open mode and the handle is not a package global.
    open(my $log_fh, '<', $file) or die("Cannot open log file : ($!)");

    # SCAN URI LOOP
    require Parse::AccessLogEntry;
    # Constructor call kept exactly as in the original script.
    my $p=Parse::AccessLogEntry::new();
    my %host_cache;          # IP address => reverse-lookup result (undef if the lookup failed)
    my $entry_counter = 0;   # number of rows printed so far

    # ".+" is the marker the preprocessing uses for "no filter".
    my $date_filter_on = (defined $datequery and $datequery ne ".+");
    my $addr_filter_on = (defined $addr and $addr ne ".+");
    # The address comes from the request, so it is matched as literal text
    # (\Q...\E): regex metacharacters in it can neither abort the script with
    # a compile error nor change what is matched.
    my $addr_pattern = $addr_filter_on ? qr/\Q$addr\E/ : undef;

    # Dotted-quad IPv4 address, each octet limited to 0-255.
    my $octet   = qr/(?:\d|[01]?\d\d|2[0-4]\d|25[0-5])/;
    my $ipv4_re = qr/^$octet\.$octet\.$octet\.$octet$/;

    LINE:
    while (my $buff = <$log_fh>) {
        chomp $buff;

        #-- parse --
        my $Hashref = $p->parse($buff);
        my $lhost   = $Hashref->{host};
        my $lurl    = $Hashref->{file};
        # Entries without a host or a request target cannot be reported;
        # skipping them here also keeps undef out of the matches below.
        next LINE unless defined $lhost and defined $lurl;

        my $log_day  = defined $Hashref->{date} ? $Hashref->{date} : '';
        my $log_time = defined $Hashref->{time} ? $Hashref->{time} : '';
        my $ldate    = $log_day . ' ' . $log_time;
        my $lmethod  = $Hashref->{rtype};
        my $status   = defined $Hashref->{code} ? $Hashref->{code} : '';

        #-- date filter: the entry's date must equal the requested day --
        if ($date_filter_on) {
            next LINE if $log_day ne $datequery;
        }

        #-- address filter: exact match, or substring match by default --
        if ($addr_filter_on) {
            if ($perfect_matching) {
                next LINE if $lhost ne $addr;
            }
            else {
                next LINE if $lhost !~ $addr_pattern;
            }
        }

        #-- drop everything that is not a proxy request --
        # A proxy request carries either an absolute URL or the CONNECT method.
        my $has_absolute_url = $lurl =~ /^(?:s?https?|ftp):\/\/[-_.!~*'()a-zA-Z0-9;\/\?:\@&=+\$,%#]+/;
        my $is_connect = (defined $lmethod and $lmethod =~ /connect/i and $lurl =~ /[-_.!~*'()a-zA-Z0-9;\?:\@&=+\$,%#]+/);
        next LINE unless $has_absolute_url or $is_connect;

        if ($entry_counter >= $entry_limit) {
            print "検索エントリ超過\t -- \t検索エントリが $entry_limit 件を超えたので検索が中止されました\n";
            last LINE;
        }

        # Reverse lookup of the client address, cached per address so each
        # one is resolved at most once per run.
        my $shown_host = $lhost;
        if ($lookup_enabled and $lhost =~ $ipv4_re) {
            if (!exists $host_cache{$lhost}) {
                # NOTE(review): the literal 2 is presumably AF_INET - confirm
                # for the target platform.
                my $resolved = eval { gethostbyaddr(pack("C4", split(/\./, $lhost)), 2) };
                $host_cache{$lhost} = $resolved;
            }
            my $name = defined $host_cache{$lhost} ? $host_cache{$lhost} : '';
            $shown_host = $name . " ($lhost)";
        }

        #---- search-word switch is on (successful responses only) ----
        if ($word_enabled and $status =~ /^2\d{2}/) {
            my $http_status = $debug_enabled ? ":$status" : "";
            my ($engine, $que);

            if ($lurl =~ /^http:\/\/[\w]+.google\.[\.\w]+\/search(.+)/i and $lurl =~ /qu?=/i) {
                $engine = 'GOOGLE';
                $que    = _search_query($lurl, qr/qu?/, 'ie', 1);
            }
            elsif ($lurl =~ /^http:\/\/[\w-]+\.yahoo\.[\.\w]+\/search/i and $lurl =~ /p=/i) {
                $engine = 'YAHOO';
                $que    = _search_query($lurl, qr/p/, 'ei', 0);
            }
            elsif ($lurl =~ /^http:\/\/search\.(?:msn|live)\.[\.\w]+\/results.aspx/i and $lurl =~ /q=/i) {
                $engine = 'MSN';
                $que    = _search_query($lurl, qr/q/, undef, 0);
            }

            if (defined $engine) {
                $entry_counter++;
                print $ldate,"\t",$shown_host,"\t[",$engine,$http_status,'] ',$que,"\n";
            }
        }

        #---- URL display switch is on ----
        if ($url_enabled) {
            my $tag = defined $lmethod ? $lmethod : '';
            $tag .= ":$status" if $debug_enabled;
            $entry_counter++;
            print $ldate,"\t",$shown_host,"\t[",$tag,'] ',$lurl,"\n";
        }
    }
    close($log_fh);
}

# Pull the search words out of a search-engine URL.
#   $url            - the requested URL
#   $key_re         - pattern for the name of the parameter holding the words
#   $charset_key    - name of the parameter announcing the charset of the
#                     words; undef when no conversion is wanted
#   $slash_to_space - true to turn "/" into a space in addition to "+"
# Returns the decoded words ('' when the parameter is absent) with tabs and
# newlines replaced by spaces, so the value fits into one output row.
#
# The parameter is located by its name after "?" or "&" and runs up to the
# next "&" or the end of the URL. A query that is the last parameter is
# therefore found as well, and look-alike names ("aq=", "fp=") are not
# mistaken for it.
sub _search_query {
    my ($url, $key_re, $charset_key, $slash_to_space) = @_;

    my ($query) = $url =~ /[?&]${key_re}=([^&]*)/;
    $query = '' unless defined $query;

    # Undo the URL encoding: "+" (and optionally "/") become spaces, then
    # the %XX escapes are turned back into bytes.
    if ($slash_to_space) {
        $query =~ tr/+\// /;
    }
    else {
        $query =~ tr/+/ /;
    }
    $query =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('H2', $1)/eg;

    # Convert to UTF-8 only when the URL names a different charset.
    if (defined $charset_key) {
        my ($charset) = $url =~ /[?&]\Q$charset_key\E=([\w\-]+)/;
        if (defined $charset and lc($charset) ne 'utf-8') {
            # Best effort: an unknown charset name makes from_to die; the
            # bytes are then shown unconverted.
            eval { from_to($query, $charset, "utf-8"); };
        }
    }

    $query =~ tr/\t\n/ /;
    return $query;
}

# Error reporting routine.
# Prints "Invalid parameter: <name>" followed by a newline to the currently
# selected output handle; <name> is the first argument.
# Returns whatever print returns.
sub invalid_param {
    my ($param_name) = @_;
    return print 'Invalid parameter: ' . $param_name . "\n";
}


