# -*- mode: perl; coding: utf-8 -*-
# keitairc/lib/plugins/33distinct
# based on 'Add link to google maps japan on address-like texts'
# by twk, from http://nonn-et-twk.net/twk/node/51
# 
# $Id: 33distinct,v 1.3 2008-12-31 13:59:52 morimoto Exp $
# $Source: /home/ishikawa/work/keitairc/tmp/keitairc/lib/plugins/33distinct,v $

# The "# line" directive below must state this file's real name and the
# number of the line that follows it (its own line number + 1); see perlsyn.
# line 12 "keitairc/lib/plugins/33distinct"

use Encode;
use utf8;

# Names of Tokyo's special wards, joined into one non-capturing alternation.
my $tokyo_special_wards = '(?:' . join('|', qw(
	千代田区 中央区 港区 新宿区 文京区 台東区 墨田区 江東区
	品川区 目黒区 大田区 世田谷区 渋谷区 中野区 杉並区 豊島区
	北区 荒川区 板橋区 練馬区 足立区 葛飾区 江戸川区
)) . ')';

# Names of Tokyo cities (the 市 entries), joined the same way.
my $tokyo_cities = '(?:' . join('|', qw(
	昭島市 あきる野市 稲城市 青梅市 清瀬市 国立市 小金井市
	国分寺市 小平市 狛江市 立川市 多摩市 調布市 西東京市
	八王子市 羽村市 東久留米市 東村山市 東大和市 日野市
	府中市 福生市 町田市 三鷹市 武蔵野市 武蔵村山市
)) . ')';

# All values below are regex *source strings*; they are interpolated into
# larger patterns later rather than compiled here.

# One character that may appear inside an address: anything except
# whitespace (ASCII or full-width), colons, and the listed brackets,
# parentheses and Japanese punctuation marks.
my $NO_WHITESPACE_ex = '[^\s　:：()\[\]［］「」（）、。]';

# Municipality: any of the wards or cities listed above.
my $SHICHOSON_ex = sprintf('(?:%s|%s)', $tokyo_special_wards, $tokyo_cities);

# District part: 1 to 20 of the permitted address characters.
my $AZA_ex = join('', $NO_WHITESPACE_ex, '{1,20}');

# Numeric part: 1 to 8 ASCII or full-width digits.  Kanji numerals
# (一二三...十), compass characters (東西南北), 無 and letters such as ABC
# are not matched by this class.
my $BANCHI_ex = '[\d０-９]{1,8}';

# Address whose numbers are chained by dashes or unit suffixes.
my $BUILDING1_re = join('',
	'^[^\d０-９]+',                                 # leading run of non-digits
	'[\d０-９]+',                                   # first number
	'(?:(?:[-―－ー]|丁目|番地?|号)[\d０-９]+)+',    # one or more "separator + number"
);

# Address ending in a unit suffix followed by digit-free trailing text.
my $BUILDING2_re = join('',
	'^(.*(?:丁目|番地?|号))',    # capture 1: text through a 丁目/番/番地/号 suffix
	'[^地\d０-９]+',             # following text containing no digit and no 地
	'(?:ビル|$)',                # then ビル, or the end of the string
);

# Pattern sources used by the plugin below, keyed by purpose:
#   general   - municipality, district text and 1 to 4 numeric components
#   building1 - see $BUILDING1_re
#   building2 - see $BUILDING2_re
my $re = do {
	# Optional run of ASCII space, tab or full-width space.
	my $gap = '[ \t　]*';

	# One numeric component, optionally followed by a unit suffix
	# (丁目 / 番 / 番地 / 号) and/or a dash-like separator.
	my $numeric_part = '(?:' . $BANCHI_ex . '(?:丁目|番地?|号)?[-―－ー]?)';

	+{
		general => join('',
			'(?:', $SHICHOSON_ex, ')', $gap,
			'(?:', $AZA_ex, $gap, $numeric_part, '{1,4}', ')',
		),
		building1 => $BUILDING1_re,
		building2 => $BUILDING2_re,
	};
};

# Plugin descriptor: a name, the pattern that selects address-like text in a
# message, and the callback that produces the replacement markup.
$plugin = {
	name => 'distinct',
	message_replace_regexp => sprintf('(%s)', $re->{general}),

	# Callback producing the HTML link for one piece of matched text.
	#   $session_id - placed in the link path right after the web root
	#   $src        - the text to replace; reused verbatim as the link label
	# Returns the anchor element as a string.
	#
	# NOTE(review): the key is spelled "imprementation"; presumably the
	# plugin loader looks it up under exactly this name, so it is left
	# untouched - confirm before renaming.
	message_replace_imprementation => sub {
		my ($session_id, $src) = @_;
		my ($general, $building1, $building2) =
			@{$re}{qw(general building1 building2)};

		# Reduce the text to the address-like portion only.
		my $address = $src;
		$address =~ s/.*($general).*/$1/;

		# Drop trailing building names; per the original author,
		# google maps rarely recognizes them.
		$address =~ s/($building1).*/$1/;
		$address =~ s/$building2.*/$1/;

		my $href = join('',
			$::cf->web_root(), $session_id, '/address/',
			URI::Escape::uri_escape_utf8($address));

		# NOTE(review): $src goes into the markup without HTML escaping;
		# confirm the caller has already escaped it.
		return '<a target="_self" class="inline" href="' . $href . '">'
			. $src . '</a>';
	},
};

1;
