/*
 * Copyright (C) 2010-2011 Mtzky.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *         http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.mtzky.lucene.filter;

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;

/**
 * <p>
 * Token filter that replaces small ("lower case") Kana with their full-size
 * ("upper case") counterparts.
 * </p>
 * <p>
 * Small Kana from the following Unicode ranges are converted:
 * </p>
 * <ol>
 * <li>Hiragana (U+3040 ... U+309F)</li>
 * <li>Katakana (U+30A0 ... U+30FF)</li>
 * <li>Katakana Phonetic Extensions (U+31F0 ... U+31FF), which contains
 * additional small characters used for writing the Ainu language</li>
 * <li>Halfwidth and Fullwidth Forms (U+FF00 ... U+FFEF)</li>
 * </ol>
 * <p>
 * Every other character is passed through unchanged.
 * </p>
 * <p>
 * <strong>NOTE</strong>: The Kana Supplement block introduced in Unicode 6.0
 * is not supported, i.e. "Katakana letter archaic E" (U+1B000) and
 * "Hiragana letter archaic YE" (U+1B001).
 * </p>
 * 
 * @author mtzky
 */
public class KanaUpperCaseFilter extends CharToCharMappingTokenFilter {

	/**
	 * Conversion pairs: element 0 of each row is the small Kana, element 1 is
	 * the full-size character that replaces it.
	 */
	private static final char[][] SMALL_TO_FULL = {
		// Hiragana
		{ '\u3041', 'あ' },
		{ '\u3043', 'い' },
		{ '\u3045', 'う' },
		{ '\u3047', 'え' },
		{ '\u3049', 'お' },
		{ '\u3063', 'つ' },
		{ '\u3083', 'や' },
		{ '\u3085', 'ゆ' },
		{ '\u3087', 'よ' },
		{ '\u308E', 'わ' },
		{ '\u3095', 'か' },
		{ '\u3096', 'け' },

		// Katakana
		{ '\u30A1', 'ア' },
		{ '\u30A3', 'イ' },
		{ '\u30A5', 'ウ' },
		{ '\u30A7', 'エ' },
		{ '\u30A9', 'オ' },
		{ '\u30C3', 'ツ' },
		{ '\u30E3', 'ヤ' },
		{ '\u30E5', 'ユ' },
		{ '\u30E7', 'ヨ' },
		{ '\u30EE', 'ワ' },
		{ '\u30F5', 'カ' },
		{ '\u30F6', 'ケ' },

		// Katakana Phonetic Extensions
		{ '\u31F0', 'ク' },
		{ '\u31F1', 'シ' },
		{ '\u31F2', 'ス' },
		{ '\u31F3', 'ト' },
		{ '\u31F4', 'ヌ' },
		{ '\u31F5', 'ハ' },
		{ '\u31F6', 'ヒ' },
		{ '\u31F7', 'フ' },
		{ '\u31F8', 'ヘ' },
		{ '\u31F9', 'ホ' },
		{ '\u31FA', 'ム' },
		{ '\u31FB', 'ラ' },
		{ '\u31FC', 'リ' },
		{ '\u31FD', 'ル' },
		{ '\u31FE', 'レ' },
		{ '\u31FF', 'ロ' },

		// Halfwidth Katakana
		{ '\uFF67', 'ｱ' },
		{ '\uFF68', 'ｲ' },
		{ '\uFF69', 'ｳ' },
		{ '\uFF6A', 'ｴ' },
		{ '\uFF6B', 'ｵ' },
		{ '\uFF6C', 'ﾔ' },
		{ '\uFF6D', 'ﾕ' },
		{ '\uFF6E', 'ﾖ' },
		{ '\uFF6F', 'ﾂ' },
	};

	/** Lookup table from small Kana to full-size Kana; filled once at class load. */
	private static final Map<Character, Character> MAP = buildMap();

	/**
	 * Copies every row of {@link #SMALL_TO_FULL} into a hash map.
	 * 
	 * @return a map keyed by small Kana whose values are the full-size
	 *         replacements
	 */
	private static Map<Character, Character> buildMap() {
		final Map<Character, Character> table = new HashMap<Character, Character>();
		for (final char[] pair : SMALL_TO_FULL) {
			table.put(pair[0], pair[1]);
		}
		return table;
	}

	/**
	 * Creates a filter over the given token stream.
	 * 
	 * @param in
	 *            the input token stream, handed to the superclass constructor
	 */
	public KanaUpperCaseFilter(final TokenStream in) {
		super(in);
	}

	/**
	 * Maps a single character to its full-size form.
	 * 
	 * @param c
	 *            the character to convert
	 * @return the full-size Kana if {@code c} is a small Kana known to this
	 *         filter, otherwise {@code c} itself
	 */
	@Override
	protected char convert(final char c) {
		final Character fullSize = MAP.get(c);
		return (fullSize != null) ? fullSize.charValue() : c;
	}

}
