001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang3.text.translate;
018
019 import java.io.IOException;
020 import java.io.Writer;
021
022 /**
023 * Translate XML numeric entities of the form &#[xX]?\d+; to
024 * the specific codepoint.
025 *
026 * @author Apache Software Foundation
027 * @since 3.0
028 * @version $Id: NumericEntityUnescaper.java 967237 2010-07-23 20:08:57Z mbenson $
029 */
030 public class NumericEntityUnescaper extends CharSequenceTranslator {
031
032 /**
033 * {@inheritDoc}
034 */
035 @Override
036 public int translate(CharSequence input, int index, Writer out) throws IOException {
037 // TODO: Protect from ArrayIndexOutOfBounds
038 if(input.charAt(index) == '&' && input.charAt(index + 1) == '#') {
039 int start = index + 2;
040 boolean isHex = false;
041
042 char firstChar = input.charAt(start);
043 if(firstChar == 'x' || firstChar == 'X') {
044 start++;
045 isHex = true;
046 }
047
048 int end = start;
049 while(input.charAt(end) != ';') {
050 end++;
051 }
052
053 int entityValue;
054 try {
055 if(isHex) {
056 entityValue = Integer.parseInt(input.subSequence(start, end).toString(), 16);
057 } else {
058 entityValue = Integer.parseInt(input.subSequence(start, end).toString(), 10);
059 }
060 } catch(NumberFormatException nfe) {
061 return 0;
062 }
063
064 if(entityValue > 0xFFFF) {
065 char[] chrs = Character.toChars(entityValue);
066 out.write(chrs[0]);
067 out.write(chrs[1]);
068 } else {
069 out.write(entityValue);
070 }
071 return 2 + (end - start) + (isHex ? 1 : 0) + 1;
072 }
073 return 0;
074 }
075 }