| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
| TranslateFunction |
|
| 3.8333333333333335;3.833 |
| 1 | /* |
|
| 2 | * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/TranslateFunction.java,v 1.9 2005/06/28 13:44:45 elharo Exp $ |
|
| 3 | * $Revision: 1.9 $ |
|
| 4 | * $Date: 2005/06/28 13:44:45 $ |
|
| 5 | * |
|
| 6 | * ==================================================================== |
|
| 7 | * |
|
| 8 | * Copyright (C) 2000-2002 bob mcwhirter & James Strachan. |
|
| 9 | * All rights reserved. |
|
| 10 | * |
|
| 11 | * Redistribution and use in source and binary forms, with or without |
|
| 12 | * modification, are permitted provided that the following conditions |
|
| 13 | * are met: |
|
| 14 | * |
|
| 15 | * 1. Redistributions of source code must retain the above copyright |
|
| 16 | * notice, this list of conditions, and the following disclaimer. |
|
| 17 | * |
|
| 18 | * 2. Redistributions in binary form must reproduce the above copyright |
|
| 19 | * notice, this list of conditions, and the disclaimer that follows |
|
| 20 | * these conditions in the documentation and/or other materials |
|
| 21 | * provided with the distribution. |
|
| 22 | * |
|
| 23 | * 3. The name "Jaxen" must not be used to endorse or promote products |
|
| 24 | * derived from this software without prior written permission. For |
|
| 25 | * written permission, please contact license@jaxen.org. |
|
| 26 | * |
|
| 27 | * 4. Products derived from this software may not be called "Jaxen", nor |
|
| 28 | * may "Jaxen" appear in their name, without prior written permission |
|
| 29 | * from the Jaxen Project Management (pm@jaxen.org). |
|
| 30 | * |
|
| 31 | * In addition, we request (but do not require) that you include in the |
|
| 32 | * end-user documentation provided with the redistribution and/or in the |
|
| 33 | * software itself an acknowledgement equivalent to the following: |
|
| 34 | * "This product includes software developed by the |
|
| 35 | * Jaxen Project <http://www.jaxen.org/>." |
|
| 36 | * Alternatively, the acknowledgment may be graphical using the logos |
|
| 37 | * available at http://www.jaxen.org/ |
|
| 38 | * |
|
| 39 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
|
| 40 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
| 41 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
| 42 | * DISCLAIMED. IN NO EVENT SHALL THE Jaxen AUTHORS OR THE PROJECT |
|
| 43 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
| 44 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
| 45 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
|
| 46 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
| 47 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
| 48 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
|
| 49 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
| 50 | * SUCH DAMAGE. |
|
| 51 | * |
|
| 52 | * ==================================================================== |
|
| 53 | * This software consists of voluntary contributions made by many |
|
| 54 | * individuals on behalf of the Jaxen Project and was originally |
|
| 55 | * created by bob mcwhirter <bob@werken.com> and |
|
| 56 | * James Strachan <jstrachan@apache.org>. For more information on the |
|
| 57 | * Jaxen Project, please see <http://www.jaxen.org/>. |
|
| 58 | * |
|
| 59 | * $Id: TranslateFunction.java,v 1.9 2005/06/28 13:44:45 elharo Exp $ |
|
| 60 | */ |
|
| 61 | ||
| 62 | ||
| 63 | package org.jaxen.function; |
|
| 64 | ||
| 65 | import java.util.HashMap; |
|
| 66 | import java.util.List; |
|
| 67 | import java.util.Map; |
|
| 68 | ||
| 69 | import org.jaxen.Context; |
|
| 70 | import org.jaxen.Function; |
|
| 71 | import org.jaxen.FunctionCallException; |
|
| 72 | import org.jaxen.Navigator; |
|
| 73 | ||
| 74 | /** |
|
| 75 | * <p> |
|
| 76 | * <b>4.2</b> |
|
| 77 | * <code><i>string</i> translate(<i>string</i>,<i>string</i>,<i>string</i>)</code> |
|
| 78 | * </p> |
|
| 79 | * |
|
| 80 | * <blockquote src="http://www.w3.org/TR/xpath#function-translate"> |
|
| 81 | * <p> |
|
| 82 | * The <b><a href="http://www.w3.org/TR/xpath#function-translate">translate</a></b> function |
|
| 83 | * returns the first argument string with occurrences of characters in |
|
| 84 | * the second argument string replaced by the character at the |
|
| 85 | * corresponding position in the third argument string. For example, |
|
| 86 | * <code>translate("bar","abc","ABC")</code> returns the string |
|
| 87 | * <code>BAr</code>. If there is a character in the second argument |
|
| 88 | * string with no character at a corresponding position in the third |
|
| 89 | * argument string (because the second argument string is longer than |
|
| 90 | * the third argument string), then occurrences of that character in the |
|
| 91 | * first argument string are removed. For example, |
|
| 92 | * <code>translate("--aaa--","abc-","ABC")</code> returns |
|
| 93 | * <code>"AAA"</code>. If a character occurs more than once in the |
|
| 94 | * second argument string, then the first occurrence determines the |
|
| 95 | * replacement character. If the third argument string is longer than |
|
| 96 | * the second argument string, then excess characters are ignored. |
|
| 97 | * </p> |
|
| 98 | * |
|
| 99 | * <blockquote> <b>NOTE: </b>The <b>translate</b> function is not a |
|
| 100 | * sufficient solution for case conversion in all languages. A future |
|
| 101 | * version of XPath may provide additional functions for case |
|
| 102 | * conversion.</blockquote> |
|
| 103 | * |
|
| 104 | * </blockquote> |
|
| 105 | * |
|
| 106 | * @author Jan Dvorak ( jan.dvorak @ mathan.cz ) |
|
| 107 | * |
|
| 108 | * @see <a href="http://www.w3.org/TR/xpath#function-translate" |
|
| 109 | * target="_top">Section 4.2 of the XPath Specification</a> |
|
| 110 | */ |
|
| 111 | public class TranslateFunction implements Function |
|
| 112 | { |
|
| 113 | ||
| 114 | /* The translation is done thru a HashMap. Performance tip (for anyone |
|
| 115 | * who needs to improve the performance of this particular function): |
|
| 116 | * Cache the HashMaps, once they are constructed. */ |
|
| 117 | ||
| 118 | /** |
|
| 119 | * Create a new <code>TranslateFunction</code> object. |
|
| 120 | */ |
|
| 121 | 294 | public TranslateFunction() {} |
| 122 | ||
| 123 | ||
| 124 | /** Returns a copy of the first argument in which |
|
| 125 | * characters found in the second argument are replaced by |
|
| 126 | * corresponding characters from the third argument. |
|
| 127 | * |
|
| 128 | * @param context the context at the point in the |
|
| 129 | * expression when the function is called |
|
| 130 | * @param args a list that contains exactly three items |
|
| 131 | * |
|
| 132 | * @return a <code>String</code> built from <code>args.get(0)</code> |
|
| 133 | * in which occurrences of characters in <code>args.get(1)</code> |
|
| 134 | * are replaced by the corresponding characters in <code>args.get(2)</code> |
|
| 135 | * |
|
| 136 | * @throws FunctionCallException if <code>args</code> does not have exactly three items |
|
| 137 | */ |
|
| 138 | public Object call(Context context, |
|
| 139 | List args) throws FunctionCallException |
|
| 140 | { |
|
| 141 | 306 | if (args.size() == 3) { |
| 142 | 300 | return evaluate( args.get(0), |
| 143 | args.get(1), |
|
| 144 | args.get(2), |
|
| 145 | context.getNavigator() ); |
|
| 146 | } |
|
| 147 | ||
| 148 | 6 | throw new FunctionCallException( "translate() requires three arguments." ); |
| 149 | } |
|
| 150 | ||
| 151 | /** |
|
| 152 | * Returns a copy of <code>strArg</code> in which |
|
| 153 | * characters found in <code>fromArg</code> are replaced by |
|
| 154 | * corresponding characters from <code>toArg</code>. |
|
| 155 | * If necessary each argument is first converted to it string-value |
|
| 156 | * as if by the XPath <code>string()</code> function. |
|
| 157 | * |
|
| 158 | * @param strArg the base string |
|
| 159 | * @param fromArg the characters to be replaced |
|
| 160 | * @param toArg the characters they will be replaced by |
|
| 161 | * @param nav the <code>Navigator</code> used to calculate the string-values of the arguments. |
|
| 162 | * |
|
| 163 | * @return a copy of <code>strArg</code> in which |
|
| 164 | * characters found in <code>fromArg</code> are replaced by |
|
| 165 | * corresponding characters from <code>toArg</code> |
|
| 166 | * |
|
| 167 | * @throws FunctionCallException if one of the arguments is a malformed Unicode string; |
|
| 168 | * that is, if surrogate characters don't line up properly |
|
| 169 | * |
|
| 170 | */ |
|
| 171 | public static String evaluate(Object strArg, |
|
| 172 | Object fromArg, |
|
| 173 | Object toArg, |
|
| 174 | Navigator nav) throws FunctionCallException |
|
| 175 | { |
|
| 176 | 300 | String inStr = StringFunction.evaluate( strArg, nav ); |
| 177 | 300 | String fromStr = StringFunction.evaluate( fromArg, nav ); |
| 178 | 300 | String toStr = StringFunction.evaluate( toArg, nav ); |
| 179 | ||
| 180 | // Initialize the mapping in a HashMap |
|
| 181 | 300 | Map characterMap = new HashMap(); |
| 182 | 300 | String[] fromCharacters = toUnicodeCharacters(fromStr); |
| 183 | 300 | String[] toCharacters = toUnicodeCharacters(toStr); |
| 184 | 288 | int fromLen = fromCharacters.length; |
| 185 | 288 | int toLen = toCharacters.length; |
| 186 | 1128 | for ( int i = 0; i < fromLen; i++ ) { |
| 187 | 840 | String cFrom = fromCharacters[i]; |
| 188 | 840 | if ( characterMap.containsKey( cFrom ) ) { |
| 189 | // We've seen the character before, ignore |
|
| 190 | 24 | continue; |
| 191 | } |
|
| 192 | ||
| 193 | 816 | if ( i < toLen ) { |
| 194 | // Will change |
|
| 195 | 672 | characterMap.put( cFrom, toCharacters[i] ); |
| 196 | } |
|
| 197 | else { |
|
| 198 | // Will delete |
|
| 199 | 144 | characterMap.put( cFrom, null ); |
| 200 | } |
|
| 201 | } |
|
| 202 | ||
| 203 | // Process the input string thru the map |
|
| 204 | 288 | StringBuffer outStr = new StringBuffer( inStr.length() ); |
| 205 | 288 | String[] inCharacters = toUnicodeCharacters(inStr); |
| 206 | 288 | int inLen = inCharacters.length; |
| 207 | 1314 | for ( int i = 0; i < inLen; i++ ) { |
| 208 | 1026 | String cIn = inCharacters[i]; |
| 209 | 1026 | if ( characterMap.containsKey( cIn ) ) { |
| 210 | 822 | String cTo = (String) characterMap.get( cIn ); |
| 211 | 822 | if ( cTo != null ) { |
| 212 | 678 | outStr.append( cTo ); |
| 213 | } |
|
| 214 | } |
|
| 215 | else { |
|
| 216 | 204 | outStr.append( cIn ); |
| 217 | } |
|
| 218 | } |
|
| 219 | ||
| 220 | 288 | return outStr.toString(); |
| 221 | } |
|
| 222 | ||
| 223 | private static String[] toUnicodeCharacters(String s) throws FunctionCallException { |
|
| 224 | ||
| 225 | 888 | String[] result = new String[s.length()]; |
| 226 | 888 | int stringLength = 0; |
| 227 | 3540 | for (int i = 0; i < s.length(); i++) { |
| 228 | 2664 | char c1 = s.charAt(i); |
| 229 | 2664 | if (isHighSurrogate(c1)) { |
| 230 | try { |
|
| 231 | 54 | char c2 = s.charAt(i+1); |
| 232 | 54 | if (isLowSurrogate(c2)) { |
| 233 | 42 | result[stringLength] = (c1 + "" + c2).intern(); |
| 234 | 42 | i++; |
| 235 | } |
|
| 236 | else { |
|
| 237 | 12 | throw new FunctionCallException("Mismatched surrogate pair in translate function"); |
| 238 | } |
|
| 239 | } |
|
| 240 | 0 | catch (StringIndexOutOfBoundsException ex) { |
| 241 | 0 | throw new FunctionCallException("High surrogate without low surrogate at end of string passed to translate function"); |
| 242 | 42 | } |
| 243 | } |
|
| 244 | else { |
|
| 245 | 2610 | result[stringLength]=String.valueOf(c1).intern(); |
| 246 | } |
|
| 247 | 2652 | stringLength++; |
| 248 | } |
|
| 249 | ||
| 250 | 876 | if (stringLength == result.length) return result; |
| 251 | ||
| 252 | // trim array |
|
| 253 | 42 | String[] trimmed = new String[stringLength]; |
| 254 | 42 | System.arraycopy(result, 0, trimmed, 0, stringLength); |
| 255 | 42 | return trimmed; |
| 256 | ||
| 257 | } |
|
| 258 | ||
| 259 | private static boolean isHighSurrogate(char c) { |
|
| 260 | 2664 | return c >= 0xD800 && c <= 0xDBFF; |
| 261 | } |
|
| 262 | ||
| 263 | private static boolean isLowSurrogate(char c) { |
|
| 264 | 54 | return c >= 0xDC00 && c <= 0xDFFF; |
| 265 | } |
|
| 266 | ||
| 267 | } |