| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
| SubstringFunction |
|
| 9.333333333333334;9.333 |
| 1 | /* |
|
| 2 | * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/SubstringFunction.java,v 1.15 2005/06/28 13:44:45 elharo Exp $ |
|
| 3 | * $Revision: 1.15 $ |
|
| 4 | * $Date: 2005/06/28 13:44:45 $ |
|
| 5 | * |
|
| 6 | * ==================================================================== |
|
| 7 | * |
|
| 8 | * Copyright (C) 2000-2002 bob mcwhirter & James Strachan. |
|
| 9 | * All rights reserved. |
|
| 10 | * |
|
| 11 | * Redistribution and use in source and binary forms, with or without |
|
| 12 | * modification, are permitted provided that the following conditions |
|
| 13 | * are met: |
|
| 14 | * |
|
| 15 | * 1. Redistributions of source code must retain the above copyright |
|
| 16 | * notice, this list of conditions, and the following disclaimer. |
|
| 17 | * |
|
| 18 | * 2. Redistributions in binary form must reproduce the above copyright |
|
| 19 | * notice, this list of conditions, and the disclaimer that follows |
|
| 20 | * these conditions in the documentation and/or other materials |
|
| 21 | * provided with the distribution. |
|
| 22 | * |
|
| 23 | * 3. The name "Jaxen" must not be used to endorse or promote products |
|
| 24 | * derived from this software without prior written permission. For |
|
| 25 | * written permission, please contact license@jaxen.org. |
|
| 26 | * |
|
| 27 | * 4. Products derived from this software may not be called "Jaxen", nor |
|
| 28 | * may "Jaxen" appear in their name, without prior written permission |
|
| 29 | * from the Jaxen Project Management (pm@jaxen.org). |
|
| 30 | * |
|
| 31 | * In addition, we request (but do not require) that you include in the |
|
| 32 | * end-user documentation provided with the redistribution and/or in the |
|
| 33 | * software itself an acknowledgement equivalent to the following: |
|
| 34 | * "This product includes software developed by the |
|
| 35 | * Jaxen Project <http://www.jaxen.org/>." |
|
| 36 | * Alternatively, the acknowledgment may be graphical using the logos |
|
| 37 | * available at http://www.jaxen.org/ |
|
| 38 | * |
|
| 39 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
|
| 40 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
| 41 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
| 42 | * DISCLAIMED. IN NO EVENT SHALL THE Jaxen AUTHORS OR THE PROJECT |
|
| 43 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
| 44 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
| 45 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
|
| 46 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
| 47 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
| 48 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
|
| 49 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
| 50 | * SUCH DAMAGE. |
|
| 51 | * |
|
| 52 | * ==================================================================== |
|
| 53 | * This software consists of voluntary contributions made by many |
|
| 54 | * individuals on behalf of the Jaxen Project and was originally |
|
| 55 | * created by bob mcwhirter <bob@werken.com> and |
|
| 56 | * James Strachan <jstrachan@apache.org>. For more information on the |
|
| 57 | * Jaxen Project, please see <http://www.jaxen.org/>. |
|
| 58 | * |
|
| 59 | */ |
|
| 60 | package org.jaxen.function; |
|
| 61 | ||
| 62 | import java.util.List; |
|
| 63 | ||
| 64 | import org.jaxen.Context; |
|
| 65 | import org.jaxen.Function; |
|
| 66 | import org.jaxen.FunctionCallException; |
|
| 67 | import org.jaxen.Navigator; |
|
| 68 | /** |
|
| 69 | * <p> |
|
| 70 | * <b>4.2</b> |
|
| 71 | * <code><i>string</i> substring(<i>string</i>,<i>number</i>,<i>number?</i>)</code> |
|
| 72 | * </p> |
|
| 73 | * |
|
| 74 | * <blockquote src="http://www.w3.org/TR/xpath"> |
|
| 75 | * <p>The <b>substring</b> function returns the |
|
| 76 | * substring of the first argument starting at the position specified in |
|
| 77 | * the second argument with length specified in the third argument. For |
|
| 78 | * example, |
|
| 79 | * |
|
| 80 | * <code>substring("12345",2,3)</code> returns <code>"234"</code>. |
|
| 81 | * If the third argument is not specified, it returns the substring |
|
| 82 | * starting at the position specified in the second argument and |
|
| 83 | * continuing to the end of the string. For example, |
|
| 84 | * <code>substring("12345",2)</code> returns <code>"2345"</code>. |
|
| 85 | * </p> |
|
| 86 | * |
|
| 87 | * <p> |
|
| 88 | * More precisely, each character in the string (see <a |
|
| 89 | * href="http://www.w3.org/TR/xpath#strings">[<b>3.6 Strings</b>]</a>) is considered to have a |
|
| 90 | * numeric position: the position of the first character is 1, the |
|
| 91 | * position of the second character is 2 and so on. |
|
| 92 | * </p> |
|
| 93 | * |
|
| 94 | * <blockquote> <b>NOTE: </b>This differs from Java and ECMAScript, in |
|
| 95 | * which the <code>String.substring</code> method treats the position |
|
| 96 | * of the first character as 0.</blockquote> |
|
| 97 | * |
|
| 98 | * <p> |
|
| 99 | * The returned substring contains those characters for which the |
|
| 100 | * position of the character is greater than or equal to the rounded |
|
| 101 | * value of the second argument and, if the third argument is specified, |
|
| 102 | * less than the sum of the rounded value of the second argument and the |
|
| 103 | * rounded value of the third argument; the comparisons and addition |
|
| 104 | * used for the above follow the standard IEEE 754 rules; rounding is |
|
| 105 | * done as if by a call to the <b><a href="#function-round">round</a></b> |
|
| 106 | * function. The following examples illustrate various unusual cases: |
|
| 107 | * </p> |
|
| 108 | * |
|
| 109 | * <ul> |
|
| 110 | * |
|
| 111 | * <li> |
|
| 112 | * <p> |
|
| 113 | * <code>substring("12345", 1.5, 2.6)</code> returns |
|
| 114 | * <code>"234"</code> |
|
| 115 | * </p> |
|
| 116 | * </li> |
|
| 117 | * |
|
| 118 | * <li> |
|
| 119 | * <p> |
|
| 120 | * <code>substring("12345", 0, 3)</code> returns <code>"12"</code> |
|
| 121 | * |
|
| 122 | * </p> |
|
| 123 | * </li> |
|
| 124 | * |
|
| 125 | * <li> |
|
| 126 | * <p> |
|
| 127 | * <code>substring("12345", 0 div 0, 3)</code> returns <code>""</code> |
|
| 128 | * </p> |
|
| 129 | * </li> |
|
| 130 | * |
|
| 131 | * <li> |
|
| 132 | * <p> |
|
| 133 | * <code>substring("12345", 1, 0 div 0)</code> returns |
|
| 134 | * |
|
| 135 | * <code>""</code> |
|
| 136 | * </p> |
|
| 137 | * </li> |
|
| 138 | * |
|
| 139 | * <li> |
|
| 140 | * <p> |
|
| 141 | * <code>substring("12345", -42, 1 div 0)</code> returns |
|
| 142 | * <code>"12345"</code> |
|
| 143 | * </p> |
|
| 144 | * </li> |
|
| 145 | * |
|
| 146 | * <li> |
|
| 147 | * <p> |
|
| 148 | * |
|
| 149 | * <code>substring("12345", -1 div 0, 1 div 0)</code> returns |
|
| 150 | * <code>""</code> </p> </li> </ul> </blockquote> |
|
| 151 | * |
|
| 152 | * @author bob mcwhirter (bob @ werken.com) |
|
| 153 | * |
|
| 154 | * @see <a href="http://www.w3.org/TR/xpath#function-substring" |
|
| 155 | * target="_top">Section 4.2 of the XPath Specification</a> |
|
| 156 | */ |
|
| 157 | public class SubstringFunction implements Function |
|
| 158 | { |
|
| 159 | ||
| 160 | /** |
|
| 161 | * Create a new <code>SubstringFunction</code> object. |
|
| 162 | */ |
|
| 163 | 294 | public SubstringFunction() {} |
| 164 | ||
| 165 | ||
| 166 | /** Returns a substring of an XPath string-value by character index. |
|
| 167 | * |
|
| 168 | * @param context the context at the point in the |
|
| 169 | * expression when the function is called |
|
| 170 | * @param args a list that contains two or three items |
|
| 171 | * |
|
| 172 | * @return a <code>String</code> containing the specifed character subsequence of |
|
| 173 | * the original string or the string-value of the context node |
|
| 174 | * |
|
| 175 | * @throws FunctionCallException if <code>args</code> has more than three |
|
| 176 | * or less than two items |
|
| 177 | */ |
|
| 178 | public Object call(Context context, |
|
| 179 | List args) throws FunctionCallException |
|
| 180 | { |
|
| 181 | 312 | final int argc = args.size(); |
| 182 | 312 | if (argc < 2 || argc > 3){ |
| 183 | 12 | throw new FunctionCallException( "substring() requires two or three arguments." ); |
| 184 | } |
|
| 185 | ||
| 186 | 300 | final Navigator nav = context.getNavigator(); |
| 187 | ||
| 188 | 300 | final String str = StringFunction.evaluate(args.get(0), nav ); |
| 189 | // The spec doesn't really address this case |
|
| 190 | 300 | if (str == null) { |
| 191 | 0 | return ""; |
| 192 | } |
|
| 193 | ||
| 194 | 300 | final int stringLength = (StringLengthFunction.evaluate(args.get(0), nav )).intValue(); |
| 195 | ||
| 196 | 300 | if (stringLength == 0) { |
| 197 | 6 | return ""; |
| 198 | } |
|
| 199 | ||
| 200 | 294 | Double d1 = NumberFunction.evaluate(args.get(1), nav); |
| 201 | ||
| 202 | 294 | if (d1.isNaN()){ |
| 203 | 30 | return ""; |
| 204 | } |
|
| 205 | // Round the value and subtract 1 as Java strings are zero based |
|
| 206 | 264 | int start = RoundFunction.evaluate(d1, nav).intValue() - 1; |
| 207 | ||
| 208 | 264 | int substringLength = stringLength; |
| 209 | 264 | if (argc == 3){ |
| 210 | 222 | Double d2 = NumberFunction.evaluate(args.get(2), nav); |
| 211 | ||
| 212 | 222 | if (!d2.isNaN()){ |
| 213 | 192 | substringLength = RoundFunction.evaluate(d2, nav ).intValue(); |
| 214 | } |
|
| 215 | else { |
|
| 216 | 30 | substringLength = 0; |
| 217 | } |
|
| 218 | } |
|
| 219 | ||
| 220 | 264 | if (substringLength < 0) return ""; |
| 221 | ||
| 222 | 252 | int end = start + substringLength; |
| 223 | 252 | if (argc == 2) end = stringLength; |
| 224 | ||
| 225 | // negative start is treated as 0 |
|
| 226 | 252 | if ( start < 0){ |
| 227 | 72 | start = 0; |
| 228 | } |
|
| 229 | 180 | else if (start > stringLength){ |
| 230 | 30 | return ""; |
| 231 | } |
|
| 232 | ||
| 233 | 222 | if (end > stringLength){ |
| 234 | 66 | end = stringLength; |
| 235 | } |
|
| 236 | 156 | else if (end < start) return ""; |
| 237 | ||
| 238 | 216 | if (stringLength == str.length()) { |
| 239 | // easy case; no surrogate pairs |
|
| 240 | 198 | return str.substring(start, end); |
| 241 | } |
|
| 242 | else { |
|
| 243 | 18 | return unicodeSubstring(str, start, end); |
| 244 | } |
|
| 245 | ||
| 246 | } |
|
| 247 | ||
| 248 | private static String unicodeSubstring(String s, int start, int end) { |
|
| 249 | ||
| 250 | 18 | StringBuffer result = new StringBuffer(s.length()); |
| 251 | 66 | for (int jChar = 0, uChar=0; uChar < end; jChar++, uChar++) { |
| 252 | 48 | char c = s.charAt(jChar); |
| 253 | 48 | if (uChar >= start) result.append(c); |
| 254 | 48 | if (c >= 0xD800) { // get the low surrogate |
| 255 | // ???? we could check here that this is indeed a low surroagte |
|
| 256 | // we could also catch StringIndexOutOfBoundsException |
|
| 257 | 18 | jChar++; |
| 258 | 18 | if (uChar >= start) result.append(s.charAt(jChar)); |
| 259 | } |
|
| 260 | } |
|
| 261 | 18 | return result.toString(); |
| 262 | } |
|
| 263 | } |