1 /*
2 * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/function/StringLengthFunction.java,v 1.10 2005/06/28 13:44:45 elharo Exp $
3 * $Revision: 1.10 $
4 * $Date: 2005/06/28 13:44:45 $
5 *
6 * ====================================================================
7 *
8 * Copyright (C) 2000-2002 bob mcwhirter & James Strachan.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions, and the following disclaimer.
17 *
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions, and the disclaimer that follows
20 * these conditions in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * 3. The name "Jaxen" must not be used to endorse or promote products
24 * derived from this software without prior written permission. For
25 * written permission, please contact license@jaxen.org.
26 *
27 * 4. Products derived from this software may not be called "Jaxen", nor
28 * may "Jaxen" appear in their name, without prior written permission
29 * from the Jaxen Project Management (pm@jaxen.org).
30 *
31 * In addition, we request (but do not require) that you include in the
32 * end-user documentation provided with the redistribution and/or in the
33 * software itself an acknowledgement equivalent to the following:
34 * "This product includes software developed by the
35 * Jaxen Project <http://www.jaxen.org/>."
36 * Alternatively, the acknowledgment may be graphical using the logos
37 * available at http://www.jaxen.org/
38 *
39 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
40 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
41 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
42 * DISCLAIMED. IN NO EVENT SHALL THE Jaxen AUTHORS OR THE PROJECT
43 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
45 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
46 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
47 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
48 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
49 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 * ====================================================================
53 * This software consists of voluntary contributions made by many
54 * individuals on behalf of the Jaxen Project and was originally
55 * created by bob mcwhirter <bob@werken.com> and
56 * James Strachan <jstrachan@apache.org>. For more information on the
57 * Jaxen Project, please see <http://www.jaxen.org/>.
58 *
59 * $Id: StringLengthFunction.java,v 1.10 2005/06/28 13:44:45 elharo Exp $
60 */
61
62
63 package org.jaxen.function;
64
65 import java.util.List;
66
67 import org.jaxen.Context;
68 import org.jaxen.Function;
69 import org.jaxen.FunctionCallException;
70 import org.jaxen.Navigator;
71
72 /***
73 * <p><b>4.2</b> <code><i>number</i> string-length(<i>string</i>)</code></p>
74 *
75 * <p>
76 * The <b>string-length</b> function returns the number of <strong>Unicode characters</strong>
77 * in its argument. This is <strong>not</strong> necessarily
78 * the same as the number <strong>Java chars</strong>
79 * in the corresponding Java string. In particular, if the Java <code>String</code>
80 * contains surrogate pairs each such pair will be counted as only one character
81 * by this function. If the argument is omitted,
82 * it returns the length of the string-value of the context node.
83 * </p>
84 *
85 * @author bob mcwhirter (bob @ werken.com)
86 * @see <a href="http://www.w3.org/TR/xpath#function-string-length" target="_top">Section
87 * 4.2 of the XPath Specification</a>
88 */
89 public class StringLengthFunction implements Function
90 {
91
92
93 /***
94 * Create a new <code>StringLengthFunction</code> object.
95 */
96 public StringLengthFunction() {}
97
98
99 /***
100 * <p>
101 * Returns the number of Unicode characters in the string-value of the argument.
102 * </p>
103 *
104 * @param context the context at the point in the
105 * expression when the function is called
106 * @param args a list containing the item whose string-value is to be counted.
107 * If empty, the length of the context node's string-value is returned.
108 *
109 * @return a <code>Double</code> giving the number of Unicode characters
110 *
111 * @throws FunctionCallException if args has more than one item
112 */
113 public Object call(Context context,
114 List args) throws FunctionCallException
115 {
116 if (args.size() == 0)
117 {
118 return evaluate( context.getNodeSet(),
119 context.getNavigator() );
120 }
121 else if (args.size() == 1)
122 {
123 return evaluate( args.get(0),
124 context.getNavigator() );
125 }
126
127 throw new FunctionCallException( "string-length() requires one argument." );
128 }
129
130 /***
131 * <p>
132 * Returns the number of Unicode characters in the string-value of
133 * an object.
134 * </p>
135 *
136 * @param obj the object whose string-value is counted
137 * @param nav used to calculate the string-values of the first two arguments
138 *
139 * @return a <code>Double</code> giving the number of Unicode characters
140 *
141 * @throws FunctionCallException if the string contains mismatched surrogates
142 */
143 public static Double evaluate(Object obj, Navigator nav) throws FunctionCallException
144 {
145
146 // could/should I push the mismnatching checks into StringFunction.evaluate()????
147 String str = StringFunction.evaluate( obj, nav );
148 // String.length() counts UTF-16 code points; not Unicode characters
149 char[] data = str.toCharArray();
150 int length = 0;
151 for (int i = 0; i < data.length; i++) {
152 char c = data[i];
153 length++;
154 // if this is a high surrogate; assume the next character is
155 // is a low surrogate and skip it
156 if (c >= 0xD800) {
157 try {
158 char low = data[i+1];
159 if (low < 0xDC00 || low > 0xDFFF) {
160 throw new FunctionCallException("Bad surrogate pair in string " + str);
161 }
162 i++; // increment past low surrogate
163 }
164 catch (ArrayIndexOutOfBoundsException ex) {
165 throw new FunctionCallException("Bad surrogate pair in string " + str);
166 }
167 }
168 }
169 return new Double(length);
170 }
171
172 }