001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang3.text;
018
019 import org.apache.commons.lang3.StringUtils;
020 import org.apache.commons.lang3.SystemUtils;
021
/**
 * <p>Operations on Strings that contain words.</p>
 *
 * <p>This class tries to handle <code>null</code> input gracefully.
 * An exception will not be thrown for a <code>null</code> input.
 * Each method documents its behaviour in more detail.</p>
 *
 * <p>The case-changing methods and <code>initials</code> process the input one
 * Unicode code point at a time, so characters stored as surrogate pairs are
 * handled as single letters.</p>
 *
 * @author Apache Software Foundation
 * @author Apache Jakarta Velocity
 * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
 * @author Gary Gregory
 * @since 2.0
 * @version $Id: WordUtils.java 925967 2010-03-22 06:16:49Z bayard $
 */
public class WordUtils {

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
        super();
    }

    // Wrapping
    //--------------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *) = null
     * WordUtils.wrap("", *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *, *, *) = null
     * WordUtils.wrap("", *, *, *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the system property line separator
     *  (or <code>"\n"</code> if that property cannot be read)
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength, String newLineStr, boolean wrapLongWords) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = systemLineSeparator();
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        int inputLineLength = str.length();
        int offset = 0;
        StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);

        // Loop only while more than one line's worth of text remains, so
        // wrapLength + offset always stays inside the string.
        while ((inputLineLength - offset) > wrapLength) {
            if (str.charAt(offset) == ' ') {
                // drop a space that would otherwise start the new line
                offset++;
                continue;
            }
            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);

            if (spaceToWrapAt >= offset) {
                // normal case: break at the last space that fits on the line
                wrappedLine.append(str, offset, spaceToWrapAt).append(newLineStr);
                offset = spaceToWrapAt + 1;
            } else if (wrapLongWords) {
                // really long word or URL: cut it one full line at a time
                wrappedLine.append(str, offset, wrapLength + offset).append(newLineStr);
                offset += wrapLength;
            } else {
                // do not wrap really long word, just extend beyond limit
                spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
                if (spaceToWrapAt >= 0) {
                    wrappedLine.append(str, offset, spaceToWrapAt).append(newLineStr);
                    offset = spaceToWrapAt + 1;
                } else {
                    wrappedLine.append(str, offset, inputLineLength);
                    offset = inputLineLength;
                }
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str, offset, inputLineLength);

        return wrappedLine.toString();
    }

    /**
     * Returns the line separator used when the caller does not supply one.
     *
     * @return the value of the <code>line.separator</code> system property,
     *  or <code>"\n"</code> if the property is missing or may not be read
     */
    private static String systemLineSeparator() {
        try {
            String separator = System.getProperty("line.separator");
            return separator != null ? separator : "\n";
        } catch (SecurityException ignored) {
            // property access denied: fall back rather than fail the wrap
            return "\n";
        }
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(String str) {
        return capitalize(str, (char[]) null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case. The String is processed by code point, so a letter stored as
     * a surrogate pair is capitalized as one character; such a pair is never
     * treated as a delimiter.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *)            = null
     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = *
     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     * @since 2.1
     */
    public static String capitalize(String str, char... delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        int strLen = str.length();
        StringBuilder buffer = new StringBuilder(strLen);
        boolean capitalizeNext = true;
        for (int i = 0; i < strLen; ) {
            int codePoint = str.codePointAt(i);
            int width = Character.charCount(codePoint);

            // only a single-char code point can equal a char delimiter
            if (width == 1 && isDelimiter((char) codePoint, delimiters)) {
                buffer.append((char) codePoint);
                capitalizeNext = true;
            } else if (capitalizeNext) {
                buffer.appendCodePoint(Character.toTitleCase(codePoint));
                capitalizeNext = false;
            } else {
                buffer.appendCodePoint(codePoint);
            }
            i += width;
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters. </p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(String str) {
        return capitalizeFully(str, (char[]) null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters. </p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case. The String is first lower-cased with
     * {@link String#toLowerCase()}, which applies the default locale's rules.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *)            = null
     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully(*, new char[0])     = *
     * WordUtils.capitalizeFully("i am FINE", null)  = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @since 2.1
     */
    public static String capitalizeFully(String str, char... delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        str = str.toLowerCase();
        return capitalize(str, delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(String str) {
        return uncapitalize(str, (char[]) null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. </p>
     *
     * <p>If the delimiters array is null, whitespace as defined by
     * {@link Character#isWhitespace(char)} separates the words.
     * A <code>null</code> input String returns <code>null</code>.
     * The String is processed by code point, so a letter stored as a
     * surrogate pair is uncapitalized as one character; such a pair is never
     * treated as a delimiter.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *)            = null
     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize(*, new char[0])     = *
     * WordUtils.uncapitalize("I Am FINE", null)  = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     * @since 2.1
     */
    public static String uncapitalize(String str, char... delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        int strLen = str.length();
        StringBuilder buffer = new StringBuilder(strLen);
        boolean uncapitalizeNext = true;
        for (int i = 0; i < strLen; ) {
            int codePoint = str.codePointAt(i);
            int width = Character.charCount(codePoint);

            // only a single-char code point can equal a char delimiter
            if (width == 1 && isDelimiter((char) codePoint, delimiters)) {
                buffer.append((char) codePoint);
                uncapitalizeNext = true;
            } else if (uncapitalizeNext) {
                buffer.appendCodePoint(Character.toLowerCase(codePoint));
                uncapitalizeNext = false;
            } else {
                buffer.appendCodePoint(codePoint);
            }
            i += width;
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     *
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * The String is processed by code point, so letters stored as surrogate
     * pairs have their case swapped as well.</p>
     *
     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     *
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(String str) {
        int strLen;
        if (str == null || (strLen = str.length()) == 0) {
            return str;
        }
        StringBuilder buffer = new StringBuilder(strLen);

        // the start of the String counts as "after whitespace"
        boolean afterWhitespace = true;

        for (int i = 0; i < strLen; ) {
            int codePoint = str.codePointAt(i);
            int swapped;
            if (Character.isUpperCase(codePoint) || Character.isTitleCase(codePoint)) {
                swapped = Character.toLowerCase(codePoint);
            } else if (Character.isLowerCase(codePoint)) {
                swapped = afterWhitespace
                        ? Character.toTitleCase(codePoint)
                        : Character.toUpperCase(codePoint);
            } else {
                swapped = codePoint;
            }
            buffer.appendCodePoint(swapped);
            afterWhitespace = Character.isWhitespace(codePoint);
            i += Character.charCount(codePoint);
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after
     * whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null)           = null
     * WordUtils.initials("")             = ""
     * WordUtils.initials("Ben John Lee") = "BJL"
     * WordUtils.initials("Ben J.Lee")    = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     * @since 2.2
     */
    public static String initials(String str) {
        return initials(str, (char[]) null);
    }

    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after the
     * defined delimiters are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.
     * An initial stored as a surrogate pair is copied whole.</p>
     *
     * <pre>
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String)
     * @since 2.2
     */
    public static String initials(String str, char... delimiters) {
        if (str == null || str.length() == 0) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return "";
        }
        int strLen = str.length();
        StringBuilder buffer = new StringBuilder(strLen / 2 + 1);
        boolean lastWasGap = true;
        for (int i = 0; i < strLen; ) {
            int codePoint = str.codePointAt(i);
            int width = Character.charCount(codePoint);

            // only a single-char code point can equal a char delimiter
            if (width == 1 && isDelimiter((char) codePoint, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                buffer.appendCodePoint(codePoint);
                lastWasGap = false;
            }
            // any other character is inside a word and is ignored
            i += width;
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * Is the character a delimiter.
     *
     * @param ch  the character to check
     * @param delimiters  the delimiters, <code>null</code> means whitespace
     *  as defined by {@link Character#isWhitespace(char)}
     * @return true if it is a delimiter
     */
    private static boolean isDelimiter(char ch, char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(ch);
        }
        for (char delimiter : delimiters) {
            if (ch == delimiter) {
                return true;
            }
        }
        return false;
    }

    //-----------------------------------------------------------------------
    /**
     * Abbreviates a string nicely.
     *
     * This method searches for the first space at or after the lower limit and
     * abbreviates the String there. It will also append any String passed as a
     * parameter to the end of the String. The upper limit can be specified to
     * forcibly abbreviate a String.
     *
     * @param str         the string to be abbreviated. If null is passed, null is returned.
     *                    If the empty String is passed, the empty string is returned.
     * @param lower       the lower limit. A negative value is treated as 0 and a value
     *                    beyond the end of the string as the string length.
     * @param upper       the upper limit; specify -1 if no limit is desired.
     *                    If the upper limit is lower than the lower limit, it will be
     *                    adjusted to be the same as the lower limit.
     * @param appendToEnd String to be appended to the end of the abbreviated string.
     *                    This is appended ONLY if the string was indeed abbreviated.
     *                    The append does not count towards the lower or upper limits.
     *                    <code>null</code> appends nothing.
     * @return the abbreviated String.
     * @since 2.4
     */
    public static String abbreviate(String str, int lower, int upper, String appendToEnd) {
        // initial parameter checks
        if (str == null) {
            return null;
        }
        int length = str.length();
        if (length == 0) {
            return "";
        }

        // keep lower inside [0, length]; a negative lower would otherwise be
        // copied into upper below and break substring(0, upper)
        if (lower < 0) {
            lower = 0;
        }
        if (lower > length) {
            lower = length;
        }
        // if the upper value is -1 (i.e. no limit) or is greater
        // than the length of the string, set to the length of the string
        if (upper == -1 || upper > length) {
            upper = length;
        }
        // if upper is less than lower, raise it to lower
        if (upper < lower) {
            upper = lower;
        }

        String suffix = (appendToEnd == null ? "" : appendToEnd);
        int index = str.indexOf(' ', lower);
        if (index == -1) {
            // no space to break at: cut at upper, and only if abbreviation
            // has occurred do we append the appendToEnd value
            String head = str.substring(0, upper);
            return upper != length ? head + suffix : head;
        }
        // break at the space, unless it lies beyond the upper limit
        int cut = (index > upper ? upper : index);
        return str.substring(0, cut) + suffix;
    }

}