001package org.unix4j.util;
002
003import java.util.ArrayList;
004import java.util.List;
005
006import org.unix4j.line.Line;
007import org.unix4j.line.SingleCharSequenceLine;
008
009/**
010 * Utility class with static methods for strings.
011 */
012public class StringUtil {
013        /**
014         * Returns the given {@code value} as a string of fixed length {@code size}
015         * padding or truncating the value if necessary.
016         * <p>
017         * If {@code left==true}, the given {@code value} is left-aligned appending
018         * the given {@code filler} character to make up the fixed length. If the
019         * given {@code value} turns out to be longer than {@code size} when
020         * transformed into a string, it is truncated from the right.
021         * <p>
022         * If {@code left==false}, the given {@code value} is right-aligned and
023         * {@code filler} characters are added from the left if necessary. If
024         * {@code value} is longer than {@code size} it is truncated from the left.
025         * <p>
026         * Examples with {@code size=3}:
027         * <ul>
028         * <li>left=true, filler=' ', value=89 --> "89 "</li>
029         * <li>left=true, filler=' ', value=1234 --> "123"</li>
030         * <li>left=false, filler=' ', value=89 --> " 89"</li>
031         * <li>left=false, filler='0', value=89 --> "089"</li>
032         * <li>left=false, filler=' ', value=1234 --> "234"</li>
033         * </ul>
034         * 
035         * @param size
036         *            the fixed size of the returned string
037         * @param alignLeft
038         *            true if {@code value} should be left-aligned
039         * @param filler
040         *            the filler character if {@code value} is shorter than
041         *            {@code size}
042         * @param value
043         *            the value to format
044         * @return the value as a fixed size string, padded or truncated if
045         *         necessary
046         */
047        public static final String fixSizeString(int size, boolean alignLeft,
048                        char filler, long value) {
049                return fixSizeString(size, alignLeft, filler, String.valueOf(value));
050        }
051
052        /**
053         * Returns the given string {@code s} into a string of fixed length
054         * {@code size} padding or truncating the string with spaces if necessary.
055         * <p>
056         * If {@code left==true}, the given string {@code s} is left-aligned
057         * appending spaces to make up the fixed length. If {@code s} turns out to
058         * be longer than {@code size} it is truncated from the right.
059         * <p>
060         * If {@code left==false}, the given string {@code s} is right-aligned and
061         * space characters are added from the left if necessary. If {@code s} is
062         * longer than {@code size} it is truncated from the left.
063         * <p>
064         * Examples with {@code size=3}:
065         * <ul>
066         * <li>left=true, s="XY" --> "XY "</li>
067         * <li>left=true, s="Abcd" --> "Abc"</li>
068         * <li>left=false, s="XY" --> " XY"</li>
069         * <li>left=false, s="Abcd" --> "bcd"</li>
070         * </ul>
071         * 
072         * @param size
073         *            the fixed size of the returned string
074         * @param alignLeft
075         *            true if {@code value} should be left-aligned
076         * @param s
077         *            the string to format
078         * @return the string {@code s} as a fixed size string, padded or truncated
079         *         if necessary
080         */
081        public static final String fixSizeString(int size, boolean alignLeft,
082                        String s) {
083                return fixSizeString(size, alignLeft, ' ', s);
084        }
085
086        /**
087         * Returns the given string {@code s} into a string of fixed length
088         * {@code size} padding or truncating the string if necessary.
089         * <p>
090         * If {@code left==true}, the given string {@code s} is left-aligned
091         * appending the given {@code filler} character to make up the fixed length.
092         * If {@code s} turns out to be longer than {@code size} it is truncated
093         * from the right.
094         * <p>
095         * If {@code left==false}, the given string {@code s} is right-aligned and
096         * {@code filler} characters are added from the left if necessary. If
097         * {@code s} is longer than {@code size} it is truncated from the left.
098         * <p>
099         * Examples with {@code size=3}:
100         * <ul>
101         * <li>left=true, filler=' ', s="XY" --> "XY "</li>
102         * <li>left=true, filler=' ', s="Abcd" --> "Abc"</li>
103         * <li>left=false, filler=' ', s="XY" --> " XY"</li>
104         * <li>left=false, filler='0', s="12" --> "012"</li>
105         * <li>left=false, filler=' ', s="Abcd" --> "bcd"</li>
106         * </ul>
107         * 
108         * @param size
109         *            the fixed size of the returned string
110         * @param alignLeft
111         *            true if {@code value} should be left-aligned
112         * @param filler
113         *            the filler character if {@code s} is shorter than {@code size}
114         * @param s
115         *            the string to format
116         * @return the string {@code s} as a fixed size string, padded or truncated
117         *         if necessary
118         */
119        public static final String fixSizeString(int size, boolean alignLeft,
120                        char filler, String s) {
121                if (s.length() < size) {
122                        final StringBuilder sb = new StringBuilder(size);
123                        if (alignLeft)
124                                sb.append(s);
125                        for (int i = 0; i < size - s.length(); i++) {
126                                sb.append(filler);
127                        }
128                        if (!alignLeft)
129                                sb.append(s);
130                        return sb.toString();
131                } else {
132                        return alignLeft ? s.substring(0, size) : s.substring(s.length()
133                                        - size, s.length());
134                }
135        }
136
137        /**
138         * Splits the given string into lines and returns each line as a separate
139         * string in the result list. The result list will contain at least one
140         * entry unless the string is empty.
141         * <p>
142         * A trailing newline after the last line is ignored, meaning that no empty
143         * string is appended as separate line if the string ends with a newline.
144         * However multiple trailing newlines will still lead to empty line strings
145         * at the end of the list.
146         * <p>
147         * Note that all line ending characters are accepted to split lines, no
148         * matter what operating system this code is hosted on. More precisely, the
149         * {@link Line#LF LF} and {@link Line#CR CR} characters are recognized as
150         * line ending characters, either as single character or as a pair
151         * {@code CR+LF} or {@code LF+CR}.
152         * 
153         * @param s
154         *            the string to split
155         * @return a list with the lines found in {@code s}
156         */
157        public static final List<Line> splitLines(String s) {
158                final List<Line> lines = new ArrayList<Line>();
159                int start = 0;
160                int index = 0;
161                while (index < s.length()) {
162                        final char ch = s.charAt(index);
163                        if (ch == Line.LF || ch == Line.CR) {
164                                final int lineEndingStart = index;
165                                index++;
166                                if (index < s.length()) {
167                                        final char ch2 = s.charAt(index);
168                                        if (ch2 != ch && (ch2 == Line.LF || ch2 == Line.CR)) {
169                                                index++;
170                                        }
171                                }
172                                final Line line = new SingleCharSequenceLine(s, start,
173                                                lineEndingStart - start, index - lineEndingStart);
174                                lines.add(line);
175                                start = index;
176                        } else {
177                                index++;
178                        }
179                }
180                if (start < s.length()) {
181                        final Line line = new SingleCharSequenceLine(s, start, s.length()
182                                        - start, 0);
183                        lines.add(line);
184                }
185                return lines;
186        }
187
188        /**
189         * Finds and returns the start of the given sequence after trimming
190         * whitespace characters from the left.
191         * 
192         * @param s
193         *            the character sequence
194         * @return the index containing the first non-whitespace character, or the
195         *         length of the character sequence if all characters are blank
196         */
197        public static int findStartTrimWhitespace(CharSequence s) {
198                return findStartTrimWhitespace(s, 0);
199        }
200
201        /**
202         * Finds and returns the start of the given sequence after trimming
203         * whitespace characters from the left, starting at the given {@code start}
204         * index.
205         * 
206         * @param s
207         *            the character sequence
208         * @param start
209         *            the first index to consider in the char sequence
210         * @return the index containing the first non-whitespace character, or the
211         *         length of the character sequence if all characters are blank
212         */
213        public static int findStartTrimWhitespace(CharSequence s, int start) {
214                final int len = s.length();
215                for (int i = start; i < len; i++) {
216                        if (!Character.isWhitespace(s.charAt(i))) {
217                                return i;
218                        }
219                }
220                return len;
221        }
222
223        /**
224         * Finds and returns the end of the given character sequence after trimming
225         * white space characters from the right. Whitespace characters are defined
226         * by {@link Character#isWhitespace(char)}. .
227         * 
228         * @param s
229         *            the character sequence
230         * @return the index after the last non-whitespace character, or zero if all
231         *         characters are blank
232         */
233        public static int findEndTrimWhitespace(CharSequence s) {
234                for (int i = s.length(); i > 0; i--) {
235                        if (!Character.isWhitespace(s.charAt(i - 1))) {
236                                return i;
237                        }
238                }
239                return 0;
240        }
241
242        /**
243         * Finds and returns the start of the given sequence after trimming newline
244         * characters from the left. The following character sequences are treated
245         * as newline characters: "\n", "\r\n".
246         * 
247         * @param s
248         *            the character sequence
249         * @return the index containing the first character that is not part of a
250         *         newline sequence, or the length of the character sequence if all
251         *         characters are newline chars
252         */
253        public static int findStartTrimNewlineChars(CharSequence s) {
254                return findStartTrimNewlineChars(s, 0);
255        }
256
257        /**
258         * Finds and returns the start of the given sequence after trimming newline
259         * characters from the left, starting at the given {@code start} index. .
260         * The following character sequences are treated as newline characters:
261         * "\n", "\r\n".
262         * 
263         * @param s
264         *            the character sequence
265         * @param start
266         *            the first index to consider in the char sequence
267         * @return the index containing the first character that is not part of a
268         *         newline sequence, or the length of the character sequence if all
269         *         characters are newline chars
270         */
271        public static int findStartTrimNewlineChars(CharSequence s, int start) {
272                final int len = s.length();
273                for (int i = start; i < len;) {
274                        final int ch = s.charAt(i);
275                        i++;
276                        if (ch != '\n') {
277                                if (ch != '\r' || i >= len || s.charAt(i) != '\n') {
278                                        return i - 1;
279                                }
280                                i++;// increment again, it was "\r\n"
281                        }
282                }
283                return len;
284        }
285
286        /**
287         * Finds and returns the end of the given character sequence after trimming
288         * new line characters from the right. The following character sequences are
289         * treated as newline characters: "\n", "\r\n".
290         * 
291         * @param s
292         *            the character sequence
293         * @return the index after the last character that is not part of a newline
294         *         sequence, or zero if all characters are newline chars
295         */
296        public static int findEndTrimNewlineChars(CharSequence s) {
297                for (int i = s.length(); i > 0;) {
298                        if (s.charAt(i - 1) != '\n') {
299                                return i;
300                        }
301                        i--;
302                        if (i > 0 && s.charAt(i - 1) == '\r') {
303                                i--;
304                        }
305                }
306                return 0;
307        }
308
309        /**
310         * Finds and returns the first whitespace character in the given sequence,
311         * or the length of the string if no whitespace is found.
312         * 
313         * @param s
314         *            the character sequence
315         * @return the index containing the first whitespace character, or the
316         *         length of the character sequence if all characters are blank
317         */
318        public static int findWhitespace(CharSequence s) {
319                return findWhitespace(s, 0);
320        }
321
322        /**
323         * Finds and returns the first whitespace character in the given sequence at
324         * or after start. Returns the length of the string if no whitespace is
325         * found.
326         * 
327         * @param s
328         *            the character sequence
329         * @param start
330         *            the first index to consider in the char sequence
331         * @return the index containing the first whitespace character at or after
332         *         start, or the length of the character sequence if all characters
333         *         are blank
334         */
335        public static int findWhitespace(CharSequence s, int start) {
336                final int len = s.length();
337                for (int i = start; i < len; i++) {
338                        if (Character.isWhitespace(s.charAt(i))) {
339                                return i;
340                        }
341                }
342                return len;
343        }
344
345        /**
346         * Returns true if and only if the string {@code s} contains the specified
347         * target string performing case insensitive string comparison.
348         * 
349         * @param source
350         *            the characters being searched.
351         * @param target
352         *            the characters being searched for.
353         * @return true if this string contains <code>s</code>, false otherwise
354         * @throws NullPointerException
355         *             if <code>s</code> is <code>null</code>
356         */
357        public static boolean containsIgnoreCase(String source, String target) {
358                return 0 <= indexOfIgnoreCase(source, target);
359        }
360
361        /**
362         * Tests if this string {@code s} starts with the specified prefix
363         * performing case insensitive string comparison.
364         * 
365         * @param s
366         *            the string to search
367         * @param prefix
368         *            the prefix.
369         * @return <code>true</code> if the character sequence represented by the
370         *         argument is a prefix of the character sequence represented by the
371         *         string s; <code>false</code> otherwise. Note also that
372         *         <code>true</code> will be returned if the argument is an empty
373         *         string or is equal to this <code>String</code> object as
374         *         determined by the {@link #equals(Object)} method.
375         */
376        public static boolean startsWithIgnoreCase(String s, String prefix) {
377                return 0 == indexOfIgnoreCase(s, prefix, 0);
378        }
379
380        /**
381         * Returns the index within the source string of the first occurrence of the
382         * specified target substring performing case insensitive string comparison.
383         * 
384         * 
385         * <p>
386         * The returned index is the smallest value <i>k</i> for which: <blockquote>
387         * 
388         * <pre>
389         * startsWithIgnoreCase(source.substring(<i>k</i>), target)
390         * </pre>
391         * 
392         * </blockquote> If no such value of <i>k</i> exists, then {@code -1} is
393         * returned.
394         * 
395         * <p>
396         * Copied from {@code String.indexOf(..)} modified to do case-insensitive
397         * search. The source is the character array being searched, and the target
398         * is the string being searched for.
399         * 
400         * @param source
401         *            the characters being searched.
402         * @param target
403         *            the characters being searched for.
404         * @return the index of the first occurrence of the specified substring
405         *         (ignoring the case), or {@code -1} if there is no such
406         *         occurrence.
407         */
408        public static final int indexOfIgnoreCase(String source, String target) {
409                return indexOfIgnoreCase(source, target, Integer.MAX_VALUE);
410        }
411
412        /**
413         * 
414         * @param source
415         *            the characters being searched.
416         * @param target
417         *            the characters being searched for.
418         * @param maxIndex
419         *            the maximum index to return (for instance 0 if only the start
420         *            of the string is of interest)
421         */
422        private static int indexOfIgnoreCase(String source, String target,
423                        int maxIndex) {
424                if (maxIndex < 0) {
425                        throw new IllegalArgumentException("maxIndex cannot be negative: "
426                                        + maxIndex);
427                }
428                final int sourceCount = source.length();
429                final int targetCount = target.length();
430                final char first = target.charAt(0);
431                int max = Math.min(maxIndex, sourceCount - targetCount);
432
433                for (int i = 0; i <= max; i++) {
434                        final char ch = source.charAt(i);
435                        /* Look for first character. */
436                        if (!equalsIgnoreCase(ch, first)) {
437                                while (++i <= max && !equalsIgnoreCase(ch, first))
438                                        ;
439                        }
440
441                        /* Found first character, now look at the rest of v2 */
442                        if (i <= max) {
443                                int j = i + 1;
444                                int end = j + targetCount - 1;
445                                for (int k = 1; j < end && equalsIgnoreCase(source.charAt(j), target.charAt(k)); j++, k++)
446                                        ;
447
448                                if (j == end) {
449                                        /* Found whole string. */
450                                        return i;
451                                }
452                        }
453                }
454                return -1;
455        }
456
457        /**
458         * Returns true if the two characters are equal if case is ignored.
459         * 
460         * @param ch1
461         *            the first character
462         * @param ch2
463         *            the second character
464         * @return true if both characters are the same according to
465         *         case-insensitive comparison
466         */
467        public static boolean equalsIgnoreCase(char ch1, char ch2) {
468                if (ch1 == ch2)
469                        return true;
470                // If characters try converting both characters to uppercase
471                ch1 = Character.toUpperCase(ch1);
472                ch2 = Character.toUpperCase(ch2);
473                if (ch1 == ch2)
474                        return true;
475                // Unfortunately, conversion to uppercase does not work properly
476                // for the Georgian alphabet, which has strange rules about case
477                // conversion. So we need to make one last check before
478                // exiting.
479                return Character.toLowerCase(ch1) == Character.toLowerCase(ch2);
480        }
481
482        // no instances
483        private StringUtil() {
484                super();
485        }
486
487}