001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtIncompatible; 021import com.google.common.base.Ascii; 022import com.google.common.base.Optional; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027import com.google.errorprone.annotations.CanIgnoreReturnValue; 028import java.io.BufferedReader; 029import java.io.IOException; 030import java.io.InputStream; 031import java.io.Reader; 032import java.io.StringReader; 033import java.io.Writer; 034import java.nio.charset.Charset; 035import java.util.Iterator; 036import java.util.List; 037import javax.annotation.CheckForNull; 038import org.checkerframework.checker.nullness.qual.Nullable; 039 040/** 041 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code 042 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead, 043 * it is an immutable <i>supplier</i> of {@code Reader} instances. 044 * 045 * <p>{@code CharSource} provides two kinds of methods: 046 * 047 * <ul> 048 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 049 * instance each time they are called. The caller is responsible for ensuring that the 050 * returned reader is closed. 051 * <li><b>Convenience methods:</b> These are implementations of common operations that are 052 * typically implemented by opening a reader using one of the methods in the first category, 053 * doing something and finally closing the reader that was opened. 054 * </ul> 055 * 056 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source 057 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code 058 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to 059 * be an empty line at the end if the contents are terminated with a line separator. 060 * 061 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 062 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 063 * 064 * <p><b>Note:</b> In general, {@code CharSource} is intended to be used for "file-like" sources 065 * that provide readers that are: 066 * 067 * <ul> 068 * <li><b>Finite:</b> Many operations, such as {@link #length()} and {@link #read()}, will either 069 * block indefinitely or fail if the source creates an infinite reader. 070 * <li><b>Non-destructive:</b> A <i>destructive</i> reader will consume or otherwise alter the 071 * source as they are read from it. A source that provides such readers will not be reusable, 072 * and operations that read from the stream (including {@link #length()}, in some 073 * implementations) will prevent further operations from completing as expected. 074 * </ul> 075 * 076 * @since 14.0 077 * @author Colin Decker 078 */ 079@GwtIncompatible 080@ElementTypesAreNonnullByDefault 081public abstract class CharSource { 082 083 /** Constructor for use by subclasses. */ 084 protected CharSource() {} 085 086 /** 087 * Returns a {@link ByteSource} view of this char source that encodes chars read from this source 088 * as bytes using the given {@link Charset}. 089 * 090 * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, 091 * the default implementation of this method will ensure that the original {@code CharSource} is 092 * returned, rather than round-trip encoding. Subclasses that override this method should behave 093 * the same way. 094 * 095 * @since 20.0 096 */ 097 @Beta 098 public ByteSource asByteSource(Charset charset) { 099 return new AsByteSource(charset); 100 } 101 102 /** 103 * Opens a new {@link Reader} for reading from this source. This method returns a new, independent 104 * reader each time it is called. 105 * 106 * <p>The caller is responsible for ensuring that the returned reader is closed. 107 * 108 * @throws IOException if an I/O error occurs while opening the reader 109 */ 110 public abstract Reader openStream() throws IOException; 111 112 /** 113 * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, 114 * independent reader each time it is called. 115 * 116 * <p>The caller is responsible for ensuring that the returned reader is closed. 117 * 118 * @throws IOException if an I/O error occurs while of opening the reader 119 */ 120 public BufferedReader openBufferedStream() throws IOException { 121 Reader reader = openStream(); 122 return (reader instanceof BufferedReader) 123 ? (BufferedReader) reader 124 : new BufferedReader(reader); 125 } 126 127 /** 128 * Returns the size of this source in chars, if the size can be easily determined without actually 129 * opening the data stream. 130 * 131 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code 132 * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i> 133 * that this method will return a different number of chars than would be returned by reading all 134 * of the chars. 135 * 136 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may 137 * return a different number of chars if the contents are changed. 138 * 139 * @since 19.0 140 */ 141 @Beta 142 public Optional<Long> lengthIfKnown() { 143 return Optional.absent(); 144 } 145 146 /** 147 * Returns the length of this source in chars, even if doing so requires opening and traversing an 148 * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 149 * 150 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If 151 * absent, it will fall back to a heavyweight operation that will open a stream, {@link 152 * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that 153 * were skipped. 154 * 155 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 156 * implementation, it is <i>possible</i> that this method will return a different number of chars 157 * than would be returned by reading all of the chars. 158 * 159 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 160 * number of chars if the contents are changed. 161 * 162 * @throws IOException if an I/O error occurs while reading the length of this source 163 * @since 19.0 164 */ 165 @Beta 166 public long length() throws IOException { 167 Optional<Long> lengthIfKnown = lengthIfKnown(); 168 if (lengthIfKnown.isPresent()) { 169 return lengthIfKnown.get(); 170 } 171 172 Closer closer = Closer.create(); 173 try { 174 Reader reader = closer.register(openStream()); 175 return countBySkipping(reader); 176 } catch (Throwable e) { 177 throw closer.rethrow(e); 178 } finally { 179 closer.close(); 180 } 181 } 182 183 private long countBySkipping(Reader reader) throws IOException { 184 long count = 0; 185 long read; 186 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 187 count += read; 188 } 189 return count; 190 } 191 192 /** 193 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 194 * Does not close {@code appendable} if it is {@code Closeable}. 195 * 196 * @return the number of characters copied 197 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 198 * appendable} 199 */ 200 @CanIgnoreReturnValue 201 public long copyTo(Appendable appendable) throws IOException { 202 checkNotNull(appendable); 203 204 Closer closer = Closer.create(); 205 try { 206 Reader reader = closer.register(openStream()); 207 return CharStreams.copy(reader, appendable); 208 } catch (Throwable e) { 209 throw closer.rethrow(e); 210 } finally { 211 closer.close(); 212 } 213 } 214 215 /** 216 * Copies the contents of this source to the given sink. 217 * 218 * @return the number of characters copied 219 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 220 * sink} 221 */ 222 @CanIgnoreReturnValue 223 public long copyTo(CharSink sink) throws IOException { 224 checkNotNull(sink); 225 226 Closer closer = Closer.create(); 227 try { 228 Reader reader = closer.register(openStream()); 229 Writer writer = closer.register(sink.openStream()); 230 return CharStreams.copy(reader, writer); 231 } catch (Throwable e) { 232 throw closer.rethrow(e); 233 } finally { 234 closer.close(); 235 } 236 } 237 238 /** 239 * Reads the contents of this source as a string. 240 * 241 * @throws IOException if an I/O error occurs while reading from this source 242 */ 243 public String read() throws IOException { 244 Closer closer = Closer.create(); 245 try { 246 Reader reader = closer.register(openStream()); 247 return CharStreams.toString(reader); 248 } catch (Throwable e) { 249 throw closer.rethrow(e); 250 } finally { 251 closer.close(); 252 } 253 } 254 255 /** 256 * Reads the first line of this source as a string. Returns {@code null} if this source is empty. 257 * 258 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 259 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 260 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 261 * it does. 262 * 263 * @throws IOException if an I/O error occurs while reading from this source 264 */ 265 @CheckForNull 266 public String readFirstLine() throws IOException { 267 Closer closer = Closer.create(); 268 try { 269 BufferedReader reader = closer.register(openBufferedStream()); 270 return reader.readLine(); 271 } catch (Throwable e) { 272 throw closer.rethrow(e); 273 } finally { 274 closer.close(); 275 } 276 } 277 278 /** 279 * Reads all the lines of this source as a list of strings. The returned list will be empty if 280 * this source is empty. 281 * 282 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 283 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 284 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 285 * it does. 286 * 287 * @throws IOException if an I/O error occurs while reading from this source 288 */ 289 public ImmutableList<String> readLines() throws IOException { 290 Closer closer = Closer.create(); 291 try { 292 BufferedReader reader = closer.register(openBufferedStream()); 293 List<String> result = Lists.newArrayList(); 294 String line; 295 while ((line = reader.readLine()) != null) { 296 result.add(line); 297 } 298 return ImmutableList.copyOf(result); 299 } catch (Throwable e) { 300 throw closer.rethrow(e); 301 } finally { 302 closer.close(); 303 } 304 } 305 306 /** 307 * Reads lines of text from this source, processing each line as it is read using the given {@link 308 * LineProcessor processor}. Stops when all lines have been processed or the processor returns 309 * {@code false} and returns the result produced by the processor. 310 * 311 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 312 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 313 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 314 * it does. 315 * 316 * @throws IOException if an I/O error occurs while reading from this source or if {@code 317 * processor} throws an {@code IOException} 318 * @since 16.0 319 */ 320 @Beta 321 @CanIgnoreReturnValue // some processors won't return a useful result 322 @ParametricNullness 323 public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException { 324 checkNotNull(processor); 325 326 Closer closer = Closer.create(); 327 try { 328 Reader reader = closer.register(openStream()); 329 return CharStreams.readLines(reader, processor); 330 } catch (Throwable e) { 331 throw closer.rethrow(e); 332 } finally { 333 closer.close(); 334 } 335 } 336 337 /** 338 * Returns whether the source has zero chars. The default implementation first checks {@link 339 * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be 340 * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF. 341 * 342 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 343 * chars are actually available for reading. This means that a source may return {@code true} from 344 * {@code isEmpty()} despite having readable content. 345 * 346 * @throws IOException if an I/O error occurs 347 * @since 15.0 348 */ 349 public boolean isEmpty() throws IOException { 350 Optional<Long> lengthIfKnown = lengthIfKnown(); 351 if (lengthIfKnown.isPresent()) { 352 return lengthIfKnown.get() == 0L; 353 } 354 Closer closer = Closer.create(); 355 try { 356 Reader reader = closer.register(openStream()); 357 return reader.read() == -1; 358 } catch (Throwable e) { 359 throw closer.rethrow(e); 360 } finally { 361 closer.close(); 362 } 363 } 364 365 /** 366 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 367 * the source will contain the concatenated data from the streams of the underlying sources. 368 * 369 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 370 * close the open underlying stream. 371 * 372 * @param sources the sources to concatenate 373 * @return a {@code CharSource} containing the concatenated data 374 * @since 15.0 375 */ 376 public static CharSource concat(Iterable<? extends CharSource> sources) { 377 return new ConcatenatedCharSource(sources); 378 } 379 380 /** 381 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 382 * the source will contain the concatenated data from the streams of the underlying sources. 383 * 384 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 385 * close the open underlying stream. 386 * 387 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 388 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 389 * eagerly fetches data for each source when iterated (rather than producing sources that only 390 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 391 * possible. 392 * 393 * @param sources the sources to concatenate 394 * @return a {@code CharSource} containing the concatenated data 395 * @throws NullPointerException if any of {@code sources} is {@code null} 396 * @since 15.0 397 */ 398 public static CharSource concat(Iterator<? extends CharSource> sources) { 399 return concat(ImmutableList.copyOf(sources)); 400 } 401 402 /** 403 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 404 * the source will contain the concatenated data from the streams of the underlying sources. 405 * 406 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 407 * close the open underlying stream. 408 * 409 * @param sources the sources to concatenate 410 * @return a {@code CharSource} containing the concatenated data 411 * @throws NullPointerException if any of {@code sources} is {@code null} 412 * @since 15.0 413 */ 414 public static CharSource concat(CharSource... sources) { 415 return concat(ImmutableList.copyOf(sources)); 416 } 417 418 /** 419 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 420 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 421 * the {@code charSequence} is mutated while it is being read, so don't do that. 422 * 423 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 424 */ 425 public static CharSource wrap(CharSequence charSequence) { 426 return charSequence instanceof String 427 ? new StringCharSource((String) charSequence) 428 : new CharSequenceCharSource(charSequence); 429 } 430 431 /** 432 * Returns an immutable {@link CharSource} that contains no characters. 433 * 434 * @since 15.0 435 */ 436 public static CharSource empty() { 437 return EmptyCharSource.INSTANCE; 438 } 439 440 /** A byte source that reads chars from this source and encodes them as bytes using a charset. */ 441 private final class AsByteSource extends ByteSource { 442 443 final Charset charset; 444 445 AsByteSource(Charset charset) { 446 this.charset = checkNotNull(charset); 447 } 448 449 @Override 450 public CharSource asCharSource(Charset charset) { 451 if (charset.equals(this.charset)) { 452 return CharSource.this; 453 } 454 return super.asCharSource(charset); 455 } 456 457 @Override 458 public InputStream openStream() throws IOException { 459 return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); 460 } 461 462 @Override 463 public String toString() { 464 return CharSource.this.toString() + ".asByteSource(" + charset + ")"; 465 } 466 } 467 468 private static class CharSequenceCharSource extends CharSource { 469 470 private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); 471 472 protected final CharSequence seq; 473 474 protected CharSequenceCharSource(CharSequence seq) { 475 this.seq = checkNotNull(seq); 476 } 477 478 @Override 479 public Reader openStream() { 480 return new CharSequenceReader(seq); 481 } 482 483 @Override 484 public String read() { 485 return seq.toString(); 486 } 487 488 @Override 489 public boolean isEmpty() { 490 return seq.length() == 0; 491 } 492 493 @Override 494 public long length() { 495 return seq.length(); 496 } 497 498 @Override 499 public Optional<Long> lengthIfKnown() { 500 return Optional.of((long) seq.length()); 501 } 502 503 /** 504 * Returns an iterator over the lines in the string. If the string ends in a newline, a final 505 * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). 506 */ 507 private Iterator<String> linesIterator() { 508 return new AbstractIterator<String>() { 509 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 510 511 @Override 512 @CheckForNull 513 protected String computeNext() { 514 if (lines.hasNext()) { 515 String next = lines.next(); 516 // skip last line if it's empty 517 if (lines.hasNext() || !next.isEmpty()) { 518 return next; 519 } 520 } 521 return endOfData(); 522 } 523 }; 524 } 525 526 @Override 527 @CheckForNull 528 public String readFirstLine() { 529 Iterator<String> lines = linesIterator(); 530 return lines.hasNext() ? lines.next() : null; 531 } 532 533 @Override 534 public ImmutableList<String> readLines() { 535 return ImmutableList.copyOf(linesIterator()); 536 } 537 538 @Override 539 @ParametricNullness 540 public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException { 541 Iterator<String> lines = linesIterator(); 542 while (lines.hasNext()) { 543 if (!processor.processLine(lines.next())) { 544 break; 545 } 546 } 547 return processor.getResult(); 548 } 549 550 @Override 551 public String toString() { 552 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 553 } 554 } 555 556 /** 557 * Subclass specialized for string instances. 558 * 559 * <p>Since Strings are immutable and built into the jdk we can optimize some operations 560 * 561 * <ul> 562 * <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can 563 * use {@link String#getChars(int, int, char[], int)} instead of copying characters one by 564 * one with {@link CharSequence#charAt(int)}. 565 * <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link 566 * #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length 567 * can't change, and it is faster because many writers and appendables are optimized for 568 * appending string instances. 569 * </ul> 570 */ 571 private static class StringCharSource extends CharSequenceCharSource { 572 protected StringCharSource(String seq) { 573 super(seq); 574 } 575 576 @Override 577 public Reader openStream() { 578 return new StringReader((String) seq); 579 } 580 581 @Override 582 public long copyTo(Appendable appendable) throws IOException { 583 appendable.append(seq); 584 return seq.length(); 585 } 586 587 @Override 588 public long copyTo(CharSink sink) throws IOException { 589 checkNotNull(sink); 590 Closer closer = Closer.create(); 591 try { 592 Writer writer = closer.register(sink.openStream()); 593 writer.write((String) seq); 594 return seq.length(); 595 } catch (Throwable e) { 596 throw closer.rethrow(e); 597 } finally { 598 closer.close(); 599 } 600 } 601 } 602 603 private static final class EmptyCharSource extends StringCharSource { 604 605 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 606 607 private EmptyCharSource() { 608 super(""); 609 } 610 611 @Override 612 public String toString() { 613 return "CharSource.empty()"; 614 } 615 } 616 617 private static final class ConcatenatedCharSource extends CharSource { 618 619 private final Iterable<? extends CharSource> sources; 620 621 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 622 this.sources = checkNotNull(sources); 623 } 624 625 @Override 626 public Reader openStream() throws IOException { 627 return new MultiReader(sources.iterator()); 628 } 629 630 @Override 631 public boolean isEmpty() throws IOException { 632 for (CharSource source : sources) { 633 if (!source.isEmpty()) { 634 return false; 635 } 636 } 637 return true; 638 } 639 640 @Override 641 public Optional<Long> lengthIfKnown() { 642 long result = 0L; 643 for (CharSource source : sources) { 644 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 645 if (!lengthIfKnown.isPresent()) { 646 return Optional.absent(); 647 } 648 result += lengthIfKnown.get(); 649 } 650 return Optional.of(result); 651 } 652 653 @Override 654 public long length() throws IOException { 655 long result = 0L; 656 for (CharSource source : sources) { 657 result += source.length(); 658 } 659 return result; 660 } 661 662 @Override 663 public String toString() { 664 return "CharSource.concat(" + sources + ")"; 665 } 666 } 667}