diff --git a/crates/std/src/io.rs b/crates/std/src/io.rs index c69cb09..71b0178 100644 --- a/crates/std/src/io.rs +++ b/crates/std/src/io.rs @@ -1,15 +1,22 @@ +#[cfg(test)] +mod tests; + pub mod error; +pub use self::buffered::{BufReader, BufWriter, IntoInnerError, LineWriter}; pub use self::error::{Error, ErrorKind, Result, SimpleMessage, const_error}; +pub mod buffered; +pub mod copy; pub mod cursor; -pub mod prelude; pub mod impls; +pub mod prelude; +pub mod util; use crate::mem::{MaybeUninit, take}; +use crate::ops::{Deref, DerefMut}; use crate::{cmp, fmt, slice, str, sys}; #[unstable(feature = "read_buf", issue = "78485")] pub use core::io::{BorrowedBuf, BorrowedCursor}; use core::slice::memchr; -use crate::ops::{Deref, DerefMut}; use crate::fs::File; use io::IoBase; @@ -36,7 +43,6 @@ pub fn stdin() -> Stdin { // Part took from the real std - const DEFAULT_BUF_SIZE: usize = crate::sys::io::DEFAULT_BUF_SIZE; struct Guard<'a> { @@ -56,7 +62,10 @@ pub(crate) unsafe fn append_to_string(buf: &mut String, f: F) -> Result) -> Result, { - let mut g = Guard { len: buf.len(), buf: unsafe { buf.as_mut_vec() } }; + let mut g = Guard { + len: buf.len(), + buf: unsafe { buf.as_mut_vec() }, + }; let ret = f(g.buf); // SAFETY: the caller promises to only append data to `buf` @@ -88,7 +97,10 @@ pub(crate) fn default_read_to_end( // Optionally limit the maximum bytes read on each iteration. // This adds an arbitrary fiddle factor to allow for more data than we expect. let mut max_read_size = size_hint - .and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE)) + .and_then(|s| { + s.checked_add(1024)? + .checked_next_multiple_of(DEFAULT_BUF_SIZE) + }) .unwrap_or(DEFAULT_BUF_SIZE); let mut initialized = 0; // Extra initialized bytes from previous loop iteration @@ -229,7 +241,10 @@ pub(crate) fn default_read_vectored(read: F, bufs: &mut [IoSliceMut<'_>]) -> where F: FnOnce(&mut [u8]) -> Result, { - let buf = bufs.iter_mut().find(|b| !b.is_empty()).map_or(&mut [][..], |b| &mut **b); + let buf = bufs + .iter_mut() + .find(|b| !b.is_empty()) + .map_or(&mut [][..], |b| &mut **b); read(buf) } @@ -237,7 +252,10 @@ pub(crate) fn default_write_vectored(write: F, bufs: &[IoSlice<'_>]) -> Resul where F: FnOnce(&[u8]) -> Result, { - let buf = bufs.iter().find(|b| !b.is_empty()).map_or(&[][..], |b| &**b); + let buf = bufs + .iter() + .find(|b| !b.is_empty()) + .map_or(&[][..], |b| &**b); write(buf) } @@ -252,7 +270,11 @@ pub(crate) fn default_read_exact(this: &mut R, mut buf: &mut [ Err(e) => return Err(e), } } - if !buf.is_empty() { Err(Error::READ_EXACT_EOF) } else { Ok(()) } + if !buf.is_empty() { + Err(Error::READ_EXACT_EOF) + } else { + Ok(()) + } } pub(crate) fn default_read_buf(read: F, mut cursor: BorrowedCursor<'_>) -> Result<()> @@ -307,7 +329,10 @@ pub(crate) fn default_write_fmt( } } - let mut output = Adapter { inner: this, error: Ok(()) }; + let mut output = Adapter { + inner: this, + error: Ok(()), + }; match fmt::write(&mut output, args) { Ok(()) => Ok(()), Err(..) => { @@ -2230,7 +2255,10 @@ pub trait BufRead: Read { where Self: Sized, { - Split { buf: self, delim: byte } + Split { + buf: self, + delim: byte, + } } /// Returns an iterator over the lines of this reader. @@ -2435,7 +2463,11 @@ impl BufRead for Chain { } fn consume(&mut self, amt: usize) { - if !self.done_first { self.first.consume(amt) } else { self.second.consume(amt) } + if !self.done_first { + self.first.consume(amt) + } else { + self.second.consume(amt) + } } fn read_until(&mut self, byte: u8, buf: &mut Vec) -> Result { @@ -2465,7 +2497,10 @@ impl SizeHint for Chain { #[inline] fn upper_bound(&self) -> Option { - match (SizeHint::upper_bound(&self.first), SizeHint::upper_bound(&self.second)) { + match ( + SizeHint::upper_bound(&self.first), + SizeHint::upper_bound(&self.second), + ) { (Some(first), Some(second)) => first.checked_add(second), _ => None, } diff --git a/crates/std/src/io/buffered/bufreader.rs b/crates/std/src/io/buffered/bufreader.rs new file mode 100644 index 0000000..40441dc --- /dev/null +++ b/crates/std/src/io/buffered/bufreader.rs @@ -0,0 +1,592 @@ +mod buffer; + +use buffer::Buffer; + +use crate::fmt; +use crate::io::{ + self, BorrowedCursor, BufRead, DEFAULT_BUF_SIZE, IoSliceMut, Read, Seek, SeekFrom, SizeHint, + SpecReadByte, uninlined_slow_read_byte, +}; + +/// The `BufReader` struct adds buffering to any reader. +/// +/// It can be excessively inefficient to work directly with a [`Read`] instance. +/// For example, every call to [`read`][`TcpStream::read`] on [`TcpStream`] +/// results in a system call. A `BufReader` performs large, infrequent reads on +/// the underlying [`Read`] and maintains an in-memory buffer of the results. +/// +/// `BufReader` can improve the speed of programs that make *small* and +/// *repeated* read calls to the same file or network socket. It does not +/// help when reading very large amounts at once, or reading just one or a few +/// times. It also provides no advantage when reading from a source that is +/// already in memory, like a [Vec]\. +/// +/// When the `BufReader` is dropped, the contents of its buffer will be +/// discarded. Creating multiple instances of a `BufReader` on the same +/// stream can cause data loss. Reading from the underlying reader after +/// unwrapping the `BufReader` with [`BufReader::into_inner`] can also cause +/// data loss. +/// +/// [`TcpStream::read`]: crate::net::TcpStream::read +/// [`TcpStream`]: crate::net::TcpStream +/// +/// # Examples +/// +/// ```no_run +/// use std::io::prelude::*; +/// use std::io::BufReader; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let f = File::open("log.txt")?; +/// let mut reader = BufReader::new(f); +/// +/// let mut line = String::new(); +/// let len = reader.read_line(&mut line)?; +/// println!("First line is {len} bytes long"); +/// Ok(()) +/// } +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +pub struct BufReader { + buf: Buffer, + inner: R, +} + +impl BufReader { + /// Creates a new `BufReader` with a default buffer capacity. The default is currently 8 KiB, + /// but may change in the future. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufReader; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let f = File::open("log.txt")?; + /// let reader = BufReader::new(f); + /// Ok(()) + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new(inner: R) -> BufReader { + BufReader::with_capacity(DEFAULT_BUF_SIZE, inner) + } + + pub(crate) fn try_new_buffer() -> io::Result { + Buffer::try_with_capacity(DEFAULT_BUF_SIZE) + } + + pub(crate) fn with_buffer(inner: R, buf: Buffer) -> Self { + Self { inner, buf } + } + + /// Creates a new `BufReader` with the specified buffer capacity. + /// + /// # Examples + /// + /// Creating a buffer with ten bytes of capacity: + /// + /// ```no_run + /// use std::io::BufReader; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let f = File::open("log.txt")?; + /// let reader = BufReader::with_capacity(10, f); + /// Ok(()) + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_capacity(capacity: usize, inner: R) -> BufReader { + BufReader { inner, buf: Buffer::with_capacity(capacity) } + } +} + +impl BufReader { + /// Attempt to look ahead `n` bytes. + /// + /// `n` must be less than or equal to `capacity`. + /// + /// The returned slice may be less than `n` bytes long if + /// end of file is reached. + /// + /// After calling this method, you may call [`consume`](BufRead::consume) + /// with a value less than or equal to `n` to advance over some or all of + /// the returned bytes. + /// + /// ## Examples + /// + /// ```rust + /// #![feature(bufreader_peek)] + /// use std::io::{Read, BufReader}; + /// + /// let mut bytes = &b"oh, hello there"[..]; + /// let mut rdr = BufReader::with_capacity(6, &mut bytes); + /// assert_eq!(rdr.peek(2).unwrap(), b"oh"); + /// let mut buf = [0; 4]; + /// rdr.read(&mut buf[..]).unwrap(); + /// assert_eq!(&buf, b"oh, "); + /// assert_eq!(rdr.peek(5).unwrap(), b"hello"); + /// let mut s = String::new(); + /// rdr.read_to_string(&mut s).unwrap(); + /// assert_eq!(&s, "hello there"); + /// assert_eq!(rdr.peek(1).unwrap().len(), 0); + /// ``` + #[unstable(feature = "bufreader_peek", issue = "128405")] + pub fn peek(&mut self, n: usize) -> io::Result<&[u8]> { + assert!(n <= self.capacity()); + while n > self.buf.buffer().len() { + if self.buf.pos() > 0 { + self.buf.backshift(); + } + let new = self.buf.read_more(&mut self.inner)?; + if new == 0 { + // end of file, no more bytes to read + return Ok(&self.buf.buffer()[..]); + } + debug_assert_eq!(self.buf.pos(), 0); + } + Ok(&self.buf.buffer()[..n]) + } +} + +impl BufReader { + /// Gets a reference to the underlying reader. + /// + /// It is inadvisable to directly read from the underlying reader. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufReader; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let f1 = File::open("log.txt")?; + /// let reader = BufReader::new(f1); + /// + /// let f2 = reader.get_ref(); + /// Ok(()) + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get_ref(&self) -> &R { + &self.inner + } + + /// Gets a mutable reference to the underlying reader. + /// + /// It is inadvisable to directly read from the underlying reader. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufReader; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let f1 = File::open("log.txt")?; + /// let mut reader = BufReader::new(f1); + /// + /// let f2 = reader.get_mut(); + /// Ok(()) + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get_mut(&mut self) -> &mut R { + &mut self.inner + } + + /// Returns a reference to the internally buffered data. + /// + /// Unlike [`fill_buf`], this will not attempt to fill the buffer if it is empty. + /// + /// [`fill_buf`]: BufRead::fill_buf + /// + /// # Examples + /// + /// ```no_run + /// use std::io::{BufReader, BufRead}; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let f = File::open("log.txt")?; + /// let mut reader = BufReader::new(f); + /// assert!(reader.buffer().is_empty()); + /// + /// if reader.fill_buf()?.len() > 0 { + /// assert!(!reader.buffer().is_empty()); + /// } + /// Ok(()) + /// } + /// ``` + #[stable(feature = "bufreader_buffer", since = "1.37.0")] + pub fn buffer(&self) -> &[u8] { + self.buf.buffer() + } + + /// Returns the number of bytes the internal buffer can hold at once. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::{BufReader, BufRead}; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let f = File::open("log.txt")?; + /// let mut reader = BufReader::new(f); + /// + /// let capacity = reader.capacity(); + /// let buffer = reader.fill_buf()?; + /// assert!(buffer.len() <= capacity); + /// Ok(()) + /// } + /// ``` + #[stable(feature = "buffered_io_capacity", since = "1.46.0")] + pub fn capacity(&self) -> usize { + self.buf.capacity() + } + + /// Unwraps this `BufReader`, returning the underlying reader. + /// + /// Note that any leftover data in the internal buffer is lost. Therefore, + /// a following read from the underlying reader may lead to data loss. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufReader; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let f1 = File::open("log.txt")?; + /// let reader = BufReader::new(f1); + /// + /// let f2 = reader.into_inner(); + /// Ok(()) + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_inner(self) -> R + where + R: Sized, + { + self.inner + } + + /// Invalidates all data in the internal buffer. + #[inline] + pub(in crate::io) fn discard_buffer(&mut self) { + self.buf.discard_buffer() + } +} + +// This is only used by a test which asserts that the initialization-tracking is correct. +#[cfg(test)] +impl BufReader { + #[allow(missing_docs)] + pub fn initialized(&self) -> usize { + self.buf.initialized() + } +} + +impl BufReader { + /// Seeks relative to the current position. If the new position lies within the buffer, + /// the buffer will not be flushed, allowing for more efficient seeks. + /// This method does not return the location of the underlying reader, so the caller + /// must track this information themselves if it is required. + #[stable(feature = "bufreader_seek_relative", since = "1.53.0")] + pub fn seek_relative(&mut self, offset: i64) -> io::Result<()> { + let pos = self.buf.pos() as u64; + if offset < 0 { + if let Some(_) = pos.checked_sub((-offset) as u64) { + self.buf.unconsume((-offset) as usize); + return Ok(()); + } + } else if let Some(new_pos) = pos.checked_add(offset as u64) { + if new_pos <= self.buf.filled() as u64 { + self.buf.consume(offset as usize); + return Ok(()); + } + } + + self.seek(SeekFrom::Current(offset)).map(drop) + } +} + +impl SpecReadByte for BufReader +where + Self: Read, +{ + #[inline] + fn spec_read_byte(&mut self) -> Option> { + let mut byte = 0; + if self.buf.consume_with(1, |claimed| byte = claimed[0]) { + return Some(Ok(byte)); + } + + // Fallback case, only reached once per buffer refill. + uninlined_slow_read_byte(self) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Read for BufReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + // If we don't have any buffered data and we're doing a massive read + // (larger than our internal buffer), bypass our internal buffer + // entirely. + if self.buf.pos() == self.buf.filled() && buf.len() >= self.capacity() { + self.discard_buffer(); + return self.inner.read(buf); + } + let mut rem = self.fill_buf()?; + let nread = rem.read(buf)?; + self.consume(nread); + Ok(nread) + } + + fn read_buf(&mut self, mut cursor: BorrowedCursor<'_>) -> io::Result<()> { + // If we don't have any buffered data and we're doing a massive read + // (larger than our internal buffer), bypass our internal buffer + // entirely. + if self.buf.pos() == self.buf.filled() && cursor.capacity() >= self.capacity() { + self.discard_buffer(); + return self.inner.read_buf(cursor); + } + + let prev = cursor.written(); + + let mut rem = self.fill_buf()?; + rem.read_buf(cursor.reborrow())?; // actually never fails + + self.consume(cursor.written() - prev); //slice impl of read_buf known to never unfill buf + + Ok(()) + } + + // Small read_exacts from a BufReader are extremely common when used with a deserializer. + // The default implementation calls read in a loop, which results in surprisingly poor code + // generation for the common path where the buffer has enough bytes to fill the passed-in + // buffer. + fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + if self.buf.consume_with(buf.len(), |claimed| buf.copy_from_slice(claimed)) { + return Ok(()); + } + + crate::io::default_read_exact(self, buf) + } + + fn read_buf_exact(&mut self, mut cursor: BorrowedCursor<'_>) -> io::Result<()> { + if self.buf.consume_with(cursor.capacity(), |claimed| cursor.append(claimed)) { + return Ok(()); + } + + crate::io::default_read_buf_exact(self, cursor) + } + + fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result { + let total_len = bufs.iter().map(|b| b.len()).sum::(); + if self.buf.pos() == self.buf.filled() && total_len >= self.capacity() { + self.discard_buffer(); + return self.inner.read_vectored(bufs); + } + let mut rem = self.fill_buf()?; + let nread = rem.read_vectored(bufs)?; + + self.consume(nread); + Ok(nread) + } + + fn is_read_vectored(&self) -> bool { + self.inner.is_read_vectored() + } + + // The inner reader might have an optimized `read_to_end`. Drain our buffer and then + // delegate to the inner implementation. + fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { + let inner_buf = self.buffer(); + buf.try_reserve(inner_buf.len())?; + buf.extend_from_slice(inner_buf); + let nread = inner_buf.len(); + self.discard_buffer(); + Ok(nread + self.inner.read_to_end(buf)?) + } + + // The inner reader might have an optimized `read_to_end`. Drain our buffer and then + // delegate to the inner implementation. + fn read_to_string(&mut self, buf: &mut String) -> io::Result { + // In the general `else` case below we must read bytes into a side buffer, check + // that they are valid UTF-8, and then append them to `buf`. This requires a + // potentially large memcpy. + // + // If `buf` is empty--the most common case--we can leverage `append_to_string` + // to read directly into `buf`'s internal byte buffer, saving an allocation and + // a memcpy. + if buf.is_empty() { + // `append_to_string`'s safety relies on the buffer only being appended to since + // it only checks the UTF-8 validity of new data. If there were existing content in + // `buf` then an untrustworthy reader (i.e. `self.inner`) could not only append + // bytes but also modify existing bytes and render them invalid. On the other hand, + // if `buf` is empty then by definition any writes must be appends and + // `append_to_string` will validate all of the new bytes. + unsafe { crate::io::append_to_string(buf, |b| self.read_to_end(b)) } + } else { + // We cannot append our byte buffer directly onto the `buf` String as there could + // be an incomplete UTF-8 sequence that has only been partially read. We must read + // everything into a side buffer first and then call `from_utf8` on the complete + // buffer. + let mut bytes = Vec::new(); + self.read_to_end(&mut bytes)?; + let string = crate::str::from_utf8(&bytes).map_err(|_| io::Error::INVALID_UTF8)?; + *buf += string; + Ok(string.len()) + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl BufRead for BufReader { + fn fill_buf(&mut self) -> io::Result<&[u8]> { + self.buf.fill_buf(&mut self.inner) + } + + fn consume(&mut self, amt: usize) { + self.buf.consume(amt) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for BufReader +where + R: ?Sized + fmt::Debug, +{ + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("BufReader") + .field("reader", &&self.inner) + .field( + "buffer", + &format_args!("{}/{}", self.buf.filled() - self.buf.pos(), self.capacity()), + ) + .finish() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Seek for BufReader { + /// Seek to an offset, in bytes, in the underlying reader. + /// + /// The position used for seeking with [SeekFrom::Current]\(_) is the + /// position the underlying reader would be at if the `BufReader` had no + /// internal buffer. + /// + /// Seeking always discards the internal buffer, even if the seek position + /// would otherwise fall within it. This guarantees that calling + /// [`BufReader::into_inner()`] immediately after a seek yields the underlying reader + /// at the same position. + /// + /// To seek without discarding the internal buffer, use [`BufReader::seek_relative`]. + /// + /// See [`std::io::Seek`] for more details. + /// + /// Note: In the edge case where you're seeking with [SeekFrom::Current]\(n) + /// where `n` minus the internal buffer length overflows an `i64`, two + /// seeks will be performed instead of one. If the second seek returns + /// [`Err`], the underlying reader will be left at the same position it would + /// have if you called `seek` with [SeekFrom::Current]\(0). + /// + /// [`std::io::Seek`]: Seek + fn seek(&mut self, pos: SeekFrom) -> io::Result { + let result: u64; + if let SeekFrom::Current(n) = pos { + let remainder = (self.buf.filled() - self.buf.pos()) as i64; + // it should be safe to assume that remainder fits within an i64 as the alternative + // means we managed to allocate 8 exbibytes and that's absurd. + // But it's not out of the realm of possibility for some weird underlying reader to + // support seeking by i64::MIN so we need to handle underflow when subtracting + // remainder. + if let Some(offset) = n.checked_sub(remainder) { + result = self.inner.seek(SeekFrom::Current(offset))?; + } else { + // seek backwards by our remainder, and then by the offset + self.inner.seek(SeekFrom::Current(-remainder))?; + self.discard_buffer(); + result = self.inner.seek(SeekFrom::Current(n))?; + } + } else { + // Seeking with Start/End doesn't care about our buffer length. + result = self.inner.seek(pos)?; + } + self.discard_buffer(); + Ok(result) + } + + /// Returns the current seek position from the start of the stream. + /// + /// The value returned is equivalent to `self.seek(SeekFrom::Current(0))` + /// but does not flush the internal buffer. Due to this optimization the + /// function does not guarantee that calling `.into_inner()` immediately + /// afterwards will yield the underlying reader at the same position. Use + /// [`BufReader::seek`] instead if you require that guarantee. + /// + /// # Panics + /// + /// This function will panic if the position of the inner reader is smaller + /// than the amount of buffered data. That can happen if the inner reader + /// has an incorrect implementation of [`Seek::stream_position`], or if the + /// position has gone out of sync due to calling [`Seek::seek`] directly on + /// the underlying reader. + /// + /// # Example + /// + /// ```no_run + /// use std::{ + /// io::{self, BufRead, BufReader, Seek}, + /// fs::File, + /// }; + /// + /// fn main() -> io::Result<()> { + /// let mut f = BufReader::new(File::open("foo.txt")?); + /// + /// let before = f.stream_position()?; + /// f.read_line(&mut String::new())?; + /// let after = f.stream_position()?; + /// + /// println!("The first line was {} bytes long", after - before); + /// Ok(()) + /// } + /// ``` + fn stream_position(&mut self) -> io::Result { + let remainder = (self.buf.filled() - self.buf.pos()) as u64; + self.inner.stream_position().map(|pos| { + pos.checked_sub(remainder).expect( + "overflow when subtracting remaining buffer size from inner stream position", + ) + }) + } + + /// Seeks relative to the current position. + /// + /// If the new position lies within the buffer, the buffer will not be + /// flushed, allowing for more efficient seeks. This method does not return + /// the location of the underlying reader, so the caller must track this + /// information themselves if it is required. + fn seek_relative(&mut self, offset: i64) -> io::Result<()> { + self.seek_relative(offset) + } +} + +impl SizeHint for BufReader { + #[inline] + fn lower_bound(&self) -> usize { + SizeHint::lower_bound(self.get_ref()) + self.buffer().len() + } + + #[inline] + fn upper_bound(&self) -> Option { + SizeHint::upper_bound(self.get_ref()).and_then(|up| self.buffer().len().checked_add(up)) + } +} diff --git a/crates/std/src/io/buffered/bufreader/buffer.rs b/crates/std/src/io/buffered/bufreader/buffer.rs new file mode 100644 index 0000000..ad8608b --- /dev/null +++ b/crates/std/src/io/buffered/bufreader/buffer.rs @@ -0,0 +1,155 @@ +//! An encapsulation of `BufReader`'s buffer management logic. +//! +//! This module factors out the basic functionality of `BufReader` in order to protect two core +//! invariants: +//! * `filled` bytes of `buf` are always initialized +//! * `pos` is always <= `filled` +//! Since this module encapsulates the buffer management logic, we can ensure that the range +//! `pos..filled` is always a valid index into the initialized region of the buffer. This means +//! that user code which wants to do reads from a `BufReader` via `buffer` + `consume` can do so +//! without encountering any runtime bounds checks. + +use crate::cmp; +use crate::io::{self, BorrowedBuf, ErrorKind, Read}; +use crate::mem::MaybeUninit; + +pub struct Buffer { + // The buffer. + buf: Box<[MaybeUninit]>, + // The current seek offset into `buf`, must always be <= `filled`. + pos: usize, + // Each call to `fill_buf` sets `filled` to indicate how many bytes at the start of `buf` are + // initialized with bytes from a read. + filled: usize, + // This is the max number of bytes returned across all `fill_buf` calls. We track this so that we + // can accurately tell `read_buf` how many bytes of buf are initialized, to bypass as much of its + // defensive initialization as possible. Note that while this often the same as `filled`, it + // doesn't need to be. Calls to `fill_buf` are not required to actually fill the buffer, and + // omitting this is a huge perf regression for `Read` impls that do not. + initialized: usize, +} + +impl Buffer { + #[inline] + pub fn with_capacity(capacity: usize) -> Self { + let buf = Box::new_uninit_slice(capacity); + Self { buf, pos: 0, filled: 0, initialized: 0 } + } + + #[inline] + pub fn try_with_capacity(capacity: usize) -> io::Result { + match Box::try_new_uninit_slice(capacity) { + Ok(buf) => Ok(Self { buf, pos: 0, filled: 0, initialized: 0 }), + Err(_) => { + Err(io::const_error!(ErrorKind::OutOfMemory, "failed to allocate read buffer")) + } + } + } + + #[inline] + pub fn buffer(&self) -> &[u8] { + // SAFETY: self.pos and self.filled are valid, and self.filled >= self.pos, and + // that region is initialized because those are all invariants of this type. + unsafe { self.buf.get_unchecked(self.pos..self.filled).assume_init_ref() } + } + + #[inline] + pub fn capacity(&self) -> usize { + self.buf.len() + } + + #[inline] + pub fn filled(&self) -> usize { + self.filled + } + + #[inline] + pub fn pos(&self) -> usize { + self.pos + } + + // This is only used by a test which asserts that the initialization-tracking is correct. + #[cfg(test)] + pub fn initialized(&self) -> usize { + self.initialized + } + + #[inline] + pub fn discard_buffer(&mut self) { + self.pos = 0; + self.filled = 0; + } + + #[inline] + pub fn consume(&mut self, amt: usize) { + self.pos = cmp::min(self.pos + amt, self.filled); + } + + /// If there are `amt` bytes available in the buffer, pass a slice containing those bytes to + /// `visitor` and return true. If there are not enough bytes available, return false. + #[inline] + pub fn consume_with(&mut self, amt: usize, mut visitor: V) -> bool + where + V: FnMut(&[u8]), + { + if let Some(claimed) = self.buffer().get(..amt) { + visitor(claimed); + // If the indexing into self.buffer() succeeds, amt must be a valid increment. + self.pos += amt; + true + } else { + false + } + } + + #[inline] + pub fn unconsume(&mut self, amt: usize) { + self.pos = self.pos.saturating_sub(amt); + } + + /// Read more bytes into the buffer without discarding any of its contents + pub fn read_more(&mut self, mut reader: impl Read) -> io::Result { + let mut buf = BorrowedBuf::from(&mut self.buf[self.filled..]); + let old_init = self.initialized - self.filled; + unsafe { + buf.set_init(old_init); + } + reader.read_buf(buf.unfilled())?; + self.filled += buf.len(); + self.initialized += buf.init_len() - old_init; + Ok(buf.len()) + } + + /// Remove bytes that have already been read from the buffer. + pub fn backshift(&mut self) { + self.buf.copy_within(self.pos..self.filled, 0); + self.filled -= self.pos; + self.pos = 0; + } + + #[inline] + pub fn fill_buf(&mut self, mut reader: impl Read) -> io::Result<&[u8]> { + // If we've reached the end of our internal buffer then we need to fetch + // some more data from the reader. + // Branch using `>=` instead of the more correct `==` + // to tell the compiler that the pos..cap slice is always valid. + if self.pos >= self.filled { + debug_assert!(self.pos == self.filled); + + let mut buf = BorrowedBuf::from(&mut *self.buf); + // SAFETY: `self.filled` bytes will always have been initialized. + unsafe { + buf.set_init(self.initialized); + } + + let result = reader.read_buf(buf.unfilled()); + + self.pos = 0; + self.filled = buf.len(); + self.initialized = buf.init_len(); + + result?; + } + Ok(self.buffer()) + } +} diff --git a/crates/std/src/io/buffered/bufwriter.rs b/crates/std/src/io/buffered/bufwriter.rs new file mode 100644 index 0000000..d569fed --- /dev/null +++ b/crates/std/src/io/buffered/bufwriter.rs @@ -0,0 +1,680 @@ +use crate::io::{ + self, DEFAULT_BUF_SIZE, ErrorKind, IntoInnerError, IoSlice, Seek, SeekFrom, Write, +}; +use crate::mem::{self, ManuallyDrop}; +use crate::{error, fmt, ptr}; + +/// Wraps a writer and buffers its output. +/// +/// It can be excessively inefficient to work directly with something that +/// implements [`Write`]. For example, every call to +/// [`write`][`TcpStream::write`] on [`TcpStream`] results in a system call. A +/// `BufWriter` keeps an in-memory buffer of data and writes it to an underlying +/// writer in large, infrequent batches. +/// +/// `BufWriter` can improve the speed of programs that make *small* and +/// *repeated* write calls to the same file or network socket. It does not +/// help when writing very large amounts at once, or writing just one or a few +/// times. It also provides no advantage when writing to a destination that is +/// in memory, like a [Vec]\. +/// +/// It is critical to call [`flush`] before `BufWriter` is dropped. Though +/// dropping will attempt to flush the contents of the buffer, any errors +/// that happen in the process of dropping will be ignored. Calling [`flush`] +/// ensures that the buffer is empty and thus dropping will not even attempt +/// file operations. +/// +/// # Examples +/// +/// Let's write the numbers one through ten to a [`TcpStream`]: +/// +/// ```no_run +/// use std::io::prelude::*; +/// use std::net::TcpStream; +/// +/// let mut stream = TcpStream::connect("127.0.0.1:34254").unwrap(); +/// +/// for i in 0..10 { +/// stream.write(&[i+1]).unwrap(); +/// } +/// ``` +/// +/// Because we're not buffering, we write each one in turn, incurring the +/// overhead of a system call per byte written. We can fix this with a +/// `BufWriter`: +/// +/// ```no_run +/// use std::io::prelude::*; +/// use std::io::BufWriter; +/// use std::net::TcpStream; +/// +/// let mut stream = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); +/// +/// for i in 0..10 { +/// stream.write(&[i+1]).unwrap(); +/// } +/// stream.flush().unwrap(); +/// ``` +/// +/// By wrapping the stream with a `BufWriter`, these ten writes are all grouped +/// together by the buffer and will all be written out in one system call when +/// the `stream` is flushed. +/// +/// [`TcpStream::write`]: crate::net::TcpStream::write +/// [`TcpStream`]: crate::net::TcpStream +/// [`flush`]: BufWriter::flush +#[stable(feature = "rust1", since = "1.0.0")] +pub struct BufWriter { + // The buffer. Avoid using this like a normal `Vec` in common code paths. + // That is, don't use `buf.push`, `buf.extend_from_slice`, or any other + // methods that require bounds checking or the like. This makes an enormous + // difference to performance (we may want to stop using a `Vec` entirely). + buf: Vec, + // #30888: If the inner writer panics in a call to write, we don't want to + // write the buffered data a second time in BufWriter's destructor. This + // flag tells the Drop impl if it should skip the flush. + panicked: bool, + inner: W, +} + +impl BufWriter { + /// Creates a new `BufWriter` with a default buffer capacity. The default is currently 8 KiB, + /// but may change in the future. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let mut buffer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new(inner: W) -> BufWriter { + BufWriter::with_capacity(DEFAULT_BUF_SIZE, inner) + } + + pub(crate) fn try_new_buffer() -> io::Result> { + Vec::try_with_capacity(DEFAULT_BUF_SIZE).map_err(|_| { + io::const_error!(ErrorKind::OutOfMemory, "failed to allocate write buffer") + }) + } + + pub(crate) fn with_buffer(inner: W, buf: Vec) -> Self { + Self { inner, buf, panicked: false } + } + + /// Creates a new `BufWriter` with at least the specified buffer capacity. + /// + /// # Examples + /// + /// Creating a buffer with a buffer of at least a hundred bytes. + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let stream = TcpStream::connect("127.0.0.1:34254").unwrap(); + /// let mut buffer = BufWriter::with_capacity(100, stream); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_capacity(capacity: usize, inner: W) -> BufWriter { + BufWriter { inner, buf: Vec::with_capacity(capacity), panicked: false } + } + + /// Unwraps this `BufWriter`, returning the underlying writer. + /// + /// The buffer is written out before returning the writer. + /// + /// # Errors + /// + /// An [`Err`] will be returned if an error occurs while flushing the buffer. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let mut buffer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // unwrap the TcpStream and flush the buffer + /// let stream = buffer.into_inner().unwrap(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_inner(mut self) -> Result>> { + match self.flush_buf() { + Err(e) => Err(IntoInnerError::new(self, e)), + Ok(()) => Ok(self.into_parts().0), + } + } + + /// Disassembles this `BufWriter`, returning the underlying writer, and any buffered but + /// unwritten data. + /// + /// If the underlying writer panicked, it is not known what portion of the data was written. + /// In this case, we return `WriterPanicked` for the buffered data (from which the buffer + /// contents can still be recovered). + /// + /// `into_parts` makes no attempt to flush data and cannot fail. + /// + /// # Examples + /// + /// ``` + /// use std::io::{BufWriter, Write}; + /// + /// let mut buffer = [0u8; 10]; + /// let mut stream = BufWriter::new(buffer.as_mut()); + /// write!(stream, "too much data").unwrap(); + /// stream.flush().expect_err("it doesn't fit"); + /// let (recovered_writer, buffered_data) = stream.into_parts(); + /// assert_eq!(recovered_writer.len(), 0); + /// assert_eq!(&buffered_data.unwrap(), b"ata"); + /// ``` + #[stable(feature = "bufwriter_into_parts", since = "1.56.0")] + pub fn into_parts(self) -> (W, Result, WriterPanicked>) { + let mut this = ManuallyDrop::new(self); + let buf = mem::take(&mut this.buf); + let buf = if !this.panicked { Ok(buf) } else { Err(WriterPanicked { buf }) }; + + // SAFETY: double-drops are prevented by putting `this` in a ManuallyDrop that is never dropped + let inner = unsafe { ptr::read(&this.inner) }; + + (inner, buf) + } +} + +impl BufWriter { + /// Send data in our local buffer into the inner writer, looping as + /// necessary until either it's all been sent or an error occurs. + /// + /// Because all the data in the buffer has been reported to our owner as + /// "successfully written" (by returning nonzero success values from + /// `write`), any 0-length writes from `inner` must be reported as i/o + /// errors from this method. + pub(in crate::io) fn flush_buf(&mut self) -> io::Result<()> { + /// Helper struct to ensure the buffer is updated after all the writes + /// are complete. It tracks the number of written bytes and drains them + /// all from the front of the buffer when dropped. + struct BufGuard<'a> { + buffer: &'a mut Vec, + written: usize, + } + + impl<'a> BufGuard<'a> { + fn new(buffer: &'a mut Vec) -> Self { + Self { buffer, written: 0 } + } + + /// The unwritten part of the buffer + fn remaining(&self) -> &[u8] { + &self.buffer[self.written..] + } + + /// Flag some bytes as removed from the front of the buffer + fn consume(&mut self, amt: usize) { + self.written += amt; + } + + /// true if all of the bytes have been written + fn done(&self) -> bool { + self.written >= self.buffer.len() + } + } + + impl Drop for BufGuard<'_> { + fn drop(&mut self) { + if self.written > 0 { + self.buffer.drain(..self.written); + } + } + } + + let mut guard = BufGuard::new(&mut self.buf); + while !guard.done() { + self.panicked = true; + let r = self.inner.write(guard.remaining()); + self.panicked = false; + + match r { + Ok(0) => { + return Err(io::const_error!( + ErrorKind::WriteZero, + "failed to write the buffered data", + )); + } + Ok(n) => guard.consume(n), + Err(ref e) if e.is_interrupted() => {} + Err(e) => return Err(e), + } + } + Ok(()) + } + + /// Buffer some data without flushing it, regardless of the size of the + /// data. Writes as much as possible without exceeding capacity. Returns + /// the number of bytes written. + pub(super) fn write_to_buf(&mut self, buf: &[u8]) -> usize { + let available = self.spare_capacity(); + let amt_to_buffer = available.min(buf.len()); + + // SAFETY: `amt_to_buffer` is <= buffer's spare capacity by construction. + unsafe { + self.write_to_buffer_unchecked(&buf[..amt_to_buffer]); + } + + amt_to_buffer + } + + /// Gets a reference to the underlying writer. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let mut buffer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // we can use reference just like buffer + /// let reference = buffer.get_ref(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get_ref(&self) -> &W { + &self.inner + } + + /// Gets a mutable reference to the underlying writer. + /// + /// It is inadvisable to directly write to the underlying writer. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let mut buffer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // we can use reference just like buffer + /// let reference = buffer.get_mut(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get_mut(&mut self) -> &mut W { + &mut self.inner + } + + /// Returns a reference to the internally buffered data. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let buf_writer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // See how many bytes are currently buffered + /// let bytes_buffered = buf_writer.buffer().len(); + /// ``` + #[stable(feature = "bufreader_buffer", since = "1.37.0")] + pub fn buffer(&self) -> &[u8] { + &self.buf + } + + /// Returns a mutable reference to the internal buffer. + /// + /// This can be used to write data directly into the buffer without triggering writers + /// to the underlying writer. + /// + /// That the buffer is a `Vec` is an implementation detail. + /// Callers should not modify the capacity as there currently is no public API to do so + /// and thus any capacity changes would be unexpected by the user. + pub(in crate::io) fn buffer_mut(&mut self) -> &mut Vec { + &mut self.buf + } + + /// Returns the number of bytes the internal buffer can hold without flushing. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let buf_writer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // Check the capacity of the inner buffer + /// let capacity = buf_writer.capacity(); + /// // Calculate how many bytes can be written without flushing + /// let without_flush = capacity - buf_writer.buffer().len(); + /// ``` + #[stable(feature = "buffered_io_capacity", since = "1.46.0")] + pub fn capacity(&self) -> usize { + self.buf.capacity() + } + + // Ensure this function does not get inlined into `write`, so that it + // remains inlineable and its common path remains as short as possible. + // If this function ends up being called frequently relative to `write`, + // it's likely a sign that the client is using an improperly sized buffer + // or their write patterns are somewhat pathological. + #[cold] + #[inline(never)] + fn write_cold(&mut self, buf: &[u8]) -> io::Result { + if buf.len() > self.spare_capacity() { + self.flush_buf()?; + } + + // Why not len > capacity? To avoid a needless trip through the buffer when the input + // exactly fills it. We'd just need to flush it to the underlying writer anyway. + if buf.len() >= self.buf.capacity() { + self.panicked = true; + let r = self.get_mut().write(buf); + self.panicked = false; + r + } else { + // Write to the buffer. In this case, we write to the buffer even if it fills it + // exactly. Doing otherwise would mean flushing the buffer, then writing this + // input to the inner writer, which in many cases would be a worse strategy. + + // SAFETY: There was either enough spare capacity already, or there wasn't and we + // flushed the buffer to ensure that there is. In the latter case, we know that there + // is because flushing ensured that our entire buffer is spare capacity, and we entered + // this block because the input buffer length is less than that capacity. In either + // case, it's safe to write the input buffer to our buffer. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + Ok(buf.len()) + } + } + + // Ensure this function does not get inlined into `write_all`, so that it + // remains inlineable and its common path remains as short as possible. + // If this function ends up being called frequently relative to `write_all`, + // it's likely a sign that the client is using an improperly sized buffer + // or their write patterns are somewhat pathological. + #[cold] + #[inline(never)] + fn write_all_cold(&mut self, buf: &[u8]) -> io::Result<()> { + // Normally, `write_all` just calls `write` in a loop. We can do better + // by calling `self.get_mut().write_all()` directly, which avoids + // round trips through the buffer in the event of a series of partial + // writes in some circumstances. + + if buf.len() > self.spare_capacity() { + self.flush_buf()?; + } + + // Why not len > capacity? To avoid a needless trip through the buffer when the input + // exactly fills it. We'd just need to flush it to the underlying writer anyway. + if buf.len() >= self.buf.capacity() { + self.panicked = true; + let r = self.get_mut().write_all(buf); + self.panicked = false; + r + } else { + // Write to the buffer. In this case, we write to the buffer even if it fills it + // exactly. Doing otherwise would mean flushing the buffer, then writing this + // input to the inner writer, which in many cases would be a worse strategy. + + // SAFETY: There was either enough spare capacity already, or there wasn't and we + // flushed the buffer to ensure that there is. In the latter case, we know that there + // is because flushing ensured that our entire buffer is spare capacity, and we entered + // this block because the input buffer length is less than that capacity. In either + // case, it's safe to write the input buffer to our buffer. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + Ok(()) + } + } + + // SAFETY: Requires `buf.len() <= self.buf.capacity() - self.buf.len()`, + // i.e., that input buffer length is less than or equal to spare capacity. + #[inline] + unsafe fn write_to_buffer_unchecked(&mut self, buf: &[u8]) { + debug_assert!(buf.len() <= self.spare_capacity()); + let old_len = self.buf.len(); + let buf_len = buf.len(); + let src = buf.as_ptr(); + unsafe { + let dst = self.buf.as_mut_ptr().add(old_len); + ptr::copy_nonoverlapping(src, dst, buf_len); + self.buf.set_len(old_len + buf_len); + } + } + + #[inline] + fn spare_capacity(&self) -> usize { + self.buf.capacity() - self.buf.len() + } +} + +#[stable(feature = "bufwriter_into_parts", since = "1.56.0")] +/// Error returned for the buffered data from `BufWriter::into_parts`, when the underlying +/// writer has previously panicked. Contains the (possibly partly written) buffered data. +/// +/// # Example +/// +/// ``` +/// use std::io::{self, BufWriter, Write}; +/// use std::panic::{catch_unwind, AssertUnwindSafe}; +/// +/// struct PanickingWriter; +/// impl Write for PanickingWriter { +/// fn write(&mut self, buf: &[u8]) -> io::Result { panic!() } +/// fn flush(&mut self) -> io::Result<()> { panic!() } +/// } +/// +/// let mut stream = BufWriter::new(PanickingWriter); +/// write!(stream, "some data").unwrap(); +/// let result = catch_unwind(AssertUnwindSafe(|| { +/// stream.flush().unwrap() +/// })); +/// assert!(result.is_err()); +/// let (recovered_writer, buffered_data) = stream.into_parts(); +/// assert!(matches!(recovered_writer, PanickingWriter)); +/// assert_eq!(buffered_data.unwrap_err().into_inner(), b"some data"); +/// ``` +pub struct WriterPanicked { + buf: Vec, +} + +impl WriterPanicked { + /// Returns the perhaps-unwritten data. Some of this data may have been written by the + /// panicking call(s) to the underlying writer, so simply writing it again is not a good idea. + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "bufwriter_into_parts", since = "1.56.0")] + pub fn into_inner(self) -> Vec { + self.buf + } +} + +#[stable(feature = "bufwriter_into_parts", since = "1.56.0")] +impl error::Error for WriterPanicked {} + +#[stable(feature = "bufwriter_into_parts", since = "1.56.0")] +impl fmt::Display for WriterPanicked { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + "BufWriter inner writer panicked, what data remains unwritten is not known".fmt(f) + } +} + +#[stable(feature = "bufwriter_into_parts", since = "1.56.0")] +impl fmt::Debug for WriterPanicked { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("WriterPanicked") + .field("buffer", &format_args!("{}/{}", self.buf.len(), self.buf.capacity())) + .finish() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Write for BufWriter { + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + // Use < instead of <= to avoid a needless trip through the buffer in some cases. + // See `write_cold` for details. + if buf.len() < self.spare_capacity() { + // SAFETY: safe by above conditional. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + Ok(buf.len()) + } else { + self.write_cold(buf) + } + } + + #[inline] + fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { + // Use < instead of <= to avoid a needless trip through the buffer in some cases. + // See `write_all_cold` for details. + if buf.len() < self.spare_capacity() { + // SAFETY: safe by above conditional. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + Ok(()) + } else { + self.write_all_cold(buf) + } + } + + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { + // FIXME: Consider applying `#[inline]` / `#[inline(never)]` optimizations already applied + // to `write` and `write_all`. The performance benefits can be significant. See #79930. + if self.get_ref().is_write_vectored() { + // We have to handle the possibility that the total length of the buffers overflows + // `usize` (even though this can only happen if multiple `IoSlice`s reference the + // same underlying buffer, as otherwise the buffers wouldn't fit in memory). If the + // computation overflows, then surely the input cannot fit in our buffer, so we forward + // to the inner writer's `write_vectored` method to let it handle it appropriately. + let mut saturated_total_len: usize = 0; + + for buf in bufs { + saturated_total_len = saturated_total_len.saturating_add(buf.len()); + + if saturated_total_len > self.spare_capacity() && !self.buf.is_empty() { + // Flush if the total length of the input exceeds our buffer's spare capacity. + // If we would have overflowed, this condition also holds, and we need to flush. + self.flush_buf()?; + } + + if saturated_total_len >= self.buf.capacity() { + // Forward to our inner writer if the total length of the input is greater than or + // equal to our buffer capacity. If we would have overflowed, this condition also + // holds, and we punt to the inner writer. + self.panicked = true; + let r = self.get_mut().write_vectored(bufs); + self.panicked = false; + return r; + } + } + + // `saturated_total_len < self.buf.capacity()` implies that we did not saturate. + + // SAFETY: We checked whether or not the spare capacity was large enough above. If + // it was, then we're safe already. If it wasn't, we flushed, making sufficient + // room for any input <= the buffer size, which includes this input. + unsafe { + bufs.iter().for_each(|b| self.write_to_buffer_unchecked(b)); + }; + + Ok(saturated_total_len) + } else { + let mut iter = bufs.iter(); + let mut total_written = if let Some(buf) = iter.by_ref().find(|&buf| !buf.is_empty()) { + // This is the first non-empty slice to write, so if it does + // not fit in the buffer, we still get to flush and proceed. + if buf.len() > self.spare_capacity() { + self.flush_buf()?; + } + if buf.len() >= self.buf.capacity() { + // The slice is at least as large as the buffering capacity, + // so it's better to write it directly, bypassing the buffer. + self.panicked = true; + let r = self.get_mut().write(buf); + self.panicked = false; + return r; + } else { + // SAFETY: We checked whether or not the spare capacity was large enough above. + // If it was, then we're safe already. If it wasn't, we flushed, making + // sufficient room for any input <= the buffer size, which includes this input. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + buf.len() + } + } else { + return Ok(0); + }; + debug_assert!(total_written != 0); + for buf in iter { + if buf.len() <= self.spare_capacity() { + // SAFETY: safe by above conditional. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + // This cannot overflow `usize`. If we are here, we've written all of the bytes + // so far to our buffer, and we've ensured that we never exceed the buffer's + // capacity. Therefore, `total_written` <= `self.buf.capacity()` <= `usize::MAX`. + total_written += buf.len(); + } else { + break; + } + } + Ok(total_written) + } + } + + fn is_write_vectored(&self) -> bool { + true + } + + fn flush(&mut self) -> io::Result<()> { + self.flush_buf().and_then(|()| self.get_mut().flush()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for BufWriter +where + W: fmt::Debug, +{ + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("BufWriter") + .field("writer", &&self.inner) + .field("buffer", &format_args!("{}/{}", self.buf.len(), self.buf.capacity())) + .finish() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Seek for BufWriter { + /// Seek to the offset, in bytes, in the underlying writer. + /// + /// Seeking always writes out the internal buffer before seeking. + fn seek(&mut self, pos: SeekFrom) -> io::Result { + self.flush_buf()?; + self.get_mut().seek(pos) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Drop for BufWriter { + fn drop(&mut self) { + if !self.panicked { + // dtors should not panic, so we ignore a failed flush + let _r = self.flush_buf(); + } + } +} diff --git a/crates/std/src/io/buffered/linewriter.rs b/crates/std/src/io/buffered/linewriter.rs new file mode 100644 index 0000000..cc6921b --- /dev/null +++ b/crates/std/src/io/buffered/linewriter.rs @@ -0,0 +1,235 @@ +use crate::fmt; +use crate::io::buffered::LineWriterShim; +use crate::io::{self, BufWriter, IntoInnerError, IoSlice, Write}; + +/// Wraps a writer and buffers output to it, flushing whenever a newline +/// (`0x0a`, `'\n'`) is detected. +/// +/// The [`BufWriter`] struct wraps a writer and buffers its output. +/// But it only does this batched write when it goes out of scope, or when the +/// internal buffer is full. Sometimes, you'd prefer to write each line as it's +/// completed, rather than the entire buffer at once. Enter `LineWriter`. It +/// does exactly that. +/// +/// Like [`BufWriter`], a `LineWriter`’s buffer will also be flushed when the +/// `LineWriter` goes out of scope or when its internal buffer is full. +/// +/// If there's still a partial line in the buffer when the `LineWriter` is +/// dropped, it will flush those contents. +/// +/// # Examples +/// +/// We can use `LineWriter` to write one line at a time, significantly +/// reducing the number of actual writes to the file. +/// +/// ```no_run +/// use std::fs::{self, File}; +/// use std::io::prelude::*; +/// use std::io::LineWriter; +/// +/// fn main() -> std::io::Result<()> { +/// let road_not_taken = b"I shall be telling this with a sigh +/// Somewhere ages and ages hence: +/// Two roads diverged in a wood, and I - +/// I took the one less traveled by, +/// And that has made all the difference."; +/// +/// let file = File::create("poem.txt")?; +/// let mut file = LineWriter::new(file); +/// +/// file.write_all(b"I shall be telling this with a sigh")?; +/// +/// // No bytes are written until a newline is encountered (or +/// // the internal buffer is filled). +/// assert_eq!(fs::read_to_string("poem.txt")?, ""); +/// file.write_all(b"\n")?; +/// assert_eq!( +/// fs::read_to_string("poem.txt")?, +/// "I shall be telling this with a sigh\n", +/// ); +/// +/// // Write the rest of the poem. +/// file.write_all(b"Somewhere ages and ages hence: +/// Two roads diverged in a wood, and I - +/// I took the one less traveled by, +/// And that has made all the difference.")?; +/// +/// // The last line of the poem doesn't end in a newline, so +/// // we have to flush or drop the `LineWriter` to finish +/// // writing. +/// file.flush()?; +/// +/// // Confirm the whole poem was written. +/// assert_eq!(fs::read("poem.txt")?, &road_not_taken[..]); +/// Ok(()) +/// } +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +pub struct LineWriter { + inner: BufWriter, +} + +impl LineWriter { + /// Creates a new `LineWriter`. + /// + /// # Examples + /// + /// ```no_run + /// use std::fs::File; + /// use std::io::LineWriter; + /// + /// fn main() -> std::io::Result<()> { + /// let file = File::create("poem.txt")?; + /// let file = LineWriter::new(file); + /// Ok(()) + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new(inner: W) -> LineWriter { + // Lines typically aren't that long, don't use a giant buffer + LineWriter::with_capacity(1024, inner) + } + + /// Creates a new `LineWriter` with at least the specified capacity for the + /// internal buffer. + /// + /// # Examples + /// + /// ```no_run + /// use std::fs::File; + /// use std::io::LineWriter; + /// + /// fn main() -> std::io::Result<()> { + /// let file = File::create("poem.txt")?; + /// let file = LineWriter::with_capacity(100, file); + /// Ok(()) + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_capacity(capacity: usize, inner: W) -> LineWriter { + LineWriter { inner: BufWriter::with_capacity(capacity, inner) } + } + + /// Gets a mutable reference to the underlying writer. + /// + /// Caution must be taken when calling methods on the mutable reference + /// returned as extra writes could corrupt the output stream. + /// + /// # Examples + /// + /// ```no_run + /// use std::fs::File; + /// use std::io::LineWriter; + /// + /// fn main() -> std::io::Result<()> { + /// let file = File::create("poem.txt")?; + /// let mut file = LineWriter::new(file); + /// + /// // we can use reference just like file + /// let reference = file.get_mut(); + /// Ok(()) + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get_mut(&mut self) -> &mut W { + self.inner.get_mut() + } + + /// Unwraps this `LineWriter`, returning the underlying writer. + /// + /// The internal buffer is written out before returning the writer. + /// + /// # Errors + /// + /// An [`Err`] will be returned if an error occurs while flushing the buffer. + /// + /// # Examples + /// + /// ```no_run + /// use std::fs::File; + /// use std::io::LineWriter; + /// + /// fn main() -> std::io::Result<()> { + /// let file = File::create("poem.txt")?; + /// + /// let writer: LineWriter = LineWriter::new(file); + /// + /// let file: File = writer.into_inner()?; + /// Ok(()) + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_inner(self) -> Result>> { + self.inner.into_inner().map_err(|err| err.new_wrapped(|inner| LineWriter { inner })) + } +} + +impl LineWriter { + /// Gets a reference to the underlying writer. + /// + /// # Examples + /// + /// ```no_run + /// use std::fs::File; + /// use std::io::LineWriter; + /// + /// fn main() -> std::io::Result<()> { + /// let file = File::create("poem.txt")?; + /// let file = LineWriter::new(file); + /// + /// let reference = file.get_ref(); + /// Ok(()) + /// } + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get_ref(&self) -> &W { + self.inner.get_ref() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Write for LineWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + LineWriterShim::new(&mut self.inner).write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } + + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { + LineWriterShim::new(&mut self.inner).write_vectored(bufs) + } + + fn is_write_vectored(&self) -> bool { + self.inner.is_write_vectored() + } + + fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { + LineWriterShim::new(&mut self.inner).write_all(buf) + } + + fn write_all_vectored(&mut self, bufs: &mut [IoSlice<'_>]) -> io::Result<()> { + LineWriterShim::new(&mut self.inner).write_all_vectored(bufs) + } + + fn write_fmt(&mut self, fmt: fmt::Arguments<'_>) -> io::Result<()> { + LineWriterShim::new(&mut self.inner).write_fmt(fmt) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for LineWriter +where + W: fmt::Debug, +{ + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("LineWriter") + .field("writer", &self.get_ref()) + .field( + "buffer", + &format_args!("{}/{}", self.inner.buffer().len(), self.inner.capacity()), + ) + .finish_non_exhaustive() + } +} diff --git a/crates/std/src/io/buffered/linewritershim.rs b/crates/std/src/io/buffered/linewritershim.rs new file mode 100644 index 0000000..967e248 --- /dev/null +++ b/crates/std/src/io/buffered/linewritershim.rs @@ -0,0 +1,297 @@ +use core::slice::memchr; + +use crate::io::{self, BufWriter, IoSlice, Write}; + +/// Private helper struct for implementing the line-buffered writing logic. +/// +/// This shim temporarily wraps a BufWriter, and uses its internals to +/// implement a line-buffered writer (specifically by using the internal +/// methods like write_to_buf and flush_buf). In this way, a more +/// efficient abstraction can be created than one that only had access to +/// `write` and `flush`, without needlessly duplicating a lot of the +/// implementation details of BufWriter. This also allows existing +/// `BufWriters` to be temporarily given line-buffering logic; this is what +/// enables Stdout to be alternately in line-buffered or block-buffered mode. +#[derive(Debug)] +pub struct LineWriterShim<'a, W: ?Sized + Write> { + buffer: &'a mut BufWriter, +} + +impl<'a, W: ?Sized + Write> LineWriterShim<'a, W> { + pub fn new(buffer: &'a mut BufWriter) -> Self { + Self { buffer } + } + + /// Gets a reference to the inner writer (that is, the writer + /// wrapped by the BufWriter). + fn inner(&self) -> &W { + self.buffer.get_ref() + } + + /// Gets a mutable reference to the inner writer (that is, the writer + /// wrapped by the BufWriter). Be careful with this writer, as writes to + /// it will bypass the buffer. + fn inner_mut(&mut self) -> &mut W { + self.buffer.get_mut() + } + + /// Gets the content currently buffered in self.buffer + fn buffered(&self) -> &[u8] { + self.buffer.buffer() + } + + /// Flushes the buffer iff the last byte is a newline (indicating that an + /// earlier write only succeeded partially, and we want to retry flushing + /// the buffered line before continuing with a subsequent write). + fn flush_if_completed_line(&mut self) -> io::Result<()> { + match self.buffered().last().copied() { + Some(b'\n') => self.buffer.flush_buf(), + _ => Ok(()), + } + } +} + +impl<'a, W: ?Sized + Write> Write for LineWriterShim<'a, W> { + /// Writes some data into this BufWriter with line buffering. + /// + /// This means that, if any newlines are present in the data, the data up to + /// the last newline is sent directly to the underlying writer, and data + /// after it is buffered. Returns the number of bytes written. + /// + /// This function operates on a "best effort basis"; in keeping with the + /// convention of `Write::write`, it makes at most one attempt to write + /// new data to the underlying writer. If that write only reports a partial + /// success, the remaining data will be buffered. + /// + /// Because this function attempts to send completed lines to the underlying + /// writer, it will also flush the existing buffer if it ends with a + /// newline, even if the incoming data does not contain any newlines. + fn write(&mut self, buf: &[u8]) -> io::Result { + let newline_idx = match memchr::memrchr(b'\n', buf) { + // If there are no new newlines (that is, if this write is less than + // one line), just do a regular buffered write (which may flush if + // we exceed the inner buffer's size) + None => { + self.flush_if_completed_line()?; + return self.buffer.write(buf); + } + // Otherwise, arrange for the lines to be written directly to the + // inner writer. + Some(newline_idx) => newline_idx + 1, + }; + + // Flush existing content to prepare for our write. We have to do this + // before attempting to write `buf` in order to maintain consistency; + // if we add `buf` to the buffer then try to flush it all at once, + // we're obligated to return Ok(), which would mean suppressing any + // errors that occur during flush. + self.buffer.flush_buf()?; + + // This is what we're going to try to write directly to the inner + // writer. The rest will be buffered, if nothing goes wrong. + let lines = &buf[..newline_idx]; + + // Write `lines` directly to the inner writer. In keeping with the + // `write` convention, make at most one attempt to add new (unbuffered) + // data. Because this write doesn't touch the BufWriter state directly, + // and the buffer is known to be empty, we don't need to worry about + // self.buffer.panicked here. + let flushed = self.inner_mut().write(lines)?; + + // If buffer returns Ok(0), propagate that to the caller without + // doing additional buffering; otherwise we're just guaranteeing + // an "ErrorKind::WriteZero" later. + if flushed == 0 { + return Ok(0); + } + + // Now that the write has succeeded, buffer the rest (or as much of + // the rest as possible). If there were any unwritten newlines, we + // only buffer out to the last unwritten newline that fits in the + // buffer; this helps prevent flushing partial lines on subsequent + // calls to LineWriterShim::write. + + // Handle the cases in order of most-common to least-common, under + // the presumption that most writes succeed in totality, and that most + // writes are smaller than the buffer. + // - Is this a partial line (ie, no newlines left in the unwritten tail) + // - If not, does the data out to the last unwritten newline fit in + // the buffer? + // - If not, scan for the last newline that *does* fit in the buffer + let tail = if flushed >= newline_idx { + let tail = &buf[flushed..]; + // Avoid unnecessary short writes by not splitting the remaining + // bytes if they're larger than the buffer. + // They can be written in full by the next call to write. + if tail.len() >= self.buffer.capacity() { + return Ok(flushed); + } + tail + } else if newline_idx - flushed <= self.buffer.capacity() { + &buf[flushed..newline_idx] + } else { + let scan_area = &buf[flushed..]; + let scan_area = &scan_area[..self.buffer.capacity()]; + match memchr::memrchr(b'\n', scan_area) { + Some(newline_idx) => &scan_area[..newline_idx + 1], + None => scan_area, + } + }; + + let buffered = self.buffer.write_to_buf(tail); + Ok(flushed + buffered) + } + + fn flush(&mut self) -> io::Result<()> { + self.buffer.flush() + } + + /// Writes some vectored data into this BufWriter with line buffering. + /// + /// This means that, if any newlines are present in the data, the data up to + /// and including the buffer containing the last newline is sent directly to + /// the inner writer, and the data after it is buffered. Returns the number + /// of bytes written. + /// + /// This function operates on a "best effort basis"; in keeping with the + /// convention of `Write::write`, it makes at most one attempt to write + /// new data to the underlying writer. + /// + /// Because this function attempts to send completed lines to the underlying + /// writer, it will also flush the existing buffer if it contains any + /// newlines. + /// + /// Because sorting through an array of `IoSlice` can be a bit convoluted, + /// This method differs from write in the following ways: + /// + /// - It attempts to write the full content of all the buffers up to and + /// including the one containing the last newline. This means that it + /// may attempt to write a partial line, that buffer has data past the + /// newline. + /// - If the write only reports partial success, it does not attempt to + /// find the precise location of the written bytes and buffer the rest. + /// + /// If the underlying vector doesn't support vectored writing, we instead + /// simply write the first non-empty buffer with `write`. This way, we + /// get the benefits of more granular partial-line handling without losing + /// anything in efficiency + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { + // If there's no specialized behavior for write_vectored, just use + // write. This has the benefit of more granular partial-line handling. + if !self.is_write_vectored() { + return match bufs.iter().find(|buf| !buf.is_empty()) { + Some(buf) => self.write(buf), + None => Ok(0), + }; + } + + // Find the buffer containing the last newline + // FIXME: This is overly slow if there are very many bufs and none contain + // newlines. e.g. writev() on Linux only writes up to 1024 slices, so + // scanning the rest is wasted effort. This makes write_all_vectored() + // quadratic. + let last_newline_buf_idx = bufs + .iter() + .enumerate() + .rev() + .find_map(|(i, buf)| memchr::memchr(b'\n', buf).map(|_| i)); + + // If there are no new newlines (that is, if this write is less than + // one line), just do a regular buffered write + let last_newline_buf_idx = match last_newline_buf_idx { + // No newlines; just do a normal buffered write + None => { + self.flush_if_completed_line()?; + return self.buffer.write_vectored(bufs); + } + Some(i) => i, + }; + + // Flush existing content to prepare for our write + self.buffer.flush_buf()?; + + // This is what we're going to try to write directly to the inner + // writer. The rest will be buffered, if nothing goes wrong. + let (lines, tail) = bufs.split_at(last_newline_buf_idx + 1); + + // Write `lines` directly to the inner writer. In keeping with the + // `write` convention, make at most one attempt to add new (unbuffered) + // data. Because this write doesn't touch the BufWriter state directly, + // and the buffer is known to be empty, we don't need to worry about + // self.panicked here. + let flushed = self.inner_mut().write_vectored(lines)?; + + // If inner returns Ok(0), propagate that to the caller without + // doing additional buffering; otherwise we're just guaranteeing + // an "ErrorKind::WriteZero" later. + if flushed == 0 { + return Ok(0); + } + + // Don't try to reconstruct the exact amount written; just bail + // in the event of a partial write + let mut lines_len: usize = 0; + for buf in lines { + // With overlapping/duplicate slices the total length may in theory + // exceed usize::MAX + lines_len = lines_len.saturating_add(buf.len()); + if flushed < lines_len { + return Ok(flushed); + } + } + + // Now that the write has succeeded, buffer the rest (or as much of the + // rest as possible) + let buffered: usize = tail + .iter() + .filter(|buf| !buf.is_empty()) + .map(|buf| self.buffer.write_to_buf(buf)) + .take_while(|&n| n > 0) + .sum(); + + Ok(flushed + buffered) + } + + fn is_write_vectored(&self) -> bool { + self.inner().is_write_vectored() + } + + /// Writes some data into this BufWriter with line buffering. + /// + /// This means that, if any newlines are present in the data, the data up to + /// the last newline is sent directly to the underlying writer, and data + /// after it is buffered. + /// + /// Because this function attempts to send completed lines to the underlying + /// writer, it will also flush the existing buffer if it contains any + /// newlines, even if the incoming data does not contain any newlines. + fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { + match memchr::memrchr(b'\n', buf) { + // If there are no new newlines (that is, if this write is less than + // one line), just do a regular buffered write (which may flush if + // we exceed the inner buffer's size) + None => { + self.flush_if_completed_line()?; + self.buffer.write_all(buf) + } + Some(newline_idx) => { + let (lines, tail) = buf.split_at(newline_idx + 1); + + if self.buffered().is_empty() { + self.inner_mut().write_all(lines)?; + } else { + // If there is any buffered data, we add the incoming lines + // to that buffer before flushing, which saves us at least + // one write call. We can't really do this with `write`, + // since we can't do this *and* not suppress errors *and* + // report a consistent state to the caller in a return + // value, but here in write_all it's fine. + self.buffer.write_all(lines)?; + self.buffer.flush_buf()?; + } + + self.buffer.write_all(tail) + } + } + } +} diff --git a/crates/std/src/io/buffered/mod.rs b/crates/std/src/io/buffered/mod.rs new file mode 100644 index 0000000..e36f2d9 --- /dev/null +++ b/crates/std/src/io/buffered/mod.rs @@ -0,0 +1,189 @@ +//! Buffering wrappers for I/O traits + +mod bufreader; +mod bufwriter; +mod linewriter; +mod linewritershim; + +#[cfg(test)] +mod tests; + +#[stable(feature = "bufwriter_into_parts", since = "1.56.0")] +pub use bufwriter::WriterPanicked; +use linewritershim::LineWriterShim; + +#[stable(feature = "rust1", since = "1.0.0")] +pub use self::{bufreader::BufReader, bufwriter::BufWriter, linewriter::LineWriter}; +use crate::io::Error; +use crate::{error, fmt}; + +/// An error returned by [`BufWriter::into_inner`] which combines an error that +/// happened while writing out the buffer, and the buffered writer object +/// which may be used to recover from the condition. +/// +/// # Examples +/// +/// ```no_run +/// use std::io::BufWriter; +/// use std::net::TcpStream; +/// +/// let mut stream = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); +/// +/// // do stuff with the stream +/// +/// // we want to get our `TcpStream` back, so let's try: +/// +/// let stream = match stream.into_inner() { +/// Ok(s) => s, +/// Err(e) => { +/// // Here, e is an IntoInnerError +/// panic!("An error occurred"); +/// } +/// }; +/// ``` +#[derive(Debug)] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct IntoInnerError(W, Error); + +impl IntoInnerError { + /// Constructs a new IntoInnerError + fn new(writer: W, error: Error) -> Self { + Self(writer, error) + } + + /// Helper to construct a new IntoInnerError; intended to help with + /// adapters that wrap other adapters + fn new_wrapped(self, f: impl FnOnce(W) -> W2) -> IntoInnerError { + let Self(writer, error) = self; + IntoInnerError::new(f(writer), error) + } + + /// Returns the error which caused the call to [`BufWriter::into_inner()`] + /// to fail. + /// + /// This error was returned when attempting to write the internal buffer. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let mut stream = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // do stuff with the stream + /// + /// // we want to get our `TcpStream` back, so let's try: + /// + /// let stream = match stream.into_inner() { + /// Ok(s) => s, + /// Err(e) => { + /// // Here, e is an IntoInnerError, let's log the inner error. + /// // + /// // We'll just 'log' to stdout for this example. + /// println!("{}", e.error()); + /// + /// panic!("An unexpected error occurred."); + /// } + /// }; + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn error(&self) -> &Error { + &self.1 + } + + /// Returns the buffered writer instance which generated the error. + /// + /// The returned object can be used for error recovery, such as + /// re-inspecting the buffer. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let mut stream = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // do stuff with the stream + /// + /// // we want to get our `TcpStream` back, so let's try: + /// + /// let stream = match stream.into_inner() { + /// Ok(s) => s, + /// Err(e) => { + /// // Here, e is an IntoInnerError, let's re-examine the buffer: + /// let buffer = e.into_inner(); + /// + /// // do stuff to try to recover + /// + /// // afterwards, let's just return the stream + /// buffer.into_inner().unwrap() + /// } + /// }; + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_inner(self) -> W { + self.0 + } + + /// Consumes the [`IntoInnerError`] and returns the error which caused the call to + /// [`BufWriter::into_inner()`] to fail. Unlike `error`, this can be used to + /// obtain ownership of the underlying error. + /// + /// # Example + /// ``` + /// use std::io::{BufWriter, ErrorKind, Write}; + /// + /// let mut not_enough_space = [0u8; 10]; + /// let mut stream = BufWriter::new(not_enough_space.as_mut()); + /// write!(stream, "this cannot be actually written").unwrap(); + /// let into_inner_err = stream.into_inner().expect_err("now we discover it's too small"); + /// let err = into_inner_err.into_error(); + /// assert_eq!(err.kind(), ErrorKind::WriteZero); + /// ``` + #[stable(feature = "io_into_inner_error_parts", since = "1.55.0")] + pub fn into_error(self) -> Error { + self.1 + } + + /// Consumes the [`IntoInnerError`] and returns the error which caused the call to + /// [`BufWriter::into_inner()`] to fail, and the underlying writer. + /// + /// This can be used to simply obtain ownership of the underlying error; it can also be used for + /// advanced error recovery. + /// + /// # Example + /// ``` + /// use std::io::{BufWriter, ErrorKind, Write}; + /// + /// let mut not_enough_space = [0u8; 10]; + /// let mut stream = BufWriter::new(not_enough_space.as_mut()); + /// write!(stream, "this cannot be actually written").unwrap(); + /// let into_inner_err = stream.into_inner().expect_err("now we discover it's too small"); + /// let (err, recovered_writer) = into_inner_err.into_parts(); + /// assert_eq!(err.kind(), ErrorKind::WriteZero); + /// assert_eq!(recovered_writer.buffer(), b"t be actually written"); + /// ``` + #[stable(feature = "io_into_inner_error_parts", since = "1.55.0")] + pub fn into_parts(self) -> (Error, W) { + (self.1, self.0) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl From> for Error { + fn from(iie: IntoInnerError) -> Error { + iie.1 + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl error::Error for IntoInnerError {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for IntoInnerError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.error().fmt(f) + } +} diff --git a/crates/std/src/io/copy.rs b/crates/std/src/io/copy.rs new file mode 100644 index 0000000..b28fea8 --- /dev/null +++ b/crates/std/src/io/copy.rs @@ -0,0 +1,297 @@ +use super::{BorrowedBuf, BufReader, BufWriter, DEFAULT_BUF_SIZE, Read, Result, Write}; +use crate::alloc::Allocator; +use crate::cmp; +use alloc_crate::collections::VecDeque; +use crate::io::IoSlice; +use crate::mem::MaybeUninit; +use crate::sys::io::{CopyState, kernel_copy}; + +#[cfg(test)] +mod tests; + +/// Copies the entire contents of a reader into a writer. +/// +/// This function will continuously read data from `reader` and then +/// write it into `writer` in a streaming fashion until `reader` +/// returns EOF. +/// +/// On success, the total number of bytes that were copied from +/// `reader` to `writer` is returned. +/// +/// If you want to copy the contents of one file to another and you’re +/// working with filesystem paths, see the [`fs::copy`] function. +/// +/// [`fs::copy`]: crate::fs::copy +/// +/// # Errors +/// +/// This function will return an error immediately if any call to [`read`] or +/// [`write`] returns an error. All instances of [`ErrorKind::Interrupted`] are +/// handled by this function and the underlying operation is retried. +/// +/// [`read`]: Read::read +/// [`write`]: Write::write +/// [`ErrorKind::Interrupted`]: crate::io::ErrorKind::Interrupted +/// +/// # Examples +/// +/// ``` +/// use std::io; +/// +/// fn main() -> io::Result<()> { +/// let mut reader: &[u8] = b"hello"; +/// let mut writer: Vec = vec![]; +/// +/// io::copy(&mut reader, &mut writer)?; +/// +/// assert_eq!(&b"hello"[..], &writer[..]); +/// Ok(()) +/// } +/// ``` +/// +/// # Platform-specific behavior +/// +/// On Linux (including Android), this function uses `copy_file_range(2)`, +/// `sendfile(2)` or `splice(2)` syscalls to move data directly between file +/// descriptors if possible. +/// +/// Note that platform-specific behavior [may change in the future][changes]. +/// +/// [changes]: crate::io#platform-specific-behavior +#[stable(feature = "rust1", since = "1.0.0")] +pub fn copy(reader: &mut R, writer: &mut W) -> Result +where + R: Read, + W: Write, +{ + match kernel_copy(reader, writer)? { + CopyState::Ended(copied) => Ok(copied), + CopyState::Fallback(copied) => { + generic_copy(reader, writer).map(|additional| copied + additional) + } + } +} + +/// The userspace read-write-loop implementation of `io::copy` that is used when +/// OS-specific specializations for copy offloading are not available or not applicable. +fn generic_copy(reader: &mut R, writer: &mut W) -> Result +where + R: Read, + W: Write, +{ + let read_buf = BufferedReaderSpec::buffer_size(reader); + let write_buf = BufferedWriterSpec::buffer_size(writer); + + if read_buf >= DEFAULT_BUF_SIZE && read_buf >= write_buf { + return BufferedReaderSpec::copy_to(reader, writer); + } + + BufferedWriterSpec::copy_from(writer, reader) +} + +/// Specialization of the read-write loop that reuses the internal +/// buffer of a BufReader. If there's no buffer then the writer side +/// should be used instead. +trait BufferedReaderSpec { + fn buffer_size(&self) -> usize; + + fn copy_to(&mut self, to: &mut (impl Write + ?Sized)) -> Result; +} + +impl BufferedReaderSpec for T +where + Self: Read, + T: ?Sized, +{ + #[inline] + default fn buffer_size(&self) -> usize { + 0 + } + + default fn copy_to(&mut self, _to: &mut (impl Write + ?Sized)) -> Result { + unreachable!("only called from specializations") + } +} + +impl BufferedReaderSpec for &[u8] { + fn buffer_size(&self) -> usize { + // prefer this specialization since the source "buffer" is all we'll ever need, + // even if it's small + usize::MAX + } + + fn copy_to(&mut self, to: &mut (impl Write + ?Sized)) -> Result { + let len = self.len(); + to.write_all(self)?; + *self = &self[len..]; + Ok(len as u64) + } +} + +impl BufferedReaderSpec for VecDeque { + fn buffer_size(&self) -> usize { + // prefer this specialization since the source "buffer" is all we'll ever need, + // even if it's small + usize::MAX + } + + fn copy_to(&mut self, to: &mut (impl Write + ?Sized)) -> Result { + let len = self.len(); + let (front, back) = self.as_slices(); + let bufs = &mut [IoSlice::new(front), IoSlice::new(back)]; + to.write_all_vectored(bufs)?; + self.clear(); + Ok(len as u64) + } +} + +impl BufferedReaderSpec for BufReader +where + Self: Read, + I: ?Sized, +{ + fn buffer_size(&self) -> usize { + self.capacity() + } + + fn copy_to(&mut self, to: &mut (impl Write + ?Sized)) -> Result { + let mut len = 0; + + loop { + // Hack: this relies on `impl Read for BufReader` always calling fill_buf + // if the buffer is empty, even for empty slices. + // It can't be called directly here since specialization prevents us + // from adding I: Read + match self.read(&mut []) { + Ok(_) => {} + Err(e) if e.is_interrupted() => continue, + Err(e) => return Err(e), + } + let buf = self.buffer(); + if self.buffer().len() == 0 { + return Ok(len); + } + + // In case the writer side is a BufWriter then its write_all + // implements an optimization that passes through large + // buffers to the underlying writer. That code path is #[cold] + // but we're still avoiding redundant memcopies when doing + // a copy between buffered inputs and outputs. + to.write_all(buf)?; + len += buf.len() as u64; + self.discard_buffer(); + } + } +} + +/// Specialization of the read-write loop that either uses a stack buffer +/// or reuses the internal buffer of a BufWriter +trait BufferedWriterSpec: Write { + fn buffer_size(&self) -> usize; + + fn copy_from(&mut self, reader: &mut R) -> Result; +} + +impl BufferedWriterSpec for W { + #[inline] + default fn buffer_size(&self) -> usize { + 0 + } + + default fn copy_from(&mut self, reader: &mut R) -> Result { + stack_buffer_copy(reader, self) + } +} + +impl BufferedWriterSpec for BufWriter { + fn buffer_size(&self) -> usize { + self.capacity() + } + + fn copy_from(&mut self, reader: &mut R) -> Result { + if self.capacity() < DEFAULT_BUF_SIZE { + return stack_buffer_copy(reader, self); + } + + let mut len = 0; + let mut init = 0; + + loop { + let buf = self.buffer_mut(); + let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into(); + + unsafe { + // SAFETY: init is either 0 or the init_len from the previous iteration. + read_buf.set_init(init); + } + + if read_buf.capacity() >= DEFAULT_BUF_SIZE { + let mut cursor = read_buf.unfilled(); + match reader.read_buf(cursor.reborrow()) { + Ok(()) => { + let bytes_read = cursor.written(); + + if bytes_read == 0 { + return Ok(len); + } + + init = read_buf.init_len() - bytes_read; + len += bytes_read as u64; + + // SAFETY: BorrowedBuf guarantees all of its filled bytes are init + unsafe { buf.set_len(buf.len() + bytes_read) }; + + // Read again if the buffer still has enough capacity, as BufWriter itself would do + // This will occur if the reader returns short reads + } + Err(ref e) if e.is_interrupted() => {} + Err(e) => return Err(e), + } + } else { + // All the bytes that were already in the buffer are initialized, + // treat them as such when the buffer is flushed. + init += buf.len(); + + self.flush_buf()?; + } + } + } +} + +impl BufferedWriterSpec for Vec { + fn buffer_size(&self) -> usize { + cmp::max(DEFAULT_BUF_SIZE, self.capacity() - self.len()) + } + + fn copy_from(&mut self, reader: &mut R) -> Result { + reader.read_to_end(self).map(|bytes| u64::try_from(bytes).expect("usize overflowed u64")) + } +} + +fn stack_buffer_copy( + reader: &mut R, + writer: &mut W, +) -> Result { + let buf: &mut [_] = &mut [MaybeUninit::uninit(); DEFAULT_BUF_SIZE]; + let mut buf: BorrowedBuf<'_> = buf.into(); + + let mut len = 0; + + loop { + match reader.read_buf(buf.unfilled()) { + Ok(()) => {} + Err(e) if e.is_interrupted() => continue, + Err(e) => return Err(e), + }; + + if buf.filled().is_empty() { + break; + } + + len += buf.filled().len() as u64; + writer.write_all(buf.filled())?; + buf.clear(); + } + + Ok(len) +} diff --git a/crates/std/src/io/cursor.rs b/crates/std/src/io/cursor.rs index 9067b86..d7131e2 100644 --- a/crates/std/src/io/cursor.rs +++ b/crates/std/src/io/cursor.rs @@ -1,5 +1,5 @@ -// #[cfg(test)] -// mod tests; +#[cfg(test)] +mod tests; use crate::alloc::Allocator; use crate::cmp; diff --git a/crates/std/src/io/cursor/tests.rs b/crates/std/src/io/cursor/tests.rs new file mode 100644 index 0000000..d7c203c --- /dev/null +++ b/crates/std/src/io/cursor/tests.rs @@ -0,0 +1,567 @@ +use crate::io::prelude::*; +use crate::io::{Cursor, IoSlice, IoSliceMut, SeekFrom}; + +#[test] +fn test_vec_writer() { + let mut writer = Vec::new(); + assert_eq!(writer.write(&[0]).unwrap(), 1); + assert_eq!(writer.write(&[1, 2, 3]).unwrap(), 3); + assert_eq!(writer.write(&[4, 5, 6, 7]).unwrap(), 4); + assert_eq!( + writer + .write_vectored(&[IoSlice::new(&[]), IoSlice::new(&[8, 9]), IoSlice::new(&[10])],) + .unwrap(), + 3 + ); + let b: &[_] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + assert_eq!(writer, b); +} + +#[test] +fn test_mem_writer() { + let mut writer = Cursor::new(Vec::new()); + writer.set_position(10); + assert_eq!(writer.write(&[0]).unwrap(), 1); + assert_eq!(writer.write(&[1, 2, 3]).unwrap(), 3); + assert_eq!(writer.write(&[4, 5, 6, 7]).unwrap(), 4); + assert_eq!( + writer + .write_vectored(&[IoSlice::new(&[]), IoSlice::new(&[8, 9]), IoSlice::new(&[10])],) + .unwrap(), + 3 + ); + let b: &[_] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + assert_eq!(&writer.get_ref()[..10], &[0; 10]); + assert_eq!(&writer.get_ref()[10..], b); +} + +#[test] +fn test_mem_writer_preallocated() { + let mut writer = Cursor::new(vec![0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10]); + assert_eq!(writer.write(&[0]).unwrap(), 1); + assert_eq!(writer.write(&[1, 2, 3]).unwrap(), 3); + assert_eq!(writer.write(&[4, 5, 6, 7]).unwrap(), 4); + let b: &[_] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + assert_eq!(&writer.get_ref()[..], b); +} + +#[test] +fn test_mem_mut_writer() { + let mut vec = Vec::new(); + let mut writer = Cursor::new(&mut vec); + assert_eq!(writer.write(&[0]).unwrap(), 1); + assert_eq!(writer.write(&[1, 2, 3]).unwrap(), 3); + assert_eq!(writer.write(&[4, 5, 6, 7]).unwrap(), 4); + assert_eq!( + writer + .write_vectored(&[IoSlice::new(&[]), IoSlice::new(&[8, 9]), IoSlice::new(&[10])],) + .unwrap(), + 3 + ); + let b: &[_] = &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + assert_eq!(&writer.get_ref()[..], b); +} + +fn test_slice_writer(writer: &mut Cursor) +where + T: AsRef<[u8]>, + Cursor: Write, +{ + assert_eq!(writer.position(), 0); + assert_eq!(writer.write(&[0]).unwrap(), 1); + assert_eq!(writer.position(), 1); + assert_eq!(writer.write(&[1, 2, 3]).unwrap(), 3); + assert_eq!(writer.write(&[4, 5, 6, 7]).unwrap(), 4); + assert_eq!(writer.position(), 8); + assert_eq!(writer.write(&[]).unwrap(), 0); + assert_eq!(writer.position(), 8); + + assert_eq!(writer.write(&[8, 9]).unwrap(), 1); + assert_eq!(writer.write(&[10]).unwrap(), 0); + let b: &[_] = &[0, 1, 2, 3, 4, 5, 6, 7, 8]; + assert_eq!(writer.get_ref().as_ref(), b); +} + +fn test_slice_writer_vectored(writer: &mut Cursor) +where + T: AsRef<[u8]>, + Cursor: Write, +{ + assert_eq!(writer.position(), 0); + assert_eq!(writer.write_vectored(&[IoSlice::new(&[0])]).unwrap(), 1); + assert_eq!(writer.position(), 1); + assert_eq!( + writer.write_vectored(&[IoSlice::new(&[1, 2, 3]), IoSlice::new(&[4, 5, 6, 7]),]).unwrap(), + 7, + ); + assert_eq!(writer.position(), 8); + assert_eq!(writer.write_vectored(&[]).unwrap(), 0); + assert_eq!(writer.position(), 8); + + assert_eq!(writer.write_vectored(&[IoSlice::new(&[8, 9])]).unwrap(), 1); + assert_eq!(writer.write_vectored(&[IoSlice::new(&[10])]).unwrap(), 0); + let b: &[_] = &[0, 1, 2, 3, 4, 5, 6, 7, 8]; + assert_eq!(writer.get_ref().as_ref(), b); +} + +#[test] +fn test_box_slice_writer() { + let mut writer = Cursor::new(vec![0u8; 9].into_boxed_slice()); + test_slice_writer(&mut writer); +} + +#[test] +fn test_box_slice_writer_vectored() { + let mut writer = Cursor::new(vec![0u8; 9].into_boxed_slice()); + test_slice_writer_vectored(&mut writer); +} + +#[test] +fn test_array_writer() { + let mut writer = Cursor::new([0u8; 9]); + test_slice_writer(&mut writer); +} + +#[test] +fn test_array_writer_vectored() { + let mut writer = Cursor::new([0u8; 9]); + test_slice_writer_vectored(&mut writer); +} + +#[test] +fn test_buf_writer() { + let mut buf = [0 as u8; 9]; + let mut writer = Cursor::new(&mut buf[..]); + test_slice_writer(&mut writer); +} + +#[test] +fn test_buf_writer_vectored() { + let mut buf = [0 as u8; 9]; + let mut writer = Cursor::new(&mut buf[..]); + test_slice_writer_vectored(&mut writer); +} + +#[test] +fn test_buf_writer_seek() { + let mut buf = [0 as u8; 8]; + { + let mut writer = Cursor::new(&mut buf[..]); + assert_eq!(writer.position(), 0); + assert_eq!(writer.write(&[1]).unwrap(), 1); + assert_eq!(writer.position(), 1); + + assert_eq!(writer.seek(SeekFrom::Start(2)).unwrap(), 2); + assert_eq!(writer.position(), 2); + assert_eq!(writer.write(&[2]).unwrap(), 1); + assert_eq!(writer.position(), 3); + + assert_eq!(writer.seek(SeekFrom::Current(-2)).unwrap(), 1); + assert_eq!(writer.position(), 1); + assert_eq!(writer.write(&[3]).unwrap(), 1); + assert_eq!(writer.position(), 2); + + assert_eq!(writer.seek(SeekFrom::End(-1)).unwrap(), 7); + assert_eq!(writer.position(), 7); + assert_eq!(writer.write(&[4]).unwrap(), 1); + assert_eq!(writer.position(), 8); + } + let b: &[_] = &[1, 3, 2, 0, 0, 0, 0, 4]; + assert_eq!(buf, b); +} + +#[test] +fn test_buf_writer_error() { + let mut buf = [0 as u8; 2]; + let mut writer = Cursor::new(&mut buf[..]); + assert_eq!(writer.write(&[0]).unwrap(), 1); + assert_eq!(writer.write(&[0, 0]).unwrap(), 1); + assert_eq!(writer.write(&[0, 0]).unwrap(), 0); +} + +#[test] +fn test_mem_reader() { + let mut reader = Cursor::new(vec![0, 1, 2, 3, 4, 5, 6, 7]); + let mut buf = []; + assert_eq!(reader.read(&mut buf).unwrap(), 0); + assert_eq!(reader.position(), 0); + let mut buf = [0]; + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.position(), 1); + let b: &[_] = &[0]; + assert_eq!(buf, b); + let mut buf = [0; 4]; + assert_eq!(reader.read(&mut buf).unwrap(), 4); + assert_eq!(reader.position(), 5); + let b: &[_] = &[1, 2, 3, 4]; + assert_eq!(buf, b); + assert_eq!(reader.read(&mut buf).unwrap(), 3); + let b: &[_] = &[5, 6, 7]; + assert_eq!(&buf[..3], b); + assert_eq!(reader.read(&mut buf).unwrap(), 0); +} + +#[test] +fn test_mem_reader_vectored() { + let mut reader = Cursor::new(vec![0, 1, 2, 3, 4, 5, 6, 7]); + let mut buf = []; + assert_eq!(reader.read_vectored(&mut [IoSliceMut::new(&mut buf)]).unwrap(), 0); + assert_eq!(reader.position(), 0); + let mut buf = [0]; + assert_eq!( + reader.read_vectored(&mut [IoSliceMut::new(&mut []), IoSliceMut::new(&mut buf),]).unwrap(), + 1, + ); + assert_eq!(reader.position(), 1); + let b: &[_] = &[0]; + assert_eq!(buf, b); + let mut buf1 = [0; 4]; + let mut buf2 = [0; 4]; + assert_eq!( + reader + .read_vectored(&mut [IoSliceMut::new(&mut buf1), IoSliceMut::new(&mut buf2),]) + .unwrap(), + 7, + ); + let b1: &[_] = &[1, 2, 3, 4]; + let b2: &[_] = &[5, 6, 7]; + assert_eq!(buf1, b1); + assert_eq!(&buf2[..3], b2); + assert_eq!(reader.read(&mut buf).unwrap(), 0); +} + +#[test] +fn test_boxed_slice_reader() { + let mut reader = Cursor::new(vec![0, 1, 2, 3, 4, 5, 6, 7].into_boxed_slice()); + let mut buf = []; + assert_eq!(reader.read(&mut buf).unwrap(), 0); + assert_eq!(reader.position(), 0); + let mut buf = [0]; + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.position(), 1); + let b: &[_] = &[0]; + assert_eq!(buf, b); + let mut buf = [0; 4]; + assert_eq!(reader.read(&mut buf).unwrap(), 4); + assert_eq!(reader.position(), 5); + let b: &[_] = &[1, 2, 3, 4]; + assert_eq!(buf, b); + assert_eq!(reader.read(&mut buf).unwrap(), 3); + let b: &[_] = &[5, 6, 7]; + assert_eq!(&buf[..3], b); + assert_eq!(reader.read(&mut buf).unwrap(), 0); +} + +#[test] +fn test_boxed_slice_reader_vectored() { + let mut reader = Cursor::new(vec![0, 1, 2, 3, 4, 5, 6, 7].into_boxed_slice()); + let mut buf = []; + assert_eq!(reader.read_vectored(&mut [IoSliceMut::new(&mut buf)]).unwrap(), 0); + assert_eq!(reader.position(), 0); + let mut buf = [0]; + assert_eq!( + reader.read_vectored(&mut [IoSliceMut::new(&mut []), IoSliceMut::new(&mut buf),]).unwrap(), + 1, + ); + assert_eq!(reader.position(), 1); + let b: &[_] = &[0]; + assert_eq!(buf, b); + let mut buf1 = [0; 4]; + let mut buf2 = [0; 4]; + assert_eq!( + reader + .read_vectored(&mut [IoSliceMut::new(&mut buf1), IoSliceMut::new(&mut buf2)],) + .unwrap(), + 7, + ); + let b1: &[_] = &[1, 2, 3, 4]; + let b2: &[_] = &[5, 6, 7]; + assert_eq!(buf1, b1); + assert_eq!(&buf2[..3], b2); + assert_eq!(reader.read(&mut buf).unwrap(), 0); +} + +#[test] +fn read_to_end() { + let mut reader = Cursor::new(vec![0, 1, 2, 3, 4, 5, 6, 7]); + let mut v = Vec::new(); + reader.read_to_end(&mut v).unwrap(); + assert_eq!(v, [0, 1, 2, 3, 4, 5, 6, 7]); +} + +#[test] +fn test_slice_reader() { + let in_buf = vec![0, 1, 2, 3, 4, 5, 6, 7]; + let reader = &mut &in_buf[..]; + let mut buf = []; + assert_eq!(reader.read(&mut buf).unwrap(), 0); + let mut buf = [0]; + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.len(), 7); + let b: &[_] = &[0]; + assert_eq!(&buf[..], b); + let mut buf = [0; 4]; + assert_eq!(reader.read(&mut buf).unwrap(), 4); + assert_eq!(reader.len(), 3); + let b: &[_] = &[1, 2, 3, 4]; + assert_eq!(&buf[..], b); + assert_eq!(reader.read(&mut buf).unwrap(), 3); + let b: &[_] = &[5, 6, 7]; + assert_eq!(&buf[..3], b); + assert_eq!(reader.read(&mut buf).unwrap(), 0); +} + +#[test] +fn test_slice_reader_vectored() { + let in_buf = vec![0, 1, 2, 3, 4, 5, 6, 7]; + let reader = &mut &in_buf[..]; + let mut buf = []; + assert_eq!(reader.read_vectored(&mut [IoSliceMut::new(&mut buf)]).unwrap(), 0); + let mut buf = [0]; + assert_eq!( + reader.read_vectored(&mut [IoSliceMut::new(&mut []), IoSliceMut::new(&mut buf),]).unwrap(), + 1, + ); + assert_eq!(reader.len(), 7); + let b: &[_] = &[0]; + assert_eq!(buf, b); + let mut buf1 = [0; 4]; + let mut buf2 = [0; 4]; + assert_eq!( + reader + .read_vectored(&mut [IoSliceMut::new(&mut buf1), IoSliceMut::new(&mut buf2)],) + .unwrap(), + 7, + ); + let b1: &[_] = &[1, 2, 3, 4]; + let b2: &[_] = &[5, 6, 7]; + assert_eq!(buf1, b1); + assert_eq!(&buf2[..3], b2); + assert_eq!(reader.read(&mut buf).unwrap(), 0); +} + +#[test] +fn test_read_exact() { + let in_buf = vec![0, 1, 2, 3, 4, 5, 6, 7]; + let reader = &mut &in_buf[..]; + let mut buf = []; + assert!(reader.read_exact(&mut buf).is_ok()); + let mut buf = [8]; + assert!(reader.read_exact(&mut buf).is_ok()); + assert_eq!(buf[0], 0); + assert_eq!(reader.len(), 7); + let mut buf = [0, 0, 0, 0, 0, 0, 0]; + assert!(reader.read_exact(&mut buf).is_ok()); + assert_eq!(buf, [1, 2, 3, 4, 5, 6, 7]); + assert_eq!(reader.len(), 0); + let mut buf = [0]; + assert!(reader.read_exact(&mut buf).is_err()); +} + +#[test] +fn test_buf_reader() { + let in_buf = vec![0, 1, 2, 3, 4, 5, 6, 7]; + let mut reader = Cursor::new(&in_buf[..]); + let mut buf = []; + assert_eq!(reader.read(&mut buf).unwrap(), 0); + assert_eq!(reader.position(), 0); + let mut buf = [0]; + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.position(), 1); + let b: &[_] = &[0]; + assert_eq!(buf, b); + let mut buf = [0; 4]; + assert_eq!(reader.read(&mut buf).unwrap(), 4); + assert_eq!(reader.position(), 5); + let b: &[_] = &[1, 2, 3, 4]; + assert_eq!(buf, b); + assert_eq!(reader.read(&mut buf).unwrap(), 3); + let b: &[_] = &[5, 6, 7]; + assert_eq!(&buf[..3], b); + assert_eq!(reader.read(&mut buf).unwrap(), 0); +} + +#[test] +fn seek_past_end() { + let buf = [0xff]; + let mut r = Cursor::new(&buf[..]); + assert_eq!(r.seek(SeekFrom::Start(10)).unwrap(), 10); + assert_eq!(r.read(&mut [0]).unwrap(), 0); + + let mut r = Cursor::new(vec![10]); + assert_eq!(r.seek(SeekFrom::Start(10)).unwrap(), 10); + assert_eq!(r.read(&mut [0]).unwrap(), 0); + + let mut buf = [0]; + let mut r = Cursor::new(&mut buf[..]); + assert_eq!(r.seek(SeekFrom::Start(10)).unwrap(), 10); + assert_eq!(r.write(&[3]).unwrap(), 0); + + let mut r = Cursor::new(vec![10].into_boxed_slice()); + assert_eq!(r.seek(SeekFrom::Start(10)).unwrap(), 10); + assert_eq!(r.write(&[3]).unwrap(), 0); +} + +#[test] +fn seek_past_i64() { + let buf = [0xff]; + let mut r = Cursor::new(&buf[..]); + assert_eq!(r.seek(SeekFrom::Start(6)).unwrap(), 6); + assert_eq!(r.seek(SeekFrom::Current(0x7ffffffffffffff0)).unwrap(), 0x7ffffffffffffff6); + assert_eq!(r.seek(SeekFrom::Current(0x10)).unwrap(), 0x8000000000000006); + assert_eq!(r.seek(SeekFrom::Current(0)).unwrap(), 0x8000000000000006); + assert!(r.seek(SeekFrom::Current(0x7ffffffffffffffd)).is_err()); + assert_eq!(r.seek(SeekFrom::Current(-0x8000000000000000)).unwrap(), 6); + + let mut r = Cursor::new(vec![10]); + assert_eq!(r.seek(SeekFrom::Start(6)).unwrap(), 6); + assert_eq!(r.seek(SeekFrom::Current(0x7ffffffffffffff0)).unwrap(), 0x7ffffffffffffff6); + assert_eq!(r.seek(SeekFrom::Current(0x10)).unwrap(), 0x8000000000000006); + assert_eq!(r.seek(SeekFrom::Current(0)).unwrap(), 0x8000000000000006); + assert!(r.seek(SeekFrom::Current(0x7ffffffffffffffd)).is_err()); + assert_eq!(r.seek(SeekFrom::Current(-0x8000000000000000)).unwrap(), 6); + + let mut buf = [0]; + let mut r = Cursor::new(&mut buf[..]); + assert_eq!(r.seek(SeekFrom::Start(6)).unwrap(), 6); + assert_eq!(r.seek(SeekFrom::Current(0x7ffffffffffffff0)).unwrap(), 0x7ffffffffffffff6); + assert_eq!(r.seek(SeekFrom::Current(0x10)).unwrap(), 0x8000000000000006); + assert_eq!(r.seek(SeekFrom::Current(0)).unwrap(), 0x8000000000000006); + assert!(r.seek(SeekFrom::Current(0x7ffffffffffffffd)).is_err()); + assert_eq!(r.seek(SeekFrom::Current(-0x8000000000000000)).unwrap(), 6); + + let mut r = Cursor::new(vec![10].into_boxed_slice()); + assert_eq!(r.seek(SeekFrom::Start(6)).unwrap(), 6); + assert_eq!(r.seek(SeekFrom::Current(0x7ffffffffffffff0)).unwrap(), 0x7ffffffffffffff6); + assert_eq!(r.seek(SeekFrom::Current(0x10)).unwrap(), 0x8000000000000006); + assert_eq!(r.seek(SeekFrom::Current(0)).unwrap(), 0x8000000000000006); + assert!(r.seek(SeekFrom::Current(0x7ffffffffffffffd)).is_err()); + assert_eq!(r.seek(SeekFrom::Current(-0x8000000000000000)).unwrap(), 6); +} + +#[test] +fn seek_before_0() { + let buf = [0xff]; + let mut r = Cursor::new(&buf[..]); + assert!(r.seek(SeekFrom::End(-2)).is_err()); + + let mut r = Cursor::new(vec![10]); + assert!(r.seek(SeekFrom::End(-2)).is_err()); + + let mut buf = [0]; + let mut r = Cursor::new(&mut buf[..]); + assert!(r.seek(SeekFrom::End(-2)).is_err()); + + let mut r = Cursor::new(vec![10].into_boxed_slice()); + assert!(r.seek(SeekFrom::End(-2)).is_err()); +} + +#[test] +fn test_seekable_mem_writer() { + let mut writer = Cursor::new(Vec::::new()); + assert_eq!(writer.position(), 0); + assert_eq!(writer.write(&[0]).unwrap(), 1); + assert_eq!(writer.position(), 1); + assert_eq!(writer.write(&[1, 2, 3]).unwrap(), 3); + assert_eq!(writer.write(&[4, 5, 6, 7]).unwrap(), 4); + assert_eq!(writer.position(), 8); + let b: &[_] = &[0, 1, 2, 3, 4, 5, 6, 7]; + assert_eq!(&writer.get_ref()[..], b); + + assert_eq!(writer.seek(SeekFrom::Start(0)).unwrap(), 0); + assert_eq!(writer.position(), 0); + assert_eq!(writer.write(&[3, 4]).unwrap(), 2); + let b: &[_] = &[3, 4, 2, 3, 4, 5, 6, 7]; + assert_eq!(&writer.get_ref()[..], b); + + assert_eq!(writer.seek(SeekFrom::Current(1)).unwrap(), 3); + assert_eq!(writer.write(&[0, 1]).unwrap(), 2); + let b: &[_] = &[3, 4, 2, 0, 1, 5, 6, 7]; + assert_eq!(&writer.get_ref()[..], b); + + assert_eq!(writer.seek(SeekFrom::End(-1)).unwrap(), 7); + assert_eq!(writer.write(&[1, 2]).unwrap(), 2); + let b: &[_] = &[3, 4, 2, 0, 1, 5, 6, 1, 2]; + assert_eq!(&writer.get_ref()[..], b); + + assert_eq!(writer.seek(SeekFrom::End(1)).unwrap(), 10); + assert_eq!(writer.write(&[1]).unwrap(), 1); + let b: &[_] = &[3, 4, 2, 0, 1, 5, 6, 1, 2, 0, 1]; + assert_eq!(&writer.get_ref()[..], b); +} + +#[test] +fn vec_seek_past_end() { + let mut r = Cursor::new(Vec::new()); + assert_eq!(r.seek(SeekFrom::Start(10)).unwrap(), 10); + assert_eq!(r.write(&[3]).unwrap(), 1); +} + +#[test] +fn vec_seek_before_0() { + let mut r = Cursor::new(Vec::new()); + assert!(r.seek(SeekFrom::End(-2)).is_err()); +} + +#[test] +#[cfg(target_pointer_width = "32")] +fn vec_seek_and_write_past_usize_max() { + let mut c = Cursor::new(Vec::new()); + c.set_position(usize::MAX as u64 + 1); + assert!(c.write_all(&[1, 2, 3]).is_err()); +} + +#[test] +fn test_partial_eq() { + assert_eq!(Cursor::new(Vec::::new()), Cursor::new(Vec::::new())); +} + +#[test] +fn test_eq() { + struct AssertEq(pub T); + + let _: AssertEq>> = AssertEq(Cursor::new(Vec::new())); +} + +#[allow(dead_code)] +fn const_cursor() { + const CURSOR: Cursor<&[u8]> = Cursor::new(&[0]); + const _: &&[u8] = CURSOR.get_ref(); + const _: u64 = CURSOR.position(); +} + +#[bench] +fn bench_write_vec(b: &mut test::Bencher) { + let slice = &[1; 128]; + + b.iter(|| { + let mut buf = b"some random data to overwrite".to_vec(); + let mut cursor = Cursor::new(&mut buf); + + let _ = cursor.write_all(slice); + test::black_box(&cursor); + }) +} + +#[bench] +fn bench_write_vec_vectored(b: &mut test::Bencher) { + let slices = [ + IoSlice::new(&[1; 128]), + IoSlice::new(&[2; 256]), + IoSlice::new(&[3; 512]), + IoSlice::new(&[4; 1024]), + IoSlice::new(&[5; 2048]), + IoSlice::new(&[6; 4096]), + IoSlice::new(&[7; 8192]), + IoSlice::new(&[8; 8192 * 2]), + ]; + + b.iter(|| { + let mut buf = b"some random data to overwrite".to_vec(); + let mut cursor = Cursor::new(&mut buf); + + let mut slices = slices; + let _ = cursor.write_all_vectored(&mut slices); + test::black_box(&cursor); + }) +} diff --git a/crates/std/src/io/impls.rs b/crates/std/src/io/impls.rs index ad4bb2a..980e915 100644 --- a/crates/std/src/io/impls.rs +++ b/crates/std/src/io/impls.rs @@ -1,5 +1,5 @@ -// #[cfg(test)] -// mod tests; +#[cfg(test)] +mod tests; use crate::alloc::Allocator; use alloc_crate::collections::VecDeque; diff --git a/crates/std/src/io/impls/tests.rs b/crates/std/src/io/impls/tests.rs new file mode 100644 index 0000000..d1cd84a --- /dev/null +++ b/crates/std/src/io/impls/tests.rs @@ -0,0 +1,57 @@ +use crate::io::prelude::*; + +#[bench] +fn bench_read_slice(b: &mut test::Bencher) { + let buf = [5; 1024]; + let mut dst = [0; 128]; + + b.iter(|| { + let mut rd = &buf[..]; + for _ in 0..8 { + let _ = rd.read(&mut dst); + test::black_box(&dst); + } + }) +} + +#[bench] +fn bench_write_slice(b: &mut test::Bencher) { + let mut buf = [0; 1024]; + let src = [5; 128]; + + b.iter(|| { + let mut wr = &mut buf[..]; + for _ in 0..8 { + let _ = wr.write_all(&src); + test::black_box(&wr); + } + }) +} + +#[bench] +fn bench_read_vec(b: &mut test::Bencher) { + let buf = vec![5; 1024]; + let mut dst = [0; 128]; + + b.iter(|| { + let mut rd = &buf[..]; + for _ in 0..8 { + let _ = rd.read(&mut dst); + test::black_box(&dst); + } + }) +} + +#[bench] +fn bench_write_vec(b: &mut test::Bencher) { + let mut buf = Vec::with_capacity(1024); + let src = [5; 128]; + + b.iter(|| { + let mut wr = &mut buf[..]; + for _ in 0..8 { + let _ = wr.write_all(&src); + test::black_box(&wr); + } + }) +} diff --git a/crates/std/src/io/tests.rs b/crates/std/src/io/tests.rs new file mode 100644 index 0000000..b22988d --- /dev/null +++ b/crates/std/src/io/tests.rs @@ -0,0 +1,947 @@ +use super::{BorrowedBuf, Cursor, SeekFrom, repeat}; +use crate::cmp::{self, min}; +use crate::io::{ + self, BufRead, BufReader, DEFAULT_BUF_SIZE, IoSlice, IoSliceMut, Read, Seek, Write, +}; +use crate::mem::MaybeUninit; +use crate::ops::Deref; + +#[test] +fn read_until() { + let mut buf = Cursor::new(&b"12"[..]); + let mut v = Vec::new(); + assert_eq!(buf.read_until(b'3', &mut v).unwrap(), 2); + assert_eq!(v, b"12"); + + let mut buf = Cursor::new(&b"1233"[..]); + let mut v = Vec::new(); + assert_eq!(buf.read_until(b'3', &mut v).unwrap(), 3); + assert_eq!(v, b"123"); + v.truncate(0); + assert_eq!(buf.read_until(b'3', &mut v).unwrap(), 1); + assert_eq!(v, b"3"); + v.truncate(0); + assert_eq!(buf.read_until(b'3', &mut v).unwrap(), 0); + assert_eq!(v, []); +} + +#[test] +fn skip_until() { + let bytes: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore\0"; + let mut reader = BufReader::new(bytes); + + // read from the bytes, alternating between + // consuming `read\0`s and skipping `ignore\0`s + loop { + // consume `read\0` + let mut out = Vec::new(); + let read = reader.read_until(0, &mut out).unwrap(); + if read == 0 { + // eof + break; + } else { + assert_eq!(out, b"read\0"); + assert_eq!(read, b"read\0".len()); + } + + // skip past `ignore\0` + let skipped = reader.skip_until(0).unwrap(); + assert_eq!(skipped, b"ignore\0".len()); + } + + // ensure we are at the end of the byte slice and that we can skip no further + // also ensure skip_until matches the behavior of read_until at EOF + let skipped = reader.skip_until(0).unwrap(); + assert_eq!(skipped, 0); +} + +#[test] +fn split() { + let buf = Cursor::new(&b"12"[..]); + let mut s = buf.split(b'3'); + assert_eq!(s.next().unwrap().unwrap(), vec![b'1', b'2']); + assert!(s.next().is_none()); + + let buf = Cursor::new(&b"1233"[..]); + let mut s = buf.split(b'3'); + assert_eq!(s.next().unwrap().unwrap(), vec![b'1', b'2']); + assert_eq!(s.next().unwrap().unwrap(), vec![]); + assert!(s.next().is_none()); +} + +#[test] +fn read_line() { + let mut buf = Cursor::new(&b"12"[..]); + let mut v = String::new(); + assert_eq!(buf.read_line(&mut v).unwrap(), 2); + assert_eq!(v, "12"); + + let mut buf = Cursor::new(&b"12\n\n"[..]); + let mut v = String::new(); + assert_eq!(buf.read_line(&mut v).unwrap(), 3); + assert_eq!(v, "12\n"); + v.truncate(0); + assert_eq!(buf.read_line(&mut v).unwrap(), 1); + assert_eq!(v, "\n"); + v.truncate(0); + assert_eq!(buf.read_line(&mut v).unwrap(), 0); + assert_eq!(v, ""); +} + +#[test] +fn lines() { + let buf = Cursor::new(&b"12\r"[..]); + let mut s = buf.lines(); + assert_eq!(s.next().unwrap().unwrap(), "12\r".to_string()); + assert!(s.next().is_none()); + + let buf = Cursor::new(&b"12\r\n\n"[..]); + let mut s = buf.lines(); + assert_eq!(s.next().unwrap().unwrap(), "12".to_string()); + assert_eq!(s.next().unwrap().unwrap(), "".to_string()); + assert!(s.next().is_none()); +} + +#[test] +fn buf_read_has_data_left() { + let mut buf = Cursor::new(&b"abcd"[..]); + assert!(buf.has_data_left().unwrap()); + buf.read_exact(&mut [0; 2]).unwrap(); + assert!(buf.has_data_left().unwrap()); + buf.read_exact(&mut [0; 2]).unwrap(); + assert!(!buf.has_data_left().unwrap()); +} + +#[test] +fn read_to_end() { + let mut c = Cursor::new(&b""[..]); + let mut v = Vec::new(); + assert_eq!(c.read_to_end(&mut v).unwrap(), 0); + assert_eq!(v, []); + + let mut c = Cursor::new(&b"1"[..]); + let mut v = Vec::new(); + assert_eq!(c.read_to_end(&mut v).unwrap(), 1); + assert_eq!(v, b"1"); + + let cap = if cfg!(miri) { 1024 } else { 1024 * 1024 }; + let data = (0..cap).map(|i| (i / 3) as u8).collect::>(); + let mut v = Vec::new(); + let (a, b) = data.split_at(data.len() / 2); + assert_eq!(Cursor::new(a).read_to_end(&mut v).unwrap(), a.len()); + assert_eq!(Cursor::new(b).read_to_end(&mut v).unwrap(), b.len()); + assert_eq!(v, data); +} + +#[test] +fn read_to_string() { + let mut c = Cursor::new(&b""[..]); + let mut v = String::new(); + assert_eq!(c.read_to_string(&mut v).unwrap(), 0); + assert_eq!(v, ""); + + let mut c = Cursor::new(&b"1"[..]); + let mut v = String::new(); + assert_eq!(c.read_to_string(&mut v).unwrap(), 1); + assert_eq!(v, "1"); + + let mut c = Cursor::new(&b"\xff"[..]); + let mut v = String::new(); + assert!(c.read_to_string(&mut v).is_err()); +} + +#[test] +fn read_exact() { + let mut buf = [0; 4]; + + let mut c = Cursor::new(&b""[..]); + assert_eq!(c.read_exact(&mut buf).unwrap_err().kind(), io::ErrorKind::UnexpectedEof); + + let mut c = Cursor::new(&b"123"[..]).chain(Cursor::new(&b"456789"[..])); + c.read_exact(&mut buf).unwrap(); + assert_eq!(&buf, b"1234"); + c.read_exact(&mut buf).unwrap(); + assert_eq!(&buf, b"5678"); + assert_eq!(c.read_exact(&mut buf).unwrap_err().kind(), io::ErrorKind::UnexpectedEof); +} + +#[test] +fn read_exact_slice() { + let mut buf = [0; 4]; + + let mut c = &b""[..]; + assert_eq!(c.read_exact(&mut buf).unwrap_err().kind(), io::ErrorKind::UnexpectedEof); + + let mut c = &b"123"[..]; + assert_eq!(c.read_exact(&mut buf).unwrap_err().kind(), io::ErrorKind::UnexpectedEof); + // make sure the optimized (early returning) method is being used + assert_eq!(&buf, &[0; 4]); + + let mut c = &b"1234"[..]; + c.read_exact(&mut buf).unwrap(); + assert_eq!(&buf, b"1234"); + + let mut c = &b"56789"[..]; + c.read_exact(&mut buf).unwrap(); + assert_eq!(&buf, b"5678"); + assert_eq!(c, b"9"); +} + +#[test] +fn read_buf_exact() { + let buf: &mut [_] = &mut [0; 4]; + let mut buf: BorrowedBuf<'_> = buf.into(); + + let mut c = Cursor::new(&b""[..]); + assert_eq!(c.read_buf_exact(buf.unfilled()).unwrap_err().kind(), io::ErrorKind::UnexpectedEof); + + let mut c = Cursor::new(&b"123456789"[..]); + c.read_buf_exact(buf.unfilled()).unwrap(); + assert_eq!(buf.filled(), b"1234"); + + buf.clear(); + + c.read_buf_exact(buf.unfilled()).unwrap(); + assert_eq!(buf.filled(), b"5678"); + + buf.clear(); + + assert_eq!(c.read_buf_exact(buf.unfilled()).unwrap_err().kind(), io::ErrorKind::UnexpectedEof); +} + +#[test] +#[should_panic] +fn borrowed_cursor_advance_overflow() { + let mut buf = [0; 512]; + let mut buf = BorrowedBuf::from(&mut buf[..]); + buf.unfilled().advance(1); + buf.unfilled().advance(usize::MAX); +} + +#[test] +fn take_eof() { + struct R; + + impl Read for R { + fn read(&mut self, _: &mut [u8]) -> io::Result { + Err(io::const_error!(io::ErrorKind::Other, "")) + } + } + impl BufRead for R { + fn fill_buf(&mut self) -> io::Result<&[u8]> { + Err(io::const_error!(io::ErrorKind::Other, "")) + } + fn consume(&mut self, _amt: usize) {} + } + + let mut buf = [0; 1]; + assert_eq!(0, R.take(0).read(&mut buf).unwrap()); + assert_eq!(b"", R.take(0).fill_buf().unwrap()); +} + +fn cmp_bufread(mut br1: Br1, mut br2: Br2, exp: &[u8]) { + let mut cat = Vec::new(); + loop { + let consume = { + let buf1 = br1.fill_buf().unwrap(); + let buf2 = br2.fill_buf().unwrap(); + let minlen = if buf1.len() < buf2.len() { buf1.len() } else { buf2.len() }; + assert_eq!(buf1[..minlen], buf2[..minlen]); + cat.extend_from_slice(&buf1[..minlen]); + minlen + }; + if consume == 0 { + break; + } + br1.consume(consume); + br2.consume(consume); + } + assert_eq!(br1.fill_buf().unwrap().len(), 0); + assert_eq!(br2.fill_buf().unwrap().len(), 0); + assert_eq!(&cat[..], &exp[..]) +} + +#[test] +fn chain_bufread() { + let testdata = b"ABCDEFGHIJKL"; + let chain1 = + (&testdata[..3]).chain(&testdata[3..6]).chain(&testdata[6..9]).chain(&testdata[9..]); + let chain2 = (&testdata[..4]).chain(&testdata[4..8]).chain(&testdata[8..]); + cmp_bufread(chain1, chain2, &testdata[..]); +} + +#[test] +fn chain_splitted_char() { + let chain = b"\xc3".chain(b"\xa9".as_slice()); + assert_eq!(crate::io::read_to_string(chain).unwrap(), "é"); + + let mut chain = b"\xc3".chain(b"\xa9\n".as_slice()); + let mut buf = String::new(); + assert_eq!(chain.read_line(&mut buf).unwrap(), 3); + assert_eq!(buf, "é\n"); +} + +#[test] +fn bufreader_size_hint() { + let testdata = b"ABCDEFGHIJKL"; + let mut buf_reader = BufReader::new(&testdata[..]); + assert_eq!(buf_reader.buffer().len(), 0); + + let buffer_length = testdata.len(); + buf_reader.fill_buf().unwrap(); + + // Check that size hint matches buffer contents + let mut buffered_bytes = buf_reader.bytes(); + let (lower_bound, _upper_bound) = buffered_bytes.size_hint(); + assert_eq!(lower_bound, buffer_length); + + // Check that size hint matches buffer contents after advancing + buffered_bytes.next().unwrap().unwrap(); + let (lower_bound, _upper_bound) = buffered_bytes.size_hint(); + assert_eq!(lower_bound, buffer_length - 1); +} + +#[test] +fn empty_size_hint() { + let size_hint = io::empty().bytes().size_hint(); + assert_eq!(size_hint, (0, Some(0))); +} + +#[test] +fn slice_size_hint() { + let size_hint = (&[1, 2, 3]).bytes().size_hint(); + assert_eq!(size_hint, (3, Some(3))); +} + +#[test] +fn take_size_hint() { + let size_hint = (&[1, 2, 3]).take(2).bytes().size_hint(); + assert_eq!(size_hint, (2, Some(2))); + + let size_hint = (&[1, 2, 3]).take(4).bytes().size_hint(); + assert_eq!(size_hint, (3, Some(3))); + + let size_hint = io::repeat(0).take(3).bytes().size_hint(); + assert_eq!(size_hint, (3, Some(3))); +} + +#[test] +fn chain_empty_size_hint() { + let chain = io::empty().chain(io::empty()); + let size_hint = chain.bytes().size_hint(); + assert_eq!(size_hint, (0, Some(0))); +} + +#[test] +fn chain_size_hint() { + let testdata = b"ABCDEFGHIJKL"; + let mut buf_reader_1 = BufReader::new(&testdata[..6]); + let mut buf_reader_2 = BufReader::new(&testdata[6..]); + + buf_reader_1.fill_buf().unwrap(); + buf_reader_2.fill_buf().unwrap(); + + let chain = buf_reader_1.chain(buf_reader_2); + let size_hint = chain.bytes().size_hint(); + assert_eq!(size_hint, (testdata.len(), Some(testdata.len()))); +} + +#[test] +fn chain_zero_length_read_is_not_eof() { + let a = b"A"; + let b = b"B"; + let mut s = String::new(); + let mut chain = (&a[..]).chain(&b[..]); + chain.read(&mut []).unwrap(); + chain.read_to_string(&mut s).unwrap(); + assert_eq!("AB", s); +} + +#[bench] +#[cfg_attr(miri, ignore)] // Miri isn't fast... +fn bench_read_to_end(b: &mut test::Bencher) { + b.iter(|| { + let mut lr = repeat(1).take(10000000); + let mut vec = Vec::with_capacity(1024); + super::default_read_to_end(&mut lr, &mut vec, None) + }); +} + +#[test] +fn seek_len() -> io::Result<()> { + let mut c = Cursor::new(vec![0; 15]); + assert_eq!(c.stream_len()?, 15); + + c.seek(SeekFrom::End(0))?; + let old_pos = c.stream_position()?; + assert_eq!(c.stream_len()?, 15); + assert_eq!(c.stream_position()?, old_pos); + + c.seek(SeekFrom::Start(7))?; + c.seek(SeekFrom::Current(2))?; + let old_pos = c.stream_position()?; + assert_eq!(c.stream_len()?, 15); + assert_eq!(c.stream_position()?, old_pos); + + Ok(()) +} + +#[test] +fn seek_position() -> io::Result<()> { + // All `asserts` are duplicated here to make sure the method does not + // change anything about the seek state. + let mut c = Cursor::new(vec![0; 15]); + assert_eq!(c.stream_position()?, 0); + assert_eq!(c.stream_position()?, 0); + + c.seek(SeekFrom::End(0))?; + assert_eq!(c.stream_position()?, 15); + assert_eq!(c.stream_position()?, 15); + + c.seek(SeekFrom::Start(7))?; + c.seek(SeekFrom::Current(2))?; + assert_eq!(c.stream_position()?, 9); + assert_eq!(c.stream_position()?, 9); + + c.seek(SeekFrom::End(-3))?; + c.seek(SeekFrom::Current(1))?; + c.seek(SeekFrom::Current(-5))?; + assert_eq!(c.stream_position()?, 8); + assert_eq!(c.stream_position()?, 8); + + c.rewind()?; + assert_eq!(c.stream_position()?, 0); + assert_eq!(c.stream_position()?, 0); + + Ok(()) +} + +#[test] +fn take_seek() -> io::Result<()> { + let mut buf = Cursor::new(b"0123456789"); + buf.set_position(2); + let mut take = buf.by_ref().take(4); + let mut buf1 = [0u8; 1]; + let mut buf2 = [0u8; 2]; + assert_eq!(take.position(), 0); + + assert_eq!(take.seek(SeekFrom::Start(0))?, 0); + take.read_exact(&mut buf2)?; + assert_eq!(buf2, [b'2', b'3']); + assert_eq!(take.seek(SeekFrom::Start(1))?, 1); + take.read_exact(&mut buf2)?; + assert_eq!(buf2, [b'3', b'4']); + assert_eq!(take.seek(SeekFrom::Start(2))?, 2); + take.read_exact(&mut buf2)?; + assert_eq!(buf2, [b'4', b'5']); + assert_eq!(take.seek(SeekFrom::Start(3))?, 3); + take.read_exact(&mut buf1)?; + assert_eq!(buf1, [b'5']); + assert_eq!(take.seek(SeekFrom::Start(4))?, 4); + assert_eq!(take.read(&mut buf1)?, 0); + + assert_eq!(take.seek(SeekFrom::End(0))?, 4); + assert_eq!(take.seek(SeekFrom::End(-1))?, 3); + take.read_exact(&mut buf1)?; + assert_eq!(buf1, [b'5']); + assert_eq!(take.seek(SeekFrom::End(-2))?, 2); + take.read_exact(&mut buf2)?; + assert_eq!(buf2, [b'4', b'5']); + assert_eq!(take.seek(SeekFrom::End(-3))?, 1); + take.read_exact(&mut buf2)?; + assert_eq!(buf2, [b'3', b'4']); + assert_eq!(take.seek(SeekFrom::End(-4))?, 0); + take.read_exact(&mut buf2)?; + assert_eq!(buf2, [b'2', b'3']); + + assert_eq!(take.seek(SeekFrom::Current(0))?, 2); + take.read_exact(&mut buf2)?; + assert_eq!(buf2, [b'4', b'5']); + + assert_eq!(take.seek(SeekFrom::Current(-3))?, 1); + take.read_exact(&mut buf2)?; + assert_eq!(buf2, [b'3', b'4']); + + assert_eq!(take.seek(SeekFrom::Current(-1))?, 2); + take.read_exact(&mut buf2)?; + assert_eq!(buf2, [b'4', b'5']); + + assert_eq!(take.seek(SeekFrom::Current(-4))?, 0); + take.read_exact(&mut buf2)?; + assert_eq!(buf2, [b'2', b'3']); + + assert_eq!(take.seek(SeekFrom::Current(2))?, 4); + assert_eq!(take.read(&mut buf1)?, 0); + + Ok(()) +} + +#[test] +fn take_seek_error() { + let buf = Cursor::new(b"0123456789"); + let mut take = buf.take(2); + assert!(take.seek(SeekFrom::Start(3)).is_err()); + assert!(take.seek(SeekFrom::End(1)).is_err()); + assert!(take.seek(SeekFrom::End(-3)).is_err()); + assert!(take.seek(SeekFrom::Current(-1)).is_err()); + assert!(take.seek(SeekFrom::Current(3)).is_err()); +} + +struct ExampleHugeRangeOfZeroes { + position: u64, +} + +impl Read for ExampleHugeRangeOfZeroes { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let max = buf.len().min(usize::MAX); + for i in 0..max { + if self.position == u64::MAX { + return Ok(i); + } + self.position += 1; + buf[i] = 0; + } + Ok(max) + } +} + +impl Seek for ExampleHugeRangeOfZeroes { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + match pos { + io::SeekFrom::Start(i) => self.position = i, + io::SeekFrom::End(i) if i >= 0 => self.position = u64::MAX, + io::SeekFrom::End(i) => self.position = self.position - i.unsigned_abs(), + io::SeekFrom::Current(i) => { + self.position = if i >= 0 { + self.position.saturating_add(i.unsigned_abs()) + } else { + self.position.saturating_sub(i.unsigned_abs()) + }; + } + } + Ok(self.position) + } +} + +#[test] +fn take_seek_big_offsets() -> io::Result<()> { + let inner = ExampleHugeRangeOfZeroes { position: 1 }; + let mut take = inner.take(u64::MAX - 2); + assert_eq!(take.seek(io::SeekFrom::Start(u64::MAX - 2))?, u64::MAX - 2); + assert_eq!(take.inner.position, u64::MAX - 1); + assert_eq!(take.seek(io::SeekFrom::Start(0))?, 0); + assert_eq!(take.inner.position, 1); + assert_eq!(take.seek(io::SeekFrom::End(-1))?, u64::MAX - 3); + assert_eq!(take.inner.position, u64::MAX - 2); + Ok(()) +} + +// A simple example reader which uses the default implementation of +// read_to_end. +struct ExampleSliceReader<'a> { + slice: &'a [u8], +} + +impl<'a> Read for ExampleSliceReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let len = cmp::min(self.slice.len(), buf.len()); + buf[..len].copy_from_slice(&self.slice[..len]); + self.slice = &self.slice[len..]; + Ok(len) + } +} + +#[test] +fn test_read_to_end_capacity() -> io::Result<()> { + let input = &b"foo"[..]; + + // read_to_end() takes care not to over-allocate when a buffer is the + // exact size needed. + let mut vec1 = Vec::with_capacity(input.len()); + ExampleSliceReader { slice: input }.read_to_end(&mut vec1)?; + assert_eq!(vec1.len(), input.len()); + assert_eq!(vec1.capacity(), input.len(), "did not allocate more"); + + Ok(()) +} + +#[test] +fn io_slice_mut_advance_slices() { + let mut buf1 = [1; 8]; + let mut buf2 = [2; 16]; + let mut buf3 = [3; 8]; + let mut bufs = &mut [ + IoSliceMut::new(&mut buf1), + IoSliceMut::new(&mut buf2), + IoSliceMut::new(&mut buf3), + ][..]; + + // Only in a single buffer.. + IoSliceMut::advance_slices(&mut bufs, 1); + assert_eq!(bufs[0].deref(), [1; 7].as_ref()); + assert_eq!(bufs[1].deref(), [2; 16].as_ref()); + assert_eq!(bufs[2].deref(), [3; 8].as_ref()); + + // Removing a buffer, leaving others as is. + IoSliceMut::advance_slices(&mut bufs, 7); + assert_eq!(bufs[0].deref(), [2; 16].as_ref()); + assert_eq!(bufs[1].deref(), [3; 8].as_ref()); + + // Removing a buffer and removing from the next buffer. + IoSliceMut::advance_slices(&mut bufs, 18); + assert_eq!(bufs[0].deref(), [3; 6].as_ref()); +} + +#[test] +#[should_panic] +fn io_slice_mut_advance_slices_empty_slice() { + let mut empty_bufs = &mut [][..]; + IoSliceMut::advance_slices(&mut empty_bufs, 1); +} + +#[test] +#[should_panic] +fn io_slice_mut_advance_slices_beyond_total_length() { + let mut buf1 = [1; 8]; + let mut bufs = &mut [IoSliceMut::new(&mut buf1)][..]; + + IoSliceMut::advance_slices(&mut bufs, 9); + assert!(bufs.is_empty()); +} + +#[test] +fn io_slice_advance_slices() { + let buf1 = [1; 8]; + let buf2 = [2; 16]; + let buf3 = [3; 8]; + let mut bufs = &mut [IoSlice::new(&buf1), IoSlice::new(&buf2), IoSlice::new(&buf3)][..]; + + // Only in a single buffer.. + IoSlice::advance_slices(&mut bufs, 1); + assert_eq!(bufs[0].deref(), [1; 7].as_ref()); + assert_eq!(bufs[1].deref(), [2; 16].as_ref()); + assert_eq!(bufs[2].deref(), [3; 8].as_ref()); + + // Removing a buffer, leaving others as is. + IoSlice::advance_slices(&mut bufs, 7); + assert_eq!(bufs[0].deref(), [2; 16].as_ref()); + assert_eq!(bufs[1].deref(), [3; 8].as_ref()); + + // Removing a buffer and removing from the next buffer. + IoSlice::advance_slices(&mut bufs, 18); + assert_eq!(bufs[0].deref(), [3; 6].as_ref()); +} + +#[test] +#[should_panic] +fn io_slice_advance_slices_empty_slice() { + let mut empty_bufs = &mut [][..]; + IoSlice::advance_slices(&mut empty_bufs, 1); +} + +#[test] +#[should_panic] +fn io_slice_advance_slices_beyond_total_length() { + let buf1 = [1; 8]; + let mut bufs = &mut [IoSlice::new(&buf1)][..]; + + IoSlice::advance_slices(&mut bufs, 9); + assert!(bufs.is_empty()); +} + +#[test] +fn io_slice_as_slice() { + let buf = [1; 8]; + let slice = IoSlice::new(&buf).as_slice(); + assert_eq!(slice, buf); +} + +#[test] +fn io_slice_into_slice() { + let mut buf = [1; 8]; + let slice = IoSliceMut::new(&mut buf).into_slice(); + assert_eq!(slice, [1; 8]); +} + +/// Creates a new writer that reads from at most `n_bufs` and reads +/// `per_call` bytes (in total) per call to write. +fn test_writer(n_bufs: usize, per_call: usize) -> TestWriter { + TestWriter { n_bufs, per_call, written: Vec::new() } +} + +struct TestWriter { + n_bufs: usize, + per_call: usize, + written: Vec, +} + +impl Write for TestWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.write_vectored(&[IoSlice::new(buf)]) + } + + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { + let mut left = self.per_call; + let mut written = 0; + for buf in bufs.iter().take(self.n_bufs) { + let n = min(left, buf.len()); + self.written.extend_from_slice(&buf[0..n]); + left -= n; + written += n; + } + Ok(written) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +#[test] +fn test_writer_read_from_one_buf() { + let mut writer = test_writer(1, 2); + + assert_eq!(writer.write(&[]).unwrap(), 0); + assert_eq!(writer.write_vectored(&[]).unwrap(), 0); + + // Read at most 2 bytes. + assert_eq!(writer.write(&[1, 1, 1]).unwrap(), 2); + let bufs = &[IoSlice::new(&[2, 2, 2])]; + assert_eq!(writer.write_vectored(bufs).unwrap(), 2); + + // Only read from first buf. + let bufs = &[IoSlice::new(&[3]), IoSlice::new(&[4, 4])]; + assert_eq!(writer.write_vectored(bufs).unwrap(), 1); + + assert_eq!(writer.written, &[1, 1, 2, 2, 3]); +} + +#[test] +fn test_writer_read_from_multiple_bufs() { + let mut writer = test_writer(3, 3); + + // Read at most 3 bytes from two buffers. + let bufs = &[IoSlice::new(&[1]), IoSlice::new(&[2, 2, 2])]; + assert_eq!(writer.write_vectored(bufs).unwrap(), 3); + + // Read at most 3 bytes from three buffers. + let bufs = &[IoSlice::new(&[3]), IoSlice::new(&[4]), IoSlice::new(&[5, 5])]; + assert_eq!(writer.write_vectored(bufs).unwrap(), 3); + + assert_eq!(writer.written, &[1, 2, 2, 3, 4, 5]); +} + +#[test] +fn test_write_all_vectored() { + #[rustfmt::skip] // Becomes unreadable otherwise. + let tests: Vec<(_, &'static [u8])> = vec![ + (vec![], &[]), + (vec![IoSlice::new(&[]), IoSlice::new(&[])], &[]), + (vec![IoSlice::new(&[1])], &[1]), + (vec![IoSlice::new(&[1, 2])], &[1, 2]), + (vec![IoSlice::new(&[1, 2, 3])], &[1, 2, 3]), + (vec![IoSlice::new(&[1, 2, 3, 4])], &[1, 2, 3, 4]), + (vec![IoSlice::new(&[1, 2, 3, 4, 5])], &[1, 2, 3, 4, 5]), + (vec![IoSlice::new(&[1]), IoSlice::new(&[2])], &[1, 2]), + (vec![IoSlice::new(&[1]), IoSlice::new(&[2, 2])], &[1, 2, 2]), + (vec![IoSlice::new(&[1, 1]), IoSlice::new(&[2, 2])], &[1, 1, 2, 2]), + (vec![IoSlice::new(&[1, 1]), IoSlice::new(&[2, 2, 2])], &[1, 1, 2, 2, 2]), + (vec![IoSlice::new(&[1, 1]), IoSlice::new(&[2, 2, 2])], &[1, 1, 2, 2, 2]), + (vec![IoSlice::new(&[1, 1, 1]), IoSlice::new(&[2, 2, 2])], &[1, 1, 1, 2, 2, 2]), + (vec![IoSlice::new(&[1, 1, 1]), IoSlice::new(&[2, 2, 2, 2])], &[1, 1, 1, 2, 2, 2, 2]), + (vec![IoSlice::new(&[1, 1, 1, 1]), IoSlice::new(&[2, 2, 2, 2])], &[1, 1, 1, 1, 2, 2, 2, 2]), + (vec![IoSlice::new(&[1]), IoSlice::new(&[2]), IoSlice::new(&[3])], &[1, 2, 3]), + (vec![IoSlice::new(&[1, 1]), IoSlice::new(&[2, 2]), IoSlice::new(&[3, 3])], &[1, 1, 2, 2, 3, 3]), + (vec![IoSlice::new(&[1]), IoSlice::new(&[2, 2]), IoSlice::new(&[3, 3, 3])], &[1, 2, 2, 3, 3, 3]), + (vec![IoSlice::new(&[1, 1, 1]), IoSlice::new(&[2, 2, 2]), IoSlice::new(&[3, 3, 3])], &[1, 1, 1, 2, 2, 2, 3, 3, 3]), + ]; + + let writer_configs = &[(1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]; + + for (n_bufs, per_call) in writer_configs.iter().copied() { + for (mut input, wanted) in tests.clone().into_iter() { + let mut writer = test_writer(n_bufs, per_call); + assert!(writer.write_all_vectored(&mut *input).is_ok()); + assert_eq!(&*writer.written, &*wanted); + } + } +} + +// Issue 94981 +#[test] +#[should_panic = "number of read bytes exceeds limit"] +fn test_take_wrong_length() { + struct LieAboutSize(bool); + + impl Read for LieAboutSize { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + // Lie about the read size at first time of read. + if core::mem::take(&mut self.0) { Ok(buf.len() + 1) } else { Ok(buf.len()) } + } + } + + let mut buffer = vec![0; 4]; + let mut reader = LieAboutSize(true).take(4); + // Primed the `Limit` by lying about the read size. + let _ = reader.read(&mut buffer[..]); +} + +#[test] +fn slice_read_exact_eof() { + let slice = &b"123456"[..]; + + let mut r = slice; + assert!(r.read_exact(&mut [0; 10]).is_err()); + assert!(r.is_empty()); + + let mut r = slice; + let buf = &mut [0; 10]; + let mut buf = BorrowedBuf::from(buf.as_mut_slice()); + assert!(r.read_buf_exact(buf.unfilled()).is_err()); + assert!(r.is_empty()); + assert_eq!(buf.filled(), b"123456"); +} + +#[test] +fn cursor_read_exact_eof() { + let slice = Cursor::new(b"123456"); + + let mut r = slice.clone(); + assert!(r.read_exact(&mut [0; 10]).is_err()); + assert!(Cursor::split(&r).1.is_empty()); + + let mut r = slice; + let buf = &mut [0; 10]; + let mut buf = BorrowedBuf::from(buf.as_mut_slice()); + assert!(r.read_buf_exact(buf.unfilled()).is_err()); + assert!(Cursor::split(&r).1.is_empty()); + assert_eq!(buf.filled(), b"123456"); +} + +#[bench] +fn bench_take_read(b: &mut test::Bencher) { + b.iter(|| { + let mut buf = [0; 64]; + + [255; 128].take(64).read(&mut buf).unwrap(); + }); +} + +#[bench] +fn bench_take_read_buf(b: &mut test::Bencher) { + b.iter(|| { + let buf: &mut [_] = &mut [MaybeUninit::uninit(); 64]; + + let mut buf: BorrowedBuf<'_> = buf.into(); + + [255; 128].take(64).read_buf(buf.unfilled()).unwrap(); + }); +} + +// Issue #120603 +#[test] +#[should_panic] +fn read_buf_broken_read() { + struct MalformedRead; + + impl Read for MalformedRead { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + // broken length calculation + Ok(buf.len() + 1) + } + } + + let _ = BufReader::new(MalformedRead).fill_buf(); +} + +#[test] +fn read_buf_full_read() { + struct FullRead; + + impl Read for FullRead { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + Ok(buf.len()) + } + } + + assert_eq!(BufReader::new(FullRead).fill_buf().unwrap().len(), DEFAULT_BUF_SIZE); +} + +struct DataAndErrorReader(&'static [u8]); + +impl Read for DataAndErrorReader { + fn read(&mut self, _buf: &mut [u8]) -> io::Result { + panic!("We want tests to use `read_buf`") + } + + fn read_buf(&mut self, buf: io::BorrowedCursor<'_>) -> io::Result<()> { + self.0.read_buf(buf).unwrap(); + Err(io::Error::other("error")) + } +} + +#[test] +fn read_buf_data_and_error_take() { + let mut buf = [0; 64]; + let mut buf = io::BorrowedBuf::from(buf.as_mut_slice()); + + let mut r = DataAndErrorReader(&[4, 5, 6]).take(1); + assert!(r.read_buf(buf.unfilled()).is_err()); + assert_eq!(buf.filled(), &[4]); + + assert!(r.read_buf(buf.unfilled()).is_ok()); + assert_eq!(buf.filled(), &[4]); + assert_eq!(r.get_ref().0, &[5, 6]); +} + +#[test] +fn read_buf_data_and_error_buf() { + let mut r = BufReader::new(DataAndErrorReader(&[4, 5, 6])); + + assert!(r.fill_buf().is_err()); + assert_eq!(r.fill_buf().unwrap(), &[4, 5, 6]); +} + +#[test] +fn read_buf_data_and_error_read_to_end() { + let mut r = DataAndErrorReader(&[4, 5, 6]); + + let mut v = Vec::with_capacity(200); + assert!(r.read_to_end(&mut v).is_err()); + + assert_eq!(v, &[4, 5, 6]); +} + +#[test] +fn read_to_end_error() { + struct ErrorReader; + + impl Read for ErrorReader { + fn read(&mut self, _buf: &mut [u8]) -> io::Result { + Err(io::Error::other("error")) + } + } + + let mut r = [4, 5, 6].chain(ErrorReader); + + let mut v = Vec::with_capacity(200); + assert!(r.read_to_end(&mut v).is_err()); + + assert_eq!(v, &[4, 5, 6]); +} + +#[test] +fn try_oom_error() { + use alloc::alloc::Layout; + use alloc::collections::{TryReserveError, TryReserveErrorKind}; + + // We simulate a `Vec::try_reserve` error rather than attempting a huge size for real. This way + // we're not subject to the whims of optimization that might skip the actual allocation, and it + // also works for 32-bit targets and miri that might not OOM at all. + let layout = Layout::new::(); + let kind = TryReserveErrorKind::AllocError { layout, non_exhaustive: () }; + let reserve_err = TryReserveError::from(kind); + + let io_err = io::Error::from(reserve_err); + assert_eq!(io::ErrorKind::OutOfMemory, io_err.kind()); +} diff --git a/crates/std/src/io/util.rs b/crates/std/src/io/util.rs new file mode 100644 index 0000000..0410df3 --- /dev/null +++ b/crates/std/src/io/util.rs @@ -0,0 +1,448 @@ +#![allow(missing_copy_implementations)] + +#[cfg(test)] +mod tests; + +use crate::fmt; +use crate::io::{ + self, BorrowedCursor, BufRead, IoSlice, IoSliceMut, Read, Seek, SeekFrom, SizeHint, Write, +}; + +/// `Empty` ignores any data written via [`Write`], and will always be empty +/// (returning zero bytes) when read via [`Read`]. +/// +/// This struct is generally created by calling [`empty()`]. Please +/// see the documentation of [`empty()`] for more details. +#[stable(feature = "rust1", since = "1.0.0")] +#[non_exhaustive] +#[derive(Copy, Clone, Debug, Default)] +pub struct Empty; + +/// Creates a value that is always at EOF for reads, and ignores all data written. +/// +/// All calls to [`write`] on the returned instance will return [`Ok(buf.len())`] +/// and the contents of the buffer will not be inspected. +/// +/// All calls to [`read`] from the returned reader will return [`Ok(0)`]. +/// +/// [`Ok(buf.len())`]: Ok +/// [`Ok(0)`]: Ok +/// +/// [`write`]: Write::write +/// [`read`]: Read::read +/// +/// # Examples +/// +/// ```rust +/// use std::io::{self, Write}; +/// +/// let buffer = vec![1, 2, 3, 5, 8]; +/// let num_bytes = io::empty().write(&buffer).unwrap(); +/// assert_eq!(num_bytes, 5); +/// ``` +/// +/// +/// ```rust +/// use std::io::{self, Read}; +/// +/// let mut buffer = String::new(); +/// io::empty().read_to_string(&mut buffer).unwrap(); +/// assert!(buffer.is_empty()); +/// ``` +#[must_use] +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_const_stable(feature = "const_io_structs", since = "1.79.0")] +pub const fn empty() -> Empty { + Empty +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Read for Empty { + #[inline] + fn read(&mut self, _buf: &mut [u8]) -> io::Result { + Ok(0) + } + + #[inline] + fn read_buf(&mut self, _cursor: BorrowedCursor<'_>) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn read_vectored(&mut self, _bufs: &mut [IoSliceMut<'_>]) -> io::Result { + Ok(0) + } + + #[inline] + fn is_read_vectored(&self) -> bool { + // Do not force `Chain` or `Chain` to use vectored + // reads, unless the other reader is vectored. + false + } + + #[inline] + fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + if !buf.is_empty() { Err(io::Error::READ_EXACT_EOF) } else { Ok(()) } + } + + #[inline] + fn read_buf_exact(&mut self, cursor: BorrowedCursor<'_>) -> io::Result<()> { + if cursor.capacity() != 0 { Err(io::Error::READ_EXACT_EOF) } else { Ok(()) } + } + + #[inline] + fn read_to_end(&mut self, _buf: &mut Vec) -> io::Result { + Ok(0) + } + + #[inline] + fn read_to_string(&mut self, _buf: &mut String) -> io::Result { + Ok(0) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl BufRead for Empty { + #[inline] + fn fill_buf(&mut self) -> io::Result<&[u8]> { + Ok(&[]) + } + + #[inline] + fn consume(&mut self, _n: usize) {} + + #[inline] + fn has_data_left(&mut self) -> io::Result { + Ok(false) + } + + #[inline] + fn read_until(&mut self, _byte: u8, _buf: &mut Vec) -> io::Result { + Ok(0) + } + + #[inline] + fn skip_until(&mut self, _byte: u8) -> io::Result { + Ok(0) + } + + #[inline] + fn read_line(&mut self, _buf: &mut String) -> io::Result { + Ok(0) + } +} + +#[stable(feature = "empty_seek", since = "1.51.0")] +impl Seek for Empty { + #[inline] + fn seek(&mut self, _pos: SeekFrom) -> io::Result { + Ok(0) + } + + #[inline] + fn stream_len(&mut self) -> io::Result { + Ok(0) + } + + #[inline] + fn stream_position(&mut self) -> io::Result { + Ok(0) + } +} + +impl SizeHint for Empty { + #[inline] + fn upper_bound(&self) -> Option { + Some(0) + } +} + +#[stable(feature = "empty_write", since = "1.73.0")] +impl Write for Empty { + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + Ok(buf.len()) + } + + #[inline] + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { + let total_len = bufs.iter().map(|b| b.len()).sum(); + Ok(total_len) + } + + #[inline] + fn is_write_vectored(&self) -> bool { + true + } + + #[inline] + fn write_all(&mut self, _buf: &[u8]) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn write_all_vectored(&mut self, _bufs: &mut [IoSlice<'_>]) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn write_fmt(&mut self, _args: fmt::Arguments<'_>) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +#[stable(feature = "empty_write", since = "1.73.0")] +impl Write for &Empty { + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + Ok(buf.len()) + } + + #[inline] + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { + let total_len = bufs.iter().map(|b| b.len()).sum(); + Ok(total_len) + } + + #[inline] + fn is_write_vectored(&self) -> bool { + true + } + + #[inline] + fn write_all(&mut self, _buf: &[u8]) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn write_all_vectored(&mut self, _bufs: &mut [IoSlice<'_>]) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn write_fmt(&mut self, _args: fmt::Arguments<'_>) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +/// A reader which yields one byte over and over and over and over and over and... +/// +/// This struct is generally created by calling [`repeat()`]. Please +/// see the documentation of [`repeat()`] for more details. +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Repeat { + byte: u8, +} + +/// Creates an instance of a reader that infinitely repeats one byte. +/// +/// All reads from this reader will succeed by filling the specified buffer with +/// the given byte. +/// +/// # Examples +/// +/// ``` +/// use std::io::{self, Read}; +/// +/// let mut buffer = [0; 3]; +/// io::repeat(0b101).read_exact(&mut buffer).unwrap(); +/// assert_eq!(buffer, [0b101, 0b101, 0b101]); +/// ``` +#[must_use] +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_const_stable(feature = "const_io_structs", since = "1.79.0")] +pub const fn repeat(byte: u8) -> Repeat { + Repeat { byte } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Read for Repeat { + #[inline] + fn read(&mut self, buf: &mut [u8]) -> io::Result { + buf.fill(self.byte); + Ok(buf.len()) + } + + #[inline] + fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + buf.fill(self.byte); + Ok(()) + } + + #[inline] + fn read_buf(&mut self, mut buf: BorrowedCursor<'_>) -> io::Result<()> { + // SAFETY: No uninit bytes are being written. + unsafe { buf.as_mut() }.write_filled(self.byte); + // SAFETY: the entire unfilled portion of buf has been initialized. + unsafe { buf.advance_unchecked(buf.capacity()) }; + Ok(()) + } + + #[inline] + fn read_buf_exact(&mut self, buf: BorrowedCursor<'_>) -> io::Result<()> { + self.read_buf(buf) + } + + /// This function is not supported by `io::Repeat`, because there's no end of its data + fn read_to_end(&mut self, _: &mut Vec) -> io::Result { + Err(io::Error::from(io::ErrorKind::OutOfMemory)) + } + + /// This function is not supported by `io::Repeat`, because there's no end of its data + fn read_to_string(&mut self, _: &mut String) -> io::Result { + Err(io::Error::from(io::ErrorKind::OutOfMemory)) + } + + #[inline] + fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result { + let mut nwritten = 0; + for buf in bufs { + nwritten += self.read(buf)?; + } + Ok(nwritten) + } + + #[inline] + fn is_read_vectored(&self) -> bool { + true + } +} + +impl SizeHint for Repeat { + #[inline] + fn lower_bound(&self) -> usize { + usize::MAX + } + + #[inline] + fn upper_bound(&self) -> Option { + None + } +} + +#[stable(feature = "std_debug", since = "1.16.0")] +impl fmt::Debug for Repeat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Repeat").finish_non_exhaustive() + } +} + +/// A writer which will move data into the void. +/// +/// This struct is generally created by calling [`sink()`]. Please +/// see the documentation of [`sink()`] for more details. +#[stable(feature = "rust1", since = "1.0.0")] +#[non_exhaustive] +#[derive(Copy, Clone, Debug, Default)] +pub struct Sink; + +/// Creates an instance of a writer which will successfully consume all data. +/// +/// All calls to [`write`] on the returned instance will return [`Ok(buf.len())`] +/// and the contents of the buffer will not be inspected. +/// +/// [`write`]: Write::write +/// [`Ok(buf.len())`]: Ok +/// +/// # Examples +/// +/// ```rust +/// use std::io::{self, Write}; +/// +/// let buffer = vec![1, 2, 3, 5, 8]; +/// let num_bytes = io::sink().write(&buffer).unwrap(); +/// assert_eq!(num_bytes, 5); +/// ``` +#[must_use] +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_const_stable(feature = "const_io_structs", since = "1.79.0")] +pub const fn sink() -> Sink { + Sink +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Write for Sink { + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + Ok(buf.len()) + } + + #[inline] + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { + let total_len = bufs.iter().map(|b| b.len()).sum(); + Ok(total_len) + } + + #[inline] + fn is_write_vectored(&self) -> bool { + true + } + + #[inline] + fn write_all(&mut self, _buf: &[u8]) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn write_all_vectored(&mut self, _bufs: &mut [IoSlice<'_>]) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn write_fmt(&mut self, _args: fmt::Arguments<'_>) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +#[stable(feature = "write_mt", since = "1.48.0")] +impl Write for &Sink { + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result { + Ok(buf.len()) + } + + #[inline] + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { + let total_len = bufs.iter().map(|b| b.len()).sum(); + Ok(total_len) + } + + #[inline] + fn is_write_vectored(&self) -> bool { + true + } + + #[inline] + fn write_all(&mut self, _buf: &[u8]) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn write_all_vectored(&mut self, _bufs: &mut [IoSlice<'_>]) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn write_fmt(&mut self, _args: fmt::Arguments<'_>) -> io::Result<()> { + Ok(()) + } + + #[inline] + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} diff --git a/justfile b/justfile index 0aa8701..1e086f8 100644 --- a/justfile +++ b/justfile @@ -31,8 +31,19 @@ update_std: @just cp_std "io/error.rs" @just cp_std "io/error/repr_bitpacked.rs" @just cp_std "io/cursor.rs" + @just cp_std "io/cursor/tests.rs" @just cp_std "io/prelude.rs" @just cp_std "io/impls.rs" + @just cp_std "io/impls/tests.rs" + @just cp_std "io/tests.rs" + @just cp_std "io/util.rs" + @just cp_std "io/copy.rs" + @just cp_std "io/buffered/mod.rs" + @just cp_std "io/buffered/bufreader.rs" + @just cp_std "io/buffered/bufreader/buffer.rs" + @just cp_std "io/buffered/bufwriter.rs" + @just cp_std "io/buffered/linewriter.rs" + @just cp_std "io/buffered/linewritershim.rs" @just cp_std "hash/mod.rs" @just cp_std "hash/random.rs" @just cp_std "num/mod.rs"