Skip to main content

object/read/
read_ref.rs

1#![allow(clippy::len_without_is_empty)]
2
3use core::convert::TryInto;
4use core::ops::Range;
5use core::{mem, result};
6
7use crate::pod::{from_bytes, slice_from_bytes, Pod};
8
/// Module-local shorthand for a `Result` whose error type is `()`, so a
/// failure carries no detail beyond the fact that it happened.
9type Result<T> = result::Result<T, ()>;
10
11/// A trait for reading references to [`Pod`] types from a block of data.
12///
13/// This allows parsers to handle both of these cases:
14/// - the block of data exists in memory, and it is desirable
15///   to use references to this block instead of copying it,
16/// - the block of data exists in storage, and it is desirable
17///   to read on demand to minimize I/O and memory usage.
18///
19/// A block of data typically exists in memory as a result of using a memory
20/// mapped file, and the crate was written with this use case in mind.
21/// Reading the entire file into a `Vec` is also possible, but it often uses
22/// more I/O and memory.
23/// Both of these are handled by the `ReadRef` implementation for `&[u8]`.
24///
25/// For the second use case, the `ReadRef` trait is implemented for
26/// [`&ReadCache`](super::ReadCache). This is useful for environments where
27/// memory mapped files are not available or not suitable, such as WebAssembly.
28/// This differs from reading into a `Vec` in that it only reads the portions
29/// of the file that are needed for parsing.
30///
31/// The methods accept `self` by value because `Self` is expected to behave
32/// similar to a reference: it may be a reference with a lifetime of `'a`,
33/// or it may be a wrapper of a reference.
34///
35/// The `Clone` and `Copy` bounds are for convenience, and since `Self` is
36/// expected to be similar to a reference, these are easily satisfied.
37///
38/// Object file parsers typically use offsets to locate the structures
39/// in the block, and will most commonly use the `*_at` methods to
40/// read a structure at a known offset.
41///
42/// Occasionally file parsers will need to treat the block as a stream,
43/// and so convenience methods are provided that update an offset with
44/// the size that was read.
45//
46// An alternative would be for methods to accept `&mut self` and use a
47// `seek` method instead of the `offset` parameters, but this is less
48// convenient for implementers.
49pub trait ReadRef<'a>: Clone + Copy {
50    /// The total size of the block of data.
51    fn len(self) -> Result<u64>;
52
53    /// Get a reference to a `u8` slice at the given offset.
54    ///
55    /// Returns an error if offset or size are out of bounds.
56    fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8]>;
57
58    /// Get a reference to a delimited `u8` slice which starts at range.start.
59    ///
60    /// Does not include the delimiter.
61    ///
62    /// Returns an error if the range is out of bounds or the delimiter is
63    /// not found in the range.
64    fn read_bytes_at_until(self, range: Range<u64>, delimiter: u8) -> Result<&'a [u8]>;
65
66    /// Get a reference to a `u8` slice at the given offset, and update the offset.
67    ///
68    /// Returns an error if offset or size are out of bounds.
69    fn read_bytes(self, offset: &mut u64, size: u64) -> Result<&'a [u8]> {
70        let bytes = self.read_bytes_at(*offset, size)?;
71        *offset = offset.wrapping_add(size);
72        Ok(bytes)
73    }
74
75    /// Get a reference to a `Pod` type at the given offset, and update the offset.
76    ///
77    /// Returns an error if offset or size are out of bounds.
78    ///
79    /// The default implementation uses `read_bytes`, and returns an error if
80    /// `read_bytes` does not return bytes with the correct alignment for `T`.
81    /// Implementors may want to provide their own implementation that ensures
82    /// the alignment can be satisfied. Alternatively, only use this method with
83    /// types that do not need alignment. The types provided by this crate do not
84    /// need alignment.
85    fn read<T: Pod>(self, offset: &mut u64) -> Result<&'a T> {
86        let size = mem::size_of::<T>().try_into().map_err(|_| ())?;
87        let bytes = self.read_bytes(offset, size)?;
88        let (t, _) = from_bytes(bytes)?;
89        Ok(t)
90    }
91
92    /// Get a reference to a `Pod` type at the given offset.
93    ///
94    /// Returns an error if offset or size are out of bounds.
95    ///
96    /// Also see the `read` method for information regarding alignment of `T`.
97    fn read_at<T: Pod>(self, mut offset: u64) -> Result<&'a T> {
98        self.read(&mut offset)
99    }
100
101    /// Get a reference to a slice of a `Pod` type at the given offset, and update the offset.
102    ///
103    /// Returns an error if offset or size are out of bounds.
104    ///
105    /// Also see the `read` method for information regarding alignment of `T`.
106    fn read_slice<T: Pod>(self, offset: &mut u64, count: usize) -> Result<&'a [T]> {
107        let size = count
108            .checked_mul(mem::size_of::<T>())
109            .ok_or(())?
110            .try_into()
111            .map_err(|_| ())?;
112        let bytes = self.read_bytes(offset, size)?;
113        let (t, _) = slice_from_bytes(bytes, count)?;
114        Ok(t)
115    }
116
117    /// Get a reference to a slice of a `Pod` type at the given offset.
118    ///
119    /// Returns an error if offset or size are out of bounds.
120    ///
121    /// Also see the `read` method for information regarding alignment of `T`.
122    fn read_slice_at<T: Pod>(self, mut offset: u64, count: usize) -> Result<&'a [T]> {
123        self.read_slice(&mut offset, count)
124    }
125}
126
127impl<'a> ReadRef<'a> for &'a [u8] {
128    fn len(self) -> Result<u64> {
129        self.len().try_into().map_err(|_| ())
130    }
131
132    fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8]> {
133        if size == 0 {
134            return Ok(&[]);
135        }
136
137        let offset: usize = offset.try_into().map_err(|_| ())?;
138        let size: usize = size.try_into().map_err(|_| ())?;
139        self.get(offset..).ok_or(())?.get(..size).ok_or(())
140    }
141
142    fn read_bytes_at_until(self, range: Range<u64>, delimiter: u8) -> Result<&'a [u8]> {
143        let start: usize = range.start.try_into().map_err(|_| ())?;
144        let end: usize = range.end.try_into().map_err(|_| ())?;
145        let bytes = self.get(start..end).ok_or(())?;
146        match memchr::memchr(delimiter, bytes) {
147            Some(len) => {
148                // This will never fail.
149                bytes.get(..len).ok_or(())
150            }
151            None => Err(()),
152        }
153    }
154}