Skip to main content

object/read/macho/
load_command.rs

1use core::marker::PhantomData;
2use core::mem;
3
4use crate::endian::{Endian, U32};
5use crate::macho;
6use crate::pod::Pod;
7use crate::read::macho::{ExportsTrieIterator, FunctionStartsIterator, MachHeader, SymbolTable};
8use crate::read::{Bytes, Error, ReadError, ReadRef, Result, StringTable};
9
10/// An iterator for the load commands from a [`MachHeader`].
11#[derive(Debug, Default, Clone, Copy)]
12pub struct LoadCommandIterator<'data, E: Endian> {
13    endian: E,
14    data: Bytes<'data>,
15    ncmds: u32,
16}
17
18impl<'data, E: Endian> LoadCommandIterator<'data, E> {
19    pub(super) fn new(endian: E, data: &'data [u8], ncmds: u32) -> Self {
20        LoadCommandIterator {
21            endian,
22            data: Bytes(data),
23            ncmds,
24        }
25    }
26
27    /// Return the next load command.
28    pub fn next(&mut self) -> Result<Option<LoadCommandData<'data, E>>> {
29        if self.ncmds == 0 {
30            return Ok(None);
31        }
32
33        let result = self.parse().map(Some);
34        if result.is_err() {
35            self.ncmds = 0;
36        } else {
37            self.ncmds -= 1;
38        }
39        result
40    }
41
42    fn parse(&mut self) -> Result<LoadCommandData<'data, E>> {
43        let header = self
44            .data
45            .read_at::<macho::LoadCommand<E>>(0)
46            .read_error("Invalid Mach-O load command header")?;
47        let cmd = header.cmd.get(self.endian);
48        let cmdsize = header.cmdsize.get(self.endian) as usize;
49        if cmdsize < mem::size_of::<macho::LoadCommand<E>>() {
50            return Err(Error("Invalid Mach-O load command size"));
51        }
52        let data = self
53            .data
54            .read_bytes(cmdsize)
55            .read_error("Invalid Mach-O load command size")?;
56        Ok(LoadCommandData {
57            cmd,
58            data,
59            marker: Default::default(),
60        })
61    }
62}
63
64impl<'data, E: Endian> Iterator for LoadCommandIterator<'data, E> {
65    type Item = Result<LoadCommandData<'data, E>>;
66
67    fn next(&mut self) -> Option<Self::Item> {
68        self.next().transpose()
69    }
70}
71
72/// The data for a [`macho::LoadCommand`].
73#[derive(Debug, Clone, Copy)]
74pub struct LoadCommandData<'data, E: Endian> {
75    cmd: u32,
76    // Includes the header.
77    data: Bytes<'data>,
78    marker: PhantomData<E>,
79}
80
81impl<'data, E: Endian> LoadCommandData<'data, E> {
82    /// Return the `cmd` field of the [`macho::LoadCommand`].
83    ///
84    /// This is one of the `LC_` constants.
85    pub fn cmd(&self) -> u32 {
86        self.cmd
87    }
88
89    /// Return the `cmdsize` field of the [`macho::LoadCommand`].
90    pub fn cmdsize(&self) -> u32 {
91        self.data.len() as u32
92    }
93
94    /// Parse the data as the given type.
95    #[inline]
96    pub fn data<T: Pod>(&self) -> Result<&'data T> {
97        self.data
98            .read_at(0)
99            .read_error("Invalid Mach-O command size")
100    }
101
102    /// Raw bytes of this [`macho::LoadCommand`] structure.
103    pub fn raw_data(&self) -> &'data [u8] {
104        self.data.0
105    }
106
107    /// Parse a load command string value.
108    ///
109    /// Strings used by load commands are specified by offsets that are
110    /// relative to the load command header.
111    pub fn string(&self, endian: E, s: macho::LcStr<E>) -> Result<&'data [u8]> {
112        self.data
113            .read_string_at(s.offset.get(endian) as usize)
114            .read_error("Invalid load command string offset")
115    }
116
117    /// Parse the command data according to the `cmd` field.
118    pub fn variant(&self) -> Result<LoadCommandVariant<'data, E>> {
119        Ok(match self.cmd {
120            macho::LC_SEGMENT => {
121                let mut data = self.data;
122                let segment = data.read().read_error("Invalid Mach-O command size")?;
123                LoadCommandVariant::Segment32(segment, data.0)
124            }
125            macho::LC_SYMTAB => LoadCommandVariant::Symtab(self.data()?),
126            macho::LC_THREAD | macho::LC_UNIXTHREAD => {
127                let mut data = self.data;
128                let thread = data.read().read_error("Invalid Mach-O command size")?;
129                LoadCommandVariant::Thread(thread, data.0)
130            }
131            macho::LC_DYSYMTAB => LoadCommandVariant::Dysymtab(self.data()?),
132            macho::LC_LOAD_DYLIB
133            | macho::LC_LOAD_WEAK_DYLIB
134            | macho::LC_REEXPORT_DYLIB
135            | macho::LC_LAZY_LOAD_DYLIB
136            | macho::LC_LOAD_UPWARD_DYLIB => LoadCommandVariant::Dylib(self.data()?),
137            macho::LC_ID_DYLIB => LoadCommandVariant::IdDylib(self.data()?),
138            macho::LC_LOAD_DYLINKER => LoadCommandVariant::LoadDylinker(self.data()?),
139            macho::LC_ID_DYLINKER => LoadCommandVariant::IdDylinker(self.data()?),
140            macho::LC_PREBOUND_DYLIB => LoadCommandVariant::PreboundDylib(self.data()?),
141            macho::LC_ROUTINES => LoadCommandVariant::Routines32(self.data()?),
142            macho::LC_SUB_FRAMEWORK => LoadCommandVariant::SubFramework(self.data()?),
143            macho::LC_SUB_UMBRELLA => LoadCommandVariant::SubUmbrella(self.data()?),
144            macho::LC_SUB_CLIENT => LoadCommandVariant::SubClient(self.data()?),
145            macho::LC_SUB_LIBRARY => LoadCommandVariant::SubLibrary(self.data()?),
146            macho::LC_TWOLEVEL_HINTS => LoadCommandVariant::TwolevelHints(self.data()?),
147            macho::LC_PREBIND_CKSUM => LoadCommandVariant::PrebindCksum(self.data()?),
148            macho::LC_SEGMENT_64 => {
149                let mut data = self.data;
150                let segment = data.read().read_error("Invalid Mach-O command size")?;
151                LoadCommandVariant::Segment64(segment, data.0)
152            }
153            macho::LC_ROUTINES_64 => LoadCommandVariant::Routines64(self.data()?),
154            macho::LC_UUID => LoadCommandVariant::Uuid(self.data()?),
155            macho::LC_RPATH => LoadCommandVariant::Rpath(self.data()?),
156            macho::LC_TARGET_TRIPLE => LoadCommandVariant::TargetTriple(self.data()?),
157            macho::LC_CODE_SIGNATURE
158            | macho::LC_SEGMENT_SPLIT_INFO
159            | macho::LC_FUNCTION_STARTS
160            | macho::LC_DATA_IN_CODE
161            | macho::LC_DYLIB_CODE_SIGN_DRS
162            | macho::LC_LINKER_OPTIMIZATION_HINT
163            | macho::LC_DYLD_EXPORTS_TRIE
164            | macho::LC_DYLD_CHAINED_FIXUPS => LoadCommandVariant::LinkeditData(self.data()?),
165            macho::LC_ENCRYPTION_INFO => LoadCommandVariant::EncryptionInfo32(self.data()?),
166            macho::LC_DYLD_INFO | macho::LC_DYLD_INFO_ONLY => {
167                LoadCommandVariant::DyldInfo(self.data()?)
168            }
169            macho::LC_VERSION_MIN_MACOSX
170            | macho::LC_VERSION_MIN_IPHONEOS
171            | macho::LC_VERSION_MIN_TVOS
172            | macho::LC_VERSION_MIN_WATCHOS => LoadCommandVariant::VersionMin(self.data()?),
173            macho::LC_DYLD_ENVIRONMENT => LoadCommandVariant::DyldEnvironment(self.data()?),
174            macho::LC_MAIN => LoadCommandVariant::EntryPoint(self.data()?),
175            macho::LC_SOURCE_VERSION => LoadCommandVariant::SourceVersion(self.data()?),
176            macho::LC_ENCRYPTION_INFO_64 => LoadCommandVariant::EncryptionInfo64(self.data()?),
177            macho::LC_LINKER_OPTION => LoadCommandVariant::LinkerOption(self.data()?),
178            macho::LC_NOTE => LoadCommandVariant::Note(self.data()?),
179            macho::LC_BUILD_VERSION => LoadCommandVariant::BuildVersion(self.data()?),
180            macho::LC_FILESET_ENTRY => LoadCommandVariant::FilesetEntry(self.data()?),
181            _ => LoadCommandVariant::Other,
182        })
183    }
184
185    /// Try to parse this command as a [`macho::SegmentCommand32`].
186    ///
187    /// Returns the segment command and the data containing the sections.
188    pub fn segment_32(self) -> Result<Option<(&'data macho::SegmentCommand32<E>, &'data [u8])>> {
189        if self.cmd == macho::LC_SEGMENT {
190            let mut data = self.data;
191            let segment = data.read().read_error("Invalid Mach-O command size")?;
192            Ok(Some((segment, data.0)))
193        } else {
194            Ok(None)
195        }
196    }
197
198    /// Try to parse this command as a [`macho::SymtabCommand`].
199    pub fn symtab(self) -> Result<Option<&'data macho::SymtabCommand<E>>> {
200        if self.cmd == macho::LC_SYMTAB {
201            Some(self.data()).transpose()
202        } else {
203            Ok(None)
204        }
205    }
206
207    /// Try to parse this command as a [`macho::DysymtabCommand`].
208    pub fn dysymtab(self) -> Result<Option<&'data macho::DysymtabCommand<E>>> {
209        if self.cmd == macho::LC_DYSYMTAB {
210            Some(self.data()).transpose()
211        } else {
212            Ok(None)
213        }
214    }
215
216    /// Try to parse this command as a [`macho::DylibCommand`].
217    ///
218    /// See also [`Self::dylib_use_flags`] to read the optional flags field.
219    pub fn dylib(self) -> Result<Option<&'data macho::DylibCommand<E>>> {
220        if self.cmd == macho::LC_LOAD_DYLIB
221            || self.cmd == macho::LC_LOAD_WEAK_DYLIB
222            || self.cmd == macho::LC_REEXPORT_DYLIB
223            || self.cmd == macho::LC_LAZY_LOAD_DYLIB
224            || self.cmd == macho::LC_LOAD_UPWARD_DYLIB
225        {
226            Some(self.data()).transpose()
227        } else {
228            Ok(None)
229        }
230    }
231
232    /// Parse the optional flags field for a dylib load command.
233    ///
234    /// [`macho::DylibCommand`] traditionally uses the load command type to distinguish
235    /// between dylib kinds. [`macho::DylibUseCommand`] replaces this by encoding the
236    /// kinds in a bitfield appended after the standard `DylibCommand` fields. Its
237    /// presence is signalled using sentinel values in some `DylibCommand` fields.
238    ///
239    /// Returns `None` if the sentinels are absent. If `Some` is returned, the value of
240    /// [`macho::Dylib::timestamp`] should be ignored.
241    pub fn dylib_use_flags(self, endian: E, dylib: &macho::DylibCommand<E>) -> Result<Option<u32>> {
242        if dylib.dylib.name.offset.get(endian) != 28 // size of DylibUseCommand
243            || dylib.dylib.timestamp.get(endian) != macho::DYLIB_USE_MARKER
244        {
245            return Ok(None);
246        }
247        Ok(Some(
248            self.data
249                .read_at::<U32<_>>(24) // offset of DylibUseCommand::flags
250                .read_error("Invalid dylib load command size")?
251                .get(endian),
252        ))
253    }
254
255    /// Try to parse this command as a [`macho::UuidCommand`].
256    pub fn uuid(self) -> Result<Option<&'data macho::UuidCommand<E>>> {
257        if self.cmd == macho::LC_UUID {
258            Some(self.data()).transpose()
259        } else {
260            Ok(None)
261        }
262    }
263
264    /// Try to parse this command as a [`macho::SegmentCommand64`].
265    pub fn segment_64(self) -> Result<Option<(&'data macho::SegmentCommand64<E>, &'data [u8])>> {
266        if self.cmd == macho::LC_SEGMENT_64 {
267            let mut data = self.data;
268            let command = data.read().read_error("Invalid Mach-O command size")?;
269            Ok(Some((command, data.0)))
270        } else {
271            Ok(None)
272        }
273    }
274
275    /// Try to parse this command as a [`macho::DyldInfoCommand`].
276    pub fn dyld_info(self) -> Result<Option<&'data macho::DyldInfoCommand<E>>> {
277        if self.cmd == macho::LC_DYLD_INFO || self.cmd == macho::LC_DYLD_INFO_ONLY {
278            Some(self.data()).transpose()
279        } else {
280            Ok(None)
281        }
282    }
283
284    /// Try to parse this command as an [`macho::EntryPointCommand`].
285    pub fn entry_point(self) -> Result<Option<&'data macho::EntryPointCommand<E>>> {
286        if self.cmd == macho::LC_MAIN {
287            Some(self.data()).transpose()
288        } else {
289            Ok(None)
290        }
291    }
292
293    /// Try to parse this command as an `LC_UNIXTHREAD` [`macho::ThreadCommand`].
294    ///
295    /// Returns the thread command and the thread state data that follows it.
296    pub fn unix_thread(self) -> Result<Option<(&'data macho::ThreadCommand<E>, &'data [u8])>> {
297        if self.cmd == macho::LC_UNIXTHREAD {
298            let mut data = self.data;
299            let thread = data.read().read_error("Invalid Mach-O command size")?;
300            Ok(Some((thread, data.0)))
301        } else {
302            Ok(None)
303        }
304    }
305
306    /// Try to parse this command as a [`macho::BuildVersionCommand`].
307    pub fn build_version(self) -> Result<Option<&'data macho::BuildVersionCommand<E>>> {
308        if self.cmd == macho::LC_BUILD_VERSION {
309            Some(self.data()).transpose()
310        } else {
311            Ok(None)
312        }
313    }
314}
315
316/// A [`macho::LoadCommand`] that has been interpreted according to its `cmd` field.
317#[derive(Debug, Clone, Copy)]
318#[non_exhaustive]
319pub enum LoadCommandVariant<'data, E: Endian> {
320    /// `LC_SEGMENT`
321    Segment32(&'data macho::SegmentCommand32<E>, &'data [u8]),
322    /// `LC_SYMTAB`
323    Symtab(&'data macho::SymtabCommand<E>),
324    // obsolete: `LC_SYMSEG`
325    //Symseg(&'data macho::SymsegCommand<E>),
326    /// `LC_THREAD` or `LC_UNIXTHREAD`
327    Thread(&'data macho::ThreadCommand<E>, &'data [u8]),
328    // obsolete: `LC_IDFVMLIB` or `LC_LOADFVMLIB`
329    //Fvmlib(&'data macho::FvmlibCommand<E>),
330    // obsolete: `LC_IDENT`
331    //Ident(&'data macho::IdentCommand<E>),
332    // internal: `LC_FVMFILE`
333    //Fvmfile(&'data macho::FvmfileCommand<E>),
334    // internal: `LC_PREPAGE`
335    /// `LC_DYSYMTAB`
336    Dysymtab(&'data macho::DysymtabCommand<E>),
337    /// `LC_LOAD_DYLIB`, `LC_LOAD_WEAK_DYLIB`, `LC_REEXPORT_DYLIB`,
338    /// `LC_LAZY_LOAD_DYLIB`, or `LC_LOAD_UPWARD_DYLIB`
339    ///
340    /// See also [`LoadCommandData::dylib_use_flags`] to read the optional flags field.
341    Dylib(&'data macho::DylibCommand<E>),
342    /// `LC_ID_DYLIB`
343    IdDylib(&'data macho::DylibCommand<E>),
344    /// `LC_LOAD_DYLINKER`
345    LoadDylinker(&'data macho::DylinkerCommand<E>),
346    /// `LC_ID_DYLINKER`
347    IdDylinker(&'data macho::DylinkerCommand<E>),
348    /// `LC_PREBOUND_DYLIB`
349    PreboundDylib(&'data macho::PreboundDylibCommand<E>),
350    /// `LC_ROUTINES`
351    Routines32(&'data macho::RoutinesCommand32<E>),
352    /// `LC_SUB_FRAMEWORK`
353    SubFramework(&'data macho::SubFrameworkCommand<E>),
354    /// `LC_SUB_UMBRELLA`
355    SubUmbrella(&'data macho::SubUmbrellaCommand<E>),
356    /// `LC_SUB_CLIENT`
357    SubClient(&'data macho::SubClientCommand<E>),
358    /// `LC_SUB_LIBRARY`
359    SubLibrary(&'data macho::SubLibraryCommand<E>),
360    /// `LC_TWOLEVEL_HINTS`
361    TwolevelHints(&'data macho::TwolevelHintsCommand<E>),
362    /// `LC_PREBIND_CKSUM`
363    PrebindCksum(&'data macho::PrebindCksumCommand<E>),
364    /// `LC_SEGMENT_64`
365    Segment64(&'data macho::SegmentCommand64<E>, &'data [u8]),
366    /// `LC_ROUTINES_64`
367    Routines64(&'data macho::RoutinesCommand64<E>),
368    /// `LC_UUID`
369    Uuid(&'data macho::UuidCommand<E>),
370    /// `LC_RPATH`
371    Rpath(&'data macho::RpathCommand<E>),
372    /// `LC_TARGET_TRIPLE`
373    TargetTriple(&'data macho::TargetTripleCommand<E>),
374    /// `LC_CODE_SIGNATURE`, `LC_SEGMENT_SPLIT_INFO`, `LC_FUNCTION_STARTS`,
375    /// `LC_DATA_IN_CODE`, `LC_DYLIB_CODE_SIGN_DRS`, `LC_LINKER_OPTIMIZATION_HINT`,
376    /// `LC_DYLD_EXPORTS_TRIE`, or `LC_DYLD_CHAINED_FIXUPS`.
377    LinkeditData(&'data macho::LinkeditDataCommand<E>),
378    /// `LC_ENCRYPTION_INFO`
379    EncryptionInfo32(&'data macho::EncryptionInfoCommand32<E>),
380    /// `LC_DYLD_INFO` or `LC_DYLD_INFO_ONLY`
381    DyldInfo(&'data macho::DyldInfoCommand<E>),
382    /// `LC_VERSION_MIN_MACOSX`, `LC_VERSION_MIN_IPHONEOS`, `LC_VERSION_MIN_WATCHOS`,
383    /// or `LC_VERSION_MIN_TVOS`
384    VersionMin(&'data macho::VersionMinCommand<E>),
385    /// `LC_DYLD_ENVIRONMENT`
386    DyldEnvironment(&'data macho::DylinkerCommand<E>),
387    /// `LC_MAIN`
388    EntryPoint(&'data macho::EntryPointCommand<E>),
389    /// `LC_SOURCE_VERSION`
390    SourceVersion(&'data macho::SourceVersionCommand<E>),
391    /// `LC_ENCRYPTION_INFO_64`
392    EncryptionInfo64(&'data macho::EncryptionInfoCommand64<E>),
393    /// `LC_LINKER_OPTION`
394    LinkerOption(&'data macho::LinkerOptionCommand<E>),
395    /// `LC_NOTE`
396    Note(&'data macho::NoteCommand<E>),
397    /// `LC_BUILD_VERSION`
398    BuildVersion(&'data macho::BuildVersionCommand<E>),
399    /// `LC_FILESET_ENTRY`
400    FilesetEntry(&'data macho::FilesetEntryCommand<E>),
401    /// An unrecognized or obsolete load command.
402    Other,
403}
404
405impl<E: Endian> macho::SymtabCommand<E> {
406    /// Return the symbol table that this command references.
407    pub fn symbols<'data, Mach: MachHeader<Endian = E>, R: ReadRef<'data>>(
408        &self,
409        endian: E,
410        data: R,
411    ) -> Result<SymbolTable<'data, Mach, R>> {
412        let symbols = data
413            .read_slice_at(
414                self.symoff.get(endian).into(),
415                self.nsyms.get(endian) as usize,
416            )
417            .read_error("Invalid Mach-O symbol table offset or size")?;
418        let str_start: u64 = self.stroff.get(endian).into();
419        let str_end = str_start
420            .checked_add(self.strsize.get(endian).into())
421            .read_error("Invalid Mach-O string table length")?;
422        let strings = StringTable::new(data, str_start, str_end);
423        Ok(SymbolTable::new(symbols, strings))
424    }
425}
426
427impl<E: Endian> macho::DysymtabCommand<E> {
428    /// Return the table of indirect symbol indexes.
429    ///
430    /// Entries in this table are referenced by the `reserved1` field
431    /// in sections that contain symbol pointers or stubs.
432    ///
433    /// Each entry is an index into the symbol table.
434    pub fn indirect_symbols<'data, R: ReadRef<'data>>(
435        &self,
436        endian: E,
437        data: R,
438    ) -> Result<&'data [U32<E>]> {
439        data.read_slice_at(
440            self.indirectsymoff.get(endian).into(),
441            self.nindirectsyms.get(endian) as usize,
442        )
443        .read_error("Invalid Mach-O indirect symbol offset or count")
444    }
445}
446
447impl<E: Endian> macho::LinkeditDataCommand<E> {
448    /// Return an iterator over the function start addresses.
449    ///
450    /// Only works if the command is a `LC_FUNCTION_STARTS` command.
451    ///
452    /// # Arguments
453    /// * `text_segment_addr` - The VM address of the __TEXT segment.
454    pub fn function_starts<'data, R: ReadRef<'data>>(
455        &self,
456        endian: E,
457        data: R,
458        text_segment_addr: u64,
459    ) -> Result<FunctionStartsIterator<'data>> {
460        if self.cmd.get(endian) != macho::LC_FUNCTION_STARTS {
461            return Err(Error("Not a function starts command"));
462        }
463        let data = data
464            .read_bytes_at(
465                self.dataoff.get(endian).into(),
466                self.datasize.get(endian).into(),
467            )
468            .read_error("Invalid function starts offset or size")?;
469        Ok(FunctionStartsIterator::new(data, text_segment_addr))
470    }
471
472    /// Return an iterator over the exports trie.
473    ///
474    /// Only works if the command is a `LC_DYLD_EXPORTS_TRIE` command.
475    pub fn exports_trie<'data, R: ReadRef<'data>>(
476        &self,
477        endian: E,
478        data: R,
479    ) -> Result<ExportsTrieIterator<'data>> {
480        if self.cmd.get(endian) != macho::LC_DYLD_EXPORTS_TRIE {
481            return Err(Error("Not an exports trie command"));
482        }
483        let data = data
484            .read_bytes_at(
485                self.dataoff.get(endian).into(),
486                self.datasize.get(endian).into(),
487            )
488            .read_error("Invalid exports trie offset or size")?;
489        Ok(ExportsTrieIterator::new(data))
490    }
491}
492
#[cfg(test)]
mod tests {
    use super::*;
    use crate::LittleEndian;

    /// Checks how the iterator reacts to different `cmdsize` values.
    #[test]
    fn cmd_size_invalid() {
        // Wrapper that gives the test buffers a 16-byte alignment.
        #[repr(align(16))]
        struct Align<const N: usize>([u8; N]);

        // An all-zero header carries a `cmdsize` of 0 and is rejected.
        let zeroed = Align([0; 8]);
        let mut commands = LoadCommandIterator::new(LittleEndian, &zeroed.0, 10);
        assert!(commands.next().is_err());

        // A `cmdsize` of 7 is rejected.
        let size_seven = Align([0, 0, 0, 0, 7, 0, 0, 0, 0]);
        let mut commands = LoadCommandIterator::new(LittleEndian, &size_seven.0, 10);
        assert!(commands.next().is_err());

        // A `cmdsize` of 8 is accepted.
        let size_eight = Align([0, 0, 0, 0, 8, 0, 0, 0, 0]);
        let mut commands = LoadCommandIterator::new(LittleEndian, &size_eight.0, 10);
        assert!(commands.next().is_ok());
    }

    /// Checks that a truncated ULEB128 value produces one error and then ends
    /// the function starts iteration.
    #[test]
    fn function_starts_invalid_uleb128() {
        use crate::macho;

        // Invalid ULEB128: the continuation bit is set but no byte follows.
        let data = [0x80];
        let datasize = data.len() as u32;

        let cmd = macho::LinkeditDataCommand {
            cmd: macho::LC_FUNCTION_STARTS.into(),
            cmdsize: 16.into(),
            dataoff: 0.into(),
            datasize: datasize.into(),
        };

        let mut iter = cmd.function_starts(LittleEndian, &data[..], 0).unwrap();

        // The first call reports the error.
        assert!(iter.next().is_err());
        // The second call returns `Ok(None)`: the iterator is exhausted.
        assert!(matches!(iter.next(), Ok(None)));
    }
}
533}