Skip to main content

object/read/macho/
symbol.rs

1use alloc::vec::Vec;
2use core::fmt::Debug;
3use core::{fmt, slice, str};
4
5use crate::endian::{self, Endianness};
6use crate::macho;
7use crate::pod::Pod;
8use crate::read::util::StringTable;
9use crate::read::{
10    self, ObjectMap, ObjectMapEntry, ObjectMapFile, ObjectSymbol, ObjectSymbolTable, ReadError,
11    ReadRef, Result, SectionIndex, SectionKind, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap,
12    SymbolMapEntry, SymbolScope, SymbolSection,
13};
14
15use super::{MachHeader, MachOFile, Section};
16
17/// A table of symbol entries in a Mach-O file.
18///
19/// Also includes the string table used for the symbol names.
20///
21/// Returned by [`macho::SymtabCommand::symbols`].
22#[derive(Debug, Clone, Copy)]
23pub struct SymbolTable<'data, Mach: MachHeader, R = &'data [u8]>
24where
25    R: ReadRef<'data>,
26{
27    symbols: &'data [Mach::Nlist],
28    strings: StringTable<'data, R>,
29}
30
31impl<'data, Mach: MachHeader, R: ReadRef<'data>> Default for SymbolTable<'data, Mach, R> {
32    fn default() -> Self {
33        SymbolTable {
34            symbols: &[],
35            strings: Default::default(),
36        }
37    }
38}
39
40impl<'data, Mach: MachHeader, R: ReadRef<'data>> SymbolTable<'data, Mach, R> {
41    #[inline]
42    pub(super) fn new(symbols: &'data [Mach::Nlist], strings: StringTable<'data, R>) -> Self {
43        SymbolTable { symbols, strings }
44    }
45
46    /// Return the string table used for the symbol names.
47    #[inline]
48    pub fn strings(&self) -> StringTable<'data, R> {
49        self.strings
50    }
51
52    /// Iterate over the symbols.
53    #[inline]
54    pub fn iter(&self) -> slice::Iter<'data, Mach::Nlist> {
55        self.symbols.iter()
56    }
57
58    /// Return true if the symbol table is empty.
59    #[inline]
60    pub fn is_empty(&self) -> bool {
61        self.symbols.is_empty()
62    }
63
64    /// The number of symbols.
65    #[inline]
66    pub fn len(&self) -> usize {
67        self.symbols.len()
68    }
69
70    /// Return the symbol at the given index.
71    pub fn symbol(&self, index: SymbolIndex) -> Result<&'data Mach::Nlist> {
72        self.symbols
73            .get(index.0)
74            .read_error("Invalid Mach-O symbol index")
75    }
76
77    /// Construct a map from addresses to a user-defined map entry.
78    pub fn map<Entry: SymbolMapEntry, F: Fn(&'data Mach::Nlist) -> Option<Entry>>(
79        &self,
80        f: F,
81    ) -> SymbolMap<Entry> {
82        let mut symbols = Vec::new();
83        for nlist in self.symbols {
84            if !nlist.is_definition() {
85                continue;
86            }
87            if let Some(entry) = f(nlist) {
88                symbols.push(entry);
89            }
90        }
91        SymbolMap::new(symbols)
92    }
93
94    /// Construct a map from addresses to symbol names and object file names.
95    pub fn object_map(&self, endian: Mach::Endian) -> ObjectMap<'data> {
96        let mut symbols = Vec::new();
97        let mut objects = Vec::new();
98        let mut object = None;
99        let mut current_function = None;
100        // Each module starts with one or two N_SO symbols (path, or directory + filename)
101        // and one N_OSO symbol. The module is terminated by an empty N_SO symbol.
102        for nlist in self.symbols {
103            let n_type = nlist.n_type();
104            if n_type & macho::N_STAB == 0 {
105                continue;
106            }
107            // TODO: includes global symbols too (N_GSYM). These may need to get their
108            // address from regular symbols though.
109            match n_type {
110                macho::N_SO => {
111                    object = None;
112                }
113                macho::N_OSO => {
114                    object = None;
115                    if let Ok(name) = nlist.name(endian, self.strings) {
116                        if !name.is_empty() {
117                            object = Some(objects.len());
118                            // `N_OSO` symbol names can be either `/path/to/object.o`
119                            // or `/path/to/archive.a(object.o)`.
120                            let (path, member) = name
121                                .split_last()
122                                .and_then(|(last, head)| {
123                                    if *last != b')' {
124                                        return None;
125                                    }
126                                    let index = head.iter().position(|&x| x == b'(')?;
127                                    let (archive, rest) = head.split_at(index);
128                                    Some((archive, Some(&rest[1..])))
129                                })
130                                .unwrap_or((name, None));
131                            objects.push(ObjectMapFile::new(path, member));
132                        }
133                    }
134                }
135                macho::N_FUN => {
136                    if let Ok(name) = nlist.name(endian, self.strings) {
137                        if !name.is_empty() {
138                            current_function = Some((name, nlist.n_value(endian).into()))
139                        } else if let Some((name, address)) = current_function.take() {
140                            if let Some(object) = object {
141                                symbols.push(ObjectMapEntry::new(
142                                    address,
143                                    nlist.n_value(endian).into(),
144                                    name,
145                                    object,
146                                ));
147                            }
148                        }
149                    }
150                }
151                macho::N_STSYM => {
152                    // Static symbols have a single entry with the address of the symbol
153                    // but no size
154                    if let Ok(name) = nlist.name(endian, self.strings) {
155                        if let Some(object) = object {
156                            symbols.push(ObjectMapEntry::new(
157                                nlist.n_value(endian).into(),
158                                0,
159                                name,
160                                object,
161                            ));
162                        }
163                    }
164                }
165                _ => {}
166            }
167        }
168        ObjectMap::new(symbols, objects)
169    }
170}
171
/// A symbol table in a [`MachOFile32`](super::MachOFile32).
///
/// This is [`MachOSymbolTable`] instantiated with [`macho::MachHeader32`].
pub type MachOSymbolTable32<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbolTable<'data, 'file, macho::MachHeader32<Endian>, R>;
/// A symbol table in a [`MachOFile64`](super::MachOFile64).
///
/// This is [`MachOSymbolTable`] instantiated with [`macho::MachHeader64`].
pub type MachOSymbolTable64<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbolTable<'data, 'file, macho::MachHeader64<Endian>, R>;
178
179/// A symbol table in a [`MachOFile`].
180#[derive(Debug, Clone, Copy)]
181pub struct MachOSymbolTable<'data, 'file, Mach, R = &'data [u8]>
182where
183    Mach: MachHeader,
184    R: ReadRef<'data>,
185{
186    pub(super) file: &'file MachOFile<'data, Mach, R>,
187}
188
189impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbolTable<'data, 'file, Mach, R>
190where
191    Mach: MachHeader,
192    R: ReadRef<'data>,
193{
194}
195
196impl<'data, 'file, Mach, R> ObjectSymbolTable<'data> for MachOSymbolTable<'data, 'file, Mach, R>
197where
198    Mach: MachHeader,
199    R: ReadRef<'data>,
200{
201    type Symbol = MachOSymbol<'data, 'file, Mach, R>;
202    type SymbolIterator = MachOSymbolIterator<'data, 'file, Mach, R>;
203
204    fn symbols(&self) -> Self::SymbolIterator {
205        MachOSymbolIterator::new(self.file)
206    }
207
208    fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol> {
209        let nlist = self.file.symbols.symbol(index)?;
210        MachOSymbol::new(self.file, index, nlist).read_error("Unsupported Mach-O symbol index")
211    }
212}
213
/// An iterator for the symbols in a [`MachOFile32`](super::MachOFile32).
///
/// This is [`MachOSymbolIterator`] instantiated with [`macho::MachHeader32`].
pub type MachOSymbolIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbolIterator<'data, 'file, macho::MachHeader32<Endian>, R>;
/// An iterator for the symbols in a [`MachOFile64`](super::MachOFile64).
///
/// This is [`MachOSymbolIterator`] instantiated with [`macho::MachHeader64`].
pub type MachOSymbolIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbolIterator<'data, 'file, macho::MachHeader64<Endian>, R>;
220
221/// An iterator for the symbols in a [`MachOFile`].
222pub struct MachOSymbolIterator<'data, 'file, Mach, R = &'data [u8]>
223where
224    Mach: MachHeader,
225    R: ReadRef<'data>,
226{
227    file: &'file MachOFile<'data, Mach, R>,
228    index: SymbolIndex,
229}
230
231impl<'data, 'file, Mach, R> MachOSymbolIterator<'data, 'file, Mach, R>
232where
233    Mach: MachHeader,
234    R: ReadRef<'data>,
235{
236    pub(super) fn new(file: &'file MachOFile<'data, Mach, R>) -> Self {
237        MachOSymbolIterator {
238            file,
239            index: SymbolIndex(0),
240        }
241    }
242
243    pub(super) fn empty(file: &'file MachOFile<'data, Mach, R>) -> Self {
244        MachOSymbolIterator {
245            file,
246            index: SymbolIndex(file.symbols.len()),
247        }
248    }
249}
250
251impl<'data, 'file, Mach, R> fmt::Debug for MachOSymbolIterator<'data, 'file, Mach, R>
252where
253    Mach: MachHeader,
254    R: ReadRef<'data>,
255{
256    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
257        f.debug_struct("MachOSymbolIterator").finish()
258    }
259}
260
261impl<'data, 'file, Mach, R> Iterator for MachOSymbolIterator<'data, 'file, Mach, R>
262where
263    Mach: MachHeader,
264    R: ReadRef<'data>,
265{
266    type Item = MachOSymbol<'data, 'file, Mach, R>;
267
268    fn next(&mut self) -> Option<Self::Item> {
269        loop {
270            let index = self.index;
271            let nlist = self.file.symbols.symbols.get(index.0)?;
272            self.index.0 += 1;
273            if let Some(symbol) = MachOSymbol::new(self.file, index, nlist) {
274                return Some(symbol);
275            }
276        }
277    }
278}
279
/// A symbol in a [`MachOFile32`](super::MachOFile32).
///
/// This is [`MachOSymbol`] instantiated with [`macho::MachHeader32`].
pub type MachOSymbol32<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbol<'data, 'file, macho::MachHeader32<Endian>, R>;
/// A symbol in a [`MachOFile64`](super::MachOFile64).
///
/// This is [`MachOSymbol`] instantiated with [`macho::MachHeader64`].
pub type MachOSymbol64<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbol<'data, 'file, macho::MachHeader64<Endian>, R>;
286
287/// A symbol in a [`MachOFile`].
288///
289/// Most functionality is provided by the [`ObjectSymbol`] trait implementation.
290#[derive(Debug, Clone, Copy)]
291pub struct MachOSymbol<'data, 'file, Mach, R = &'data [u8]>
292where
293    Mach: MachHeader,
294    R: ReadRef<'data>,
295{
296    file: &'file MachOFile<'data, Mach, R>,
297    index: SymbolIndex,
298    nlist: &'data Mach::Nlist,
299}
300
301impl<'data, 'file, Mach, R> MachOSymbol<'data, 'file, Mach, R>
302where
303    Mach: MachHeader,
304    R: ReadRef<'data>,
305{
306    pub(super) fn new(
307        file: &'file MachOFile<'data, Mach, R>,
308        index: SymbolIndex,
309        nlist: &'data Mach::Nlist,
310    ) -> Option<Self> {
311        if nlist.n_type() & macho::N_STAB != 0 {
312            return None;
313        }
314        Some(MachOSymbol { file, index, nlist })
315    }
316
317    /// Get the Mach-O file containing this symbol.
318    pub fn macho_file(&self) -> &'file MachOFile<'data, Mach, R> {
319        self.file
320    }
321
322    /// Get the raw Mach-O symbol structure.
323    pub fn macho_symbol(&self) -> &'data Mach::Nlist {
324        self.nlist
325    }
326}
327
328impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbol<'data, 'file, Mach, R>
329where
330    Mach: MachHeader,
331    R: ReadRef<'data>,
332{
333}
334
335impl<'data, 'file, Mach, R> ObjectSymbol<'data> for MachOSymbol<'data, 'file, Mach, R>
336where
337    Mach: MachHeader,
338    R: ReadRef<'data>,
339{
340    #[inline]
341    fn index(&self) -> SymbolIndex {
342        self.index
343    }
344
345    fn name_bytes(&self) -> Result<&'data [u8]> {
346        self.nlist.name(self.file.endian, self.file.symbols.strings)
347    }
348
349    fn name(&self) -> Result<&'data str> {
350        let name = self.name_bytes()?;
351        str::from_utf8(name)
352            .ok()
353            .read_error("Non UTF-8 Mach-O symbol name")
354    }
355
356    #[inline]
357    fn address(&self) -> u64 {
358        self.nlist.n_value(self.file.endian).into()
359    }
360
361    #[inline]
362    fn size(&self) -> u64 {
363        0
364    }
365
366    fn kind(&self) -> SymbolKind {
367        self.section()
368            .index()
369            .and_then(|index| self.file.section_internal(index).ok())
370            .map(|section| {
371                if let Ok(name) = self.name_bytes() {
372                    // Heuristic to match LLVM's convention for section symbols; may misclassify.
373                    if self.is_local()
374                        && name.len() > 4
375                        && name.starts_with(b"ltmp")
376                        && name[4..].iter().all(|b| b.is_ascii_digit())
377                        && self.address() == section.section.addr(self.file.endian).into()
378                    {
379                        return SymbolKind::Section;
380                    }
381                }
382                match section.kind {
383                    SectionKind::Text => SymbolKind::Text,
384                    SectionKind::Data
385                    | SectionKind::ReadOnlyData
386                    | SectionKind::ReadOnlyString
387                    | SectionKind::UninitializedData
388                    | SectionKind::Common => SymbolKind::Data,
389                    SectionKind::Tls
390                    | SectionKind::UninitializedTls
391                    | SectionKind::TlsVariables => SymbolKind::Tls,
392                    _ => SymbolKind::Unknown,
393                }
394            })
395            .unwrap_or(SymbolKind::Unknown)
396    }
397
398    fn section(&self) -> SymbolSection {
399        match self.nlist.n_type() & macho::N_TYPE {
400            macho::N_UNDF => SymbolSection::Undefined,
401            macho::N_ABS => SymbolSection::Absolute,
402            macho::N_SECT => {
403                let n_sect = self.nlist.n_sect();
404                if n_sect != 0 {
405                    SymbolSection::Section(SectionIndex(n_sect as usize))
406                } else {
407                    SymbolSection::Unknown
408                }
409            }
410            _ => SymbolSection::Unknown,
411        }
412    }
413
414    #[inline]
415    fn is_undefined(&self) -> bool {
416        self.nlist.n_type() & macho::N_TYPE == macho::N_UNDF
417    }
418
419    #[inline]
420    fn is_definition(&self) -> bool {
421        self.nlist.is_definition()
422    }
423
424    #[inline]
425    fn is_common(&self) -> bool {
426        // Mach-O common symbols are based on section, not symbol
427        false
428    }
429
430    #[inline]
431    fn is_weak(&self) -> bool {
432        self.nlist.n_desc(self.file.endian) & (macho::N_WEAK_REF | macho::N_WEAK_DEF) != 0
433    }
434
435    fn scope(&self) -> SymbolScope {
436        let n_type = self.nlist.n_type();
437        if n_type & macho::N_TYPE == macho::N_UNDF {
438            SymbolScope::Unknown
439        } else if n_type & macho::N_EXT == 0 {
440            SymbolScope::Compilation
441        } else if n_type & macho::N_PEXT != 0 {
442            SymbolScope::Linkage
443        } else {
444            SymbolScope::Dynamic
445        }
446    }
447
448    #[inline]
449    fn is_global(&self) -> bool {
450        self.scope() != SymbolScope::Compilation
451    }
452
453    #[inline]
454    fn is_local(&self) -> bool {
455        self.scope() == SymbolScope::Compilation
456    }
457
458    #[inline]
459    fn flags(&self) -> SymbolFlags<SectionIndex, SymbolIndex> {
460        let n_desc = self.nlist.n_desc(self.file.endian);
461        SymbolFlags::MachO { n_desc }
462    }
463}
464
465/// A trait for generic access to [`macho::Nlist32`] and [`macho::Nlist64`].
466#[allow(missing_docs)]
467pub trait Nlist: Debug + Pod {
468    type Word: Into<u64>;
469    type Endian: endian::Endian;
470
471    fn n_strx(&self, endian: Self::Endian) -> u32;
472    fn n_type(&self) -> u8;
473    fn n_sect(&self) -> u8;
474    fn n_desc(&self, endian: Self::Endian) -> u16;
475    fn n_value(&self, endian: Self::Endian) -> Self::Word;
476
477    fn name<'data, R: ReadRef<'data>>(
478        &self,
479        endian: Self::Endian,
480        strings: StringTable<'data, R>,
481    ) -> Result<&'data [u8]> {
482        strings
483            .get(self.n_strx(endian))
484            .read_error("Invalid Mach-O symbol name offset")
485    }
486
487    /// Return true if this is a STAB symbol.
488    ///
489    /// This determines the meaning of the `n_type` field.
490    fn is_stab(&self) -> bool {
491        self.n_type() & macho::N_STAB != 0
492    }
493
494    /// Return true if this is an undefined symbol.
495    fn is_undefined(&self) -> bool {
496        let n_type = self.n_type();
497        n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE == macho::N_UNDF
498    }
499
500    /// Return true if the symbol is a definition of a function or data object.
501    fn is_definition(&self) -> bool {
502        let n_type = self.n_type();
503        n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE == macho::N_SECT
504    }
505
506    /// Return the library ordinal.
507    ///
508    /// This is either a 1-based index into the dylib load commands,
509    /// or a special ordinal.
510    #[inline]
511    fn library_ordinal(&self, endian: Self::Endian) -> u8 {
512        (self.n_desc(endian) >> 8) as u8
513    }
514}
515
516impl<Endian: endian::Endian> Nlist for macho::Nlist32<Endian> {
517    type Word = u32;
518    type Endian = Endian;
519
520    fn n_strx(&self, endian: Self::Endian) -> u32 {
521        self.n_strx.get(endian)
522    }
523    fn n_type(&self) -> u8 {
524        self.n_type
525    }
526    fn n_sect(&self) -> u8 {
527        self.n_sect
528    }
529    fn n_desc(&self, endian: Self::Endian) -> u16 {
530        self.n_desc.get(endian)
531    }
532    fn n_value(&self, endian: Self::Endian) -> Self::Word {
533        self.n_value.get(endian)
534    }
535}
536
537impl<Endian: endian::Endian> Nlist for macho::Nlist64<Endian> {
538    type Word = u64;
539    type Endian = Endian;
540
541    fn n_strx(&self, endian: Self::Endian) -> u32 {
542        self.n_strx.get(endian)
543    }
544    fn n_type(&self) -> u8 {
545        self.n_type
546    }
547    fn n_sect(&self) -> u8 {
548        self.n_sect
549    }
550    fn n_desc(&self, endian: Self::Endian) -> u16 {
551        self.n_desc.get(endian)
552    }
553    fn n_value(&self, endian: Self::Endian) -> Self::Word {
554        self.n_value.get(endian)
555    }
556}