Skip to main content

object/read/
symbol_map.rs

1use alloc::vec::Vec;
2
3use crate::read::{Object, ObjectSection, ObjectSymbol, ObjectSymbolTable};
4use crate::{SymbolKind, SymbolScope};
5
/// Common interface for the entries stored in a [`SymbolMap`].
///
/// An entry describes a symbol by its address and, optionally, its size.
pub trait SymbolMapEntry {
    /// Returns the address of the symbol.
    fn address(&self) -> u64;

    /// Returns the size of the symbol.
    ///
    /// A size of 0 means that the symbol extends up to the next entry in the
    /// symbol map, or to the end of the address space when it is the last entry.
    ///
    /// The provided implementation always returns 0.
    fn size(&self) -> u64 {
        0
    }
}
19
20/// A map from addresses to symbol information.
21///
22/// The symbol information depends on the chosen entry type, such as [`SymbolMapName`].
23///
24/// Returned by [`Object::symbol_map`].
25#[derive(Debug, Default, Clone)]
26pub struct SymbolMap<T: SymbolMapEntry> {
27    symbols: Vec<T>,
28}
29
30impl<T: SymbolMapEntry> SymbolMap<T> {
31    /// Construct a new symbol map.
32    ///
33    /// This function will sort the symbols by address.
34    pub fn new(mut symbols: Vec<T>) -> Self {
35        symbols.sort_by_key(|s| s.address());
36        SymbolMap { symbols }
37    }
38
39    /// Get the symbol before the given address.
40    #[deprecated = "use before or containing"]
41    pub fn get(&self, address: u64) -> Option<&T> {
42        self.before(address)
43    }
44
45    /// Get the symbol at or before the given address.
46    pub fn before(&self, address: u64) -> Option<&T> {
47        let index = match self
48            .symbols
49            .binary_search_by_key(&address, |symbol| symbol.address())
50        {
51            Ok(index) => index,
52            Err(index) => index.checked_sub(1)?,
53        };
54        self.symbols.get(index)
55    }
56
57    /// Get the symbol containing the given address.
58    pub fn containing(&self, address: u64) -> Option<&T> {
59        self.before(address).filter(|entry| {
60            entry.size() == 0 || address.wrapping_sub(entry.address()) < entry.size()
61        })
62    }
63
64    /// Get all symbols in the map.
65    #[inline]
66    pub fn symbols(&self) -> &[T] {
67        &self.symbols
68    }
69}
70
71/// The type used for entries in a [`SymbolMap`] that maps from addresses to names.
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
73pub struct SymbolMapName<'data> {
74    address: u64,
75    size: u64,
76    name: &'data str,
77}
78
79impl<'data> SymbolMapName<'data> {
80    /// Construct a `SymbolMapName`.
81    pub fn new(address: u64, size: u64, name: &'data str) -> Self {
82        SymbolMapName {
83            address,
84            size,
85            name,
86        }
87    }
88
89    /// The symbol address.
90    #[inline]
91    pub fn address(&self) -> u64 {
92        self.address
93    }
94
95    /// The symbol size.
96    #[inline]
97    pub fn size(&self) -> u64 {
98        self.size
99    }
100
101    /// The symbol name.
102    #[inline]
103    pub fn name(&self) -> &'data str {
104        self.name
105    }
106}
107
108impl<'data> SymbolMapEntry for SymbolMapName<'data> {
109    #[inline]
110    fn address(&self) -> u64 {
111        self.address
112    }
113
114    #[inline]
115    fn size(&self) -> u64 {
116        self.size
117    }
118}
119
120/// A map from addresses to symbol names and object files.
121///
122/// This is derived from STAB entries in Mach-O files.
123///
124/// Returned by [`Object::object_map`].
125#[derive(Debug, Default, Clone)]
126pub struct ObjectMap<'data> {
127    symbols: SymbolMap<ObjectMapEntry<'data>>,
128    objects: Vec<ObjectMapFile<'data>>,
129}
130
131impl<'data> ObjectMap<'data> {
132    #[cfg(feature = "macho")]
133    pub(super) fn new(
134        symbols: Vec<ObjectMapEntry<'data>>,
135        objects: Vec<ObjectMapFile<'data>>,
136    ) -> Self {
137        ObjectMap {
138            symbols: SymbolMap::new(symbols),
139            objects,
140        }
141    }
142
143    /// Get the entry containing the given address.
144    pub fn get(&self, address: u64) -> Option<&ObjectMapEntry<'data>> {
145        self.symbols.containing(address)
146    }
147
148    /// Get all symbols in the map.
149    #[inline]
150    pub fn symbols(&self) -> &[ObjectMapEntry<'data>] {
151        self.symbols.symbols()
152    }
153
154    /// Get all objects in the map.
155    #[inline]
156    pub fn objects(&self) -> &[ObjectMapFile<'data>] {
157        &self.objects
158    }
159}
160
161/// A symbol in an [`ObjectMap`].
162#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
163pub struct ObjectMapEntry<'data> {
164    address: u64,
165    size: u64,
166    name: &'data [u8],
167    object: usize,
168}
169
170impl<'data> ObjectMapEntry<'data> {
171    #[cfg(feature = "macho")]
172    pub(super) fn new(address: u64, size: u64, name: &'data [u8], object: usize) -> Self {
173        ObjectMapEntry {
174            address,
175            size,
176            name,
177            object,
178        }
179    }
180
181    /// Get the symbol address.
182    #[inline]
183    pub fn address(&self) -> u64 {
184        self.address
185    }
186
187    /// Get the symbol size.
188    ///
189    /// This may be 0 if the size is unknown.
190    #[inline]
191    pub fn size(&self) -> u64 {
192        self.size
193    }
194
195    /// Get the symbol name.
196    #[inline]
197    pub fn name(&self) -> &'data [u8] {
198        self.name
199    }
200
201    /// Get the index of the object file name.
202    #[inline]
203    pub fn object_index(&self) -> usize {
204        self.object
205    }
206
207    /// Get the object file name.
208    #[inline]
209    pub fn object<'a>(&self, map: &'a ObjectMap<'data>) -> &'a ObjectMapFile<'data> {
210        &map.objects[self.object]
211    }
212}
213
214impl<'data> SymbolMapEntry for ObjectMapEntry<'data> {
215    #[inline]
216    fn address(&self) -> u64 {
217        self.address
218    }
219
220    #[inline]
221    fn size(&self) -> u64 {
222        self.size
223    }
224}
225
/// An object file name in an [`ObjectMap`].
///
/// The name is a file path, plus a member name when that file is an archive.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ObjectMapFile<'data> {
    path: &'data [u8],
    member: Option<&'data [u8]>,
}

impl<'data> ObjectMapFile<'data> {
    // Creates a file name from a path and an optional archive member name.
    #[cfg(feature = "macho")]
    pub(super) fn new(path: &'data [u8], member: Option<&'data [u8]>) -> Self {
        Self { path, member }
    }

    /// Get the path to the file containing the object.
    #[inline]
    pub fn path(&self) -> &'data [u8] {
        self.path
    }

    /// If the file is an archive, get the name of the member containing the object.
    #[inline]
    pub fn member(&self) -> Option<&'data [u8]> {
        self.member
    }
}
251
252/// A builder for a [`SymbolMap`].
253// TODO: builder options for
254// - synthetic function start/end (from LC_FUNCTION_STARTS or exception handling)
255// - section filter (e.g. text only)
256// - map entries (e.g. subtract base address)
257#[derive(Debug, Default)]
258pub struct SymbolMapBuilder(());
259
260impl SymbolMapBuilder {
261    /// Construct a new symbol map builder.
262    pub fn new() -> Self {
263        Self::default()
264    }
265
266    /// Read the symbols from an object file to create a symbol map.
267    ///
268    /// The map will only contain defined text and data symbols.
269    /// The dynamic symbol table will only be used if there are no debugging symbols.
270    ///
271    /// If symbol sizes are unknown then we guess the size based on the next symbol
272    /// or end of section.
273    ///
274    /// This does not work well if multiple sections use the same base address, which is
275    /// common for relocatable object files. The symbols will be overlapping, and the
276    /// symbol returned by lookups may be indeterministic. Additionally, if the symbol
277    /// size is unknown then we may use a symbol or section end address from a different
278    /// section to guess its size.
279    pub fn build<'data, O>(self, object: &O) -> SymbolMap<SymbolMapName<'data>>
280    where
281        O: Object<'data> + ?Sized,
282    {
283        let mut symbols = Vec::new();
284        if let Some(table) = object
285            .symbol_table()
286            .or_else(|| object.dynamic_symbol_table())
287        {
288            // Sometimes symbols share addresses. Collect them all then choose the "best".
289            let mut all_symbols = Vec::new();
290            for symbol in table.symbols() {
291                // Must have an address.
292                if !symbol.is_definition() {
293                    continue;
294                }
295                // Must have a name.
296                let name = match symbol.name() {
297                    Ok(name) => name,
298                    _ => continue,
299                };
300                if name.is_empty() {
301                    continue;
302                }
303                let address = symbol.address();
304                let size = symbol.size();
305
306                // Lower is better.
307                let mut priority = 0u32;
308
309                // Prefer known kind.
310                match symbol.kind() {
311                    SymbolKind::Text | SymbolKind::Data => {}
312                    SymbolKind::Unknown => priority += 1,
313                    _ => continue,
314                }
315
316                // Prefer XCOFF labels over csects.
317                // This special case is needed because labels don't have sizes.
318                priority *= 2;
319                #[cfg(feature = "xcoff")]
320                if let crate::SymbolFlags::Xcoff { x_smtyp, .. } = symbol.flags() {
321                    priority += (x_smtyp != crate::xcoff::XTY_LD) as u32;
322                    if size != 0 {
323                        // Add end of sized symbols (typically csects) to bound label sizes.
324                        all_symbols.push((address.saturating_add(size), !0, !0, !0, ""));
325                    }
326                }
327
328                // Prefer symbols that have a size.
329                priority *= 2;
330                priority += (size == 0) as u32;
331
332                // Prefer global visibility.
333                priority *= 4;
334                priority += match symbol.scope() {
335                    SymbolScope::Unknown => 3,
336                    SymbolScope::Compilation => 2,
337                    SymbolScope::Linkage => 1,
338                    SymbolScope::Dynamic => 0,
339                };
340
341                // Prefer later entries (earlier symbol is likely to be less specific).
342                let index = !0 - symbol.index().0;
343
344                // Tuple is ordered for sort.
345                all_symbols.push((address, priority, index, size, name));
346            }
347
348            // Add end of sections to improve guesses for unknown sizes.
349            for section in object.sections() {
350                let address = section.address().saturating_add(section.size());
351                all_symbols.push((address, !0, !0, !0, ""));
352            }
353
354            // Unstable sort is okay because tuple includes index.
355            all_symbols.sort_unstable();
356
357            let mut previous_address = !0;
358            for (address, _priority, _index, size, name) in all_symbols {
359                if address != previous_address {
360                    symbols.push(SymbolMapName::new(address, size, name));
361                    previous_address = address;
362                }
363            }
364
365            // Guess size for symbols with zero size.
366            let mut symbol_iter = symbols.iter_mut().rev();
367            let mut previous_address = symbol_iter.next().map(|s| s.address).unwrap_or(0);
368            for symbol in symbol_iter {
369                if symbol.size == 0 {
370                    symbol.size = previous_address.saturating_sub(symbol.address);
371                }
372                previous_address = symbol.address;
373            }
374
375            // Remove the entries for end of symbol/section.
376            symbols.retain(|x| !x.name.is_empty());
377        }
378        SymbolMap::new(symbols)
379    }
380}