gimli/read/
reader.rs

1#[cfg(feature = "read")]
2use alloc::borrow::Cow;
3use core::convert::TryInto;
4use core::fmt::Debug;
5use core::hash::Hash;
6use core::ops::{Add, AddAssign, Sub};
7
8use crate::common::Format;
9use crate::endianity::Endianity;
10use crate::leb128;
11use crate::read::{Error, Result};
12
/// Opaque identifier for a position inside a section reader.
///
/// The identifier exists for error reporting. Each reader implementation
/// decides what the wrapped value means. Implementations should pick values
/// that are unique across all readers; when two readers share a value, an
/// error may be attributed to the wrong reader.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ReaderOffsetId(pub u64);
20
21/// A trait for offsets with a DWARF section.
22///
23/// This allows consumers to choose a size that is appropriate for their address space.
24pub trait ReaderOffset:
25    Debug + Copy + Eq + Ord + Hash + Add<Output = Self> + AddAssign + Sub<Output = Self>
26{
27    /// Convert a u8 to an offset.
28    fn from_u8(offset: u8) -> Self;
29
30    /// Convert a u16 to an offset.
31    fn from_u16(offset: u16) -> Self;
32
33    /// Convert an i16 to an offset.
34    fn from_i16(offset: i16) -> Self;
35
36    /// Convert a u32 to an offset.
37    fn from_u32(offset: u32) -> Self;
38
39    /// Convert a u64 to an offset.
40    ///
41    /// Returns `Error::UnsupportedOffset` if the value is too large.
42    fn from_u64(offset: u64) -> Result<Self>;
43
44    /// Convert an offset to a u64.
45    fn into_u64(self) -> u64;
46
47    /// Wrapping (modular) addition. Computes `self + other`.
48    fn wrapping_add(self, other: Self) -> Self;
49
50    /// Checked subtraction. Computes `self - other`.
51    fn checked_sub(self, other: Self) -> Option<Self>;
52}
53
54impl ReaderOffset for u64 {
55    #[inline]
56    fn from_u8(offset: u8) -> Self {
57        u64::from(offset)
58    }
59
60    #[inline]
61    fn from_u16(offset: u16) -> Self {
62        u64::from(offset)
63    }
64
65    #[inline]
66    fn from_i16(offset: i16) -> Self {
67        offset as u64
68    }
69
70    #[inline]
71    fn from_u32(offset: u32) -> Self {
72        u64::from(offset)
73    }
74
75    #[inline]
76    fn from_u64(offset: u64) -> Result<Self> {
77        Ok(offset)
78    }
79
80    #[inline]
81    fn into_u64(self) -> u64 {
82        self
83    }
84
85    #[inline]
86    fn wrapping_add(self, other: Self) -> Self {
87        self.wrapping_add(other)
88    }
89
90    #[inline]
91    fn checked_sub(self, other: Self) -> Option<Self> {
92        self.checked_sub(other)
93    }
94}
95
96impl ReaderOffset for u32 {
97    #[inline]
98    fn from_u8(offset: u8) -> Self {
99        u32::from(offset)
100    }
101
102    #[inline]
103    fn from_u16(offset: u16) -> Self {
104        u32::from(offset)
105    }
106
107    #[inline]
108    fn from_i16(offset: i16) -> Self {
109        offset as u32
110    }
111
112    #[inline]
113    fn from_u32(offset: u32) -> Self {
114        offset
115    }
116
117    #[inline]
118    fn from_u64(offset64: u64) -> Result<Self> {
119        let offset = offset64 as u32;
120        if u64::from(offset) == offset64 {
121            Ok(offset)
122        } else {
123            Err(Error::UnsupportedOffset)
124        }
125    }
126
127    #[inline]
128    fn into_u64(self) -> u64 {
129        u64::from(self)
130    }
131
132    #[inline]
133    fn wrapping_add(self, other: Self) -> Self {
134        self.wrapping_add(other)
135    }
136
137    #[inline]
138    fn checked_sub(self, other: Self) -> Option<Self> {
139        self.checked_sub(other)
140    }
141}
142
143impl ReaderOffset for usize {
144    #[inline]
145    fn from_u8(offset: u8) -> Self {
146        offset as usize
147    }
148
149    #[inline]
150    fn from_u16(offset: u16) -> Self {
151        offset as usize
152    }
153
154    #[inline]
155    fn from_i16(offset: i16) -> Self {
156        offset as usize
157    }
158
159    #[inline]
160    fn from_u32(offset: u32) -> Self {
161        offset as usize
162    }
163
164    #[inline]
165    fn from_u64(offset64: u64) -> Result<Self> {
166        let offset = offset64 as usize;
167        if offset as u64 == offset64 {
168            Ok(offset)
169        } else {
170            Err(Error::UnsupportedOffset)
171        }
172    }
173
174    #[inline]
175    fn into_u64(self) -> u64 {
176        self as u64
177    }
178
179    #[inline]
180    fn wrapping_add(self, other: Self) -> Self {
181        self.wrapping_add(other)
182    }
183
184    #[inline]
185    fn checked_sub(self, other: Self) -> Option<Self> {
186        self.checked_sub(other)
187    }
188}
189
190/// A trait for addresses within a DWARF section.
191///
192/// Currently this is a simple extension trait for `u64`, but it may be expanded
193/// in the future to support user-defined address types.
194pub(crate) trait ReaderAddress: Sized {
195    /// Add a length to an address of the given size.
196    ///
197    /// Returns an error for overflow.
198    fn add_sized(self, length: u64, size: u8) -> Result<Self>;
199
200    /// Add a length to an address of the given size.
201    ///
202    /// Wraps the result to the size of the address to allow for the possibility
203    /// that the length is a negative value.
204    fn wrapping_add_sized(self, length: u64, size: u8) -> Self;
205
206    /// The all-zeros value of an address.
207    fn zeros() -> Self;
208
209    /// The all-ones value of an address of the given size.
210    fn ones_sized(size: u8) -> Self;
211
212    /// Return the minimum value for a tombstone address.
213    ///
214    /// A variety of values may be used as tombstones in DWARF data.  DWARF 6 specifies a
215    /// tombstone value of -1, and this is compatible with most sections in earlier DWARF
216    /// versions. However, for .debug_loc and .debug_ranges in DWARF 4 and earlier, the
217    /// tombstone value is -2, because -1 already has a special meaning. -2 has also been
218    /// seen in .debug_line, possibly from a proprietary fork of lld.
219    ///
220    /// So this function returns -2 (cast to an unsigned value), and callers can consider
221    /// addresses greater than or equal to this value to be tombstones.
222    ///
223    /// Prior to the use of -1 or -2 for tombstones, it was common to use 0 or 1.
224    /// Additionally, gold may leave the relocation addend in place. These values are not
225    /// handled by this function, so callers will need to handle them separately if they
226    /// want to.
227    fn min_tombstone(size: u8) -> Self {
228        Self::zeros().wrapping_add_sized(-2i64 as u64, size)
229    }
230}
231
232impl ReaderAddress for u64 {
233    #[inline]
234    fn add_sized(self, length: u64, size: u8) -> Result<Self> {
235        let address = self.checked_add(length).ok_or(Error::AddressOverflow)?;
236        let mask = Self::ones_sized(size);
237        if address & !mask != 0 {
238            return Err(Error::AddressOverflow);
239        }
240        Ok(address)
241    }
242
243    #[inline]
244    fn wrapping_add_sized(self, length: u64, size: u8) -> Self {
245        let mask = Self::ones_sized(size);
246        self.wrapping_add(length) & mask
247    }
248
249    #[inline]
250    fn zeros() -> Self {
251        0
252    }
253
254    #[inline]
255    fn ones_sized(size: u8) -> Self {
256        !0 >> (64 - size * 8)
257    }
258}
259
// Only compiled when the "read" feature is disabled. The module is
// `pub(crate)`, so `Sealed` cannot be named from outside this crate;
// `Reader::cannot_implement` uses it as its return type.
#[cfg(not(feature = "read"))]
pub(crate) mod seal_if_no_alloc {
    /// Marker type returned by `Reader::cannot_implement`.
    #[derive(Debug)]
    pub struct Sealed;
}
265
266/// A trait for reading the data from a DWARF section.
267///
268/// All read operations advance the section offset of the reader
269/// unless specified otherwise.
270///
271/// ## Choosing a `Reader` Implementation
272///
273/// `gimli` comes with a few different `Reader` implementations and lets you
274/// choose the one that is right for your use case. A `Reader` is essentially a
275/// view into the raw bytes that make up some DWARF, but this view might borrow
276/// the underlying data or use reference counting ownership, and it might be
277/// thread safe or not.
278///
279/// | Implementation    | Ownership         | Thread Safe | Notes |
280/// |:------------------|:------------------|:------------|:------|
281/// | [`EndianSlice`](./struct.EndianSlice.html)        | Borrowed          | Yes         | Fastest, but requires that all of your code work with borrows. |
282/// | [`EndianRcSlice`](./struct.EndianRcSlice.html)    | Reference counted | No          | Shared ownership via reference counting, which alleviates the borrow restrictions of `EndianSlice` but imposes reference counting increments and decrements. Cannot be sent across threads, because the reference count is not atomic. |
283/// | [`EndianArcSlice`](./struct.EndianArcSlice.html)  | Reference counted | Yes         | The same as `EndianRcSlice`, but uses atomic reference counting, and therefore reference counting operations are slower but `EndianArcSlice`s may be sent across threads. |
284/// | [`EndianReader<T>`](./struct.EndianReader.html)   | Same as `T`       | Same as `T` | Escape hatch for easily defining your own type of `Reader`. |
285pub trait Reader: Debug + Clone {
286    /// The endianity of bytes that are read.
287    type Endian: Endianity;
288
289    /// The type used for offsets and lengths.
290    type Offset: ReaderOffset;
291
292    /// Return the endianity of bytes that are read.
293    fn endian(&self) -> Self::Endian;
294
295    /// Return the number of bytes remaining.
296    fn len(&self) -> Self::Offset;
297
298    /// Set the number of bytes remaining to zero.
299    fn empty(&mut self);
300
301    /// Set the number of bytes remaining to the specified length.
302    fn truncate(&mut self, len: Self::Offset) -> Result<()>;
303
304    /// Return the offset of this reader's data relative to the start of
305    /// the given base reader's data.
306    ///
307    /// May panic if this reader's data is not contained within the given
308    /// base reader's data.
309    fn offset_from(&self, base: &Self) -> Self::Offset;
310
311    /// Return an identifier for the current reader offset.
312    fn offset_id(&self) -> ReaderOffsetId;
313
314    /// Return the offset corresponding to the given `id` if
315    /// it is associated with this reader.
316    fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<Self::Offset>;
317
318    /// Find the index of the first occurrence of the given byte.
319    /// The offset of the reader is not changed.
320    fn find(&self, byte: u8) -> Result<Self::Offset>;
321
322    /// Discard the specified number of bytes.
323    fn skip(&mut self, len: Self::Offset) -> Result<()>;
324
325    /// Split a reader in two.
326    ///
327    /// A new reader is returned that can be used to read the next
328    /// `len` bytes, and `self` is advanced so that it reads the remainder.
329    fn split(&mut self, len: Self::Offset) -> Result<Self>;
330
331    /// This trait cannot be implemented if "read" feature is not enabled.
332    ///
333    /// `Reader` trait has a few methods that depend on `alloc` crate.
334    /// Disallowing `Reader` trait implementation prevents a crate that only depends on
335    /// "read-core" from being broken if another crate depending on `gimli` enables
336    /// "read" feature.
337    #[cfg(not(feature = "read"))]
338    fn cannot_implement() -> seal_if_no_alloc::Sealed;
339
340    /// Return all remaining data as a clone-on-write slice.
341    ///
342    /// The slice will be borrowed where possible, but some readers may
343    /// always return an owned vector.
344    ///
345    /// Does not advance the reader.
346    #[cfg(feature = "read")]
347    fn to_slice(&self) -> Result<Cow<'_, [u8]>>;
348
349    /// Convert all remaining data to a clone-on-write string.
350    ///
351    /// The string will be borrowed where possible, but some readers may
352    /// always return an owned string.
353    ///
354    /// Does not advance the reader.
355    ///
356    /// Returns an error if the data contains invalid characters.
357    #[cfg(feature = "read")]
358    fn to_string(&self) -> Result<Cow<'_, str>>;
359
360    /// Convert all remaining data to a clone-on-write string, including invalid characters.
361    ///
362    /// The string will be borrowed where possible, but some readers may
363    /// always return an owned string.
364    ///
365    /// Does not advance the reader.
366    #[cfg(feature = "read")]
367    fn to_string_lossy(&self) -> Result<Cow<'_, str>>;
368
369    /// Read exactly `buf.len()` bytes into `buf`.
370    fn read_slice(&mut self, buf: &mut [u8]) -> Result<()>;
371
372    /// Read a u8 array.
373    #[inline]
374    fn read_u8_array<A>(&mut self) -> Result<A>
375    where
376        A: Sized + Default + AsMut<[u8]>,
377    {
378        let mut val = Default::default();
379        self.read_slice(<A as AsMut<[u8]>>::as_mut(&mut val))?;
380        Ok(val)
381    }
382
383    /// Return true if the number of bytes remaining is zero.
384    #[inline]
385    fn is_empty(&self) -> bool {
386        self.len() == Self::Offset::from_u8(0)
387    }
388
389    /// Read a u8.
390    #[inline]
391    fn read_u8(&mut self) -> Result<u8> {
392        let a: [u8; 1] = self.read_u8_array()?;
393        Ok(a[0])
394    }
395
396    /// Read an i8.
397    #[inline]
398    fn read_i8(&mut self) -> Result<i8> {
399        let a: [u8; 1] = self.read_u8_array()?;
400        Ok(a[0] as i8)
401    }
402
403    /// Read a u16.
404    #[inline]
405    fn read_u16(&mut self) -> Result<u16> {
406        let a: [u8; 2] = self.read_u8_array()?;
407        Ok(self.endian().read_u16(&a))
408    }
409
410    /// Read an i16.
411    #[inline]
412    fn read_i16(&mut self) -> Result<i16> {
413        let a: [u8; 2] = self.read_u8_array()?;
414        Ok(self.endian().read_i16(&a))
415    }
416
417    /// Read a u32.
418    #[inline]
419    fn read_u32(&mut self) -> Result<u32> {
420        let a: [u8; 4] = self.read_u8_array()?;
421        Ok(self.endian().read_u32(&a))
422    }
423
424    /// Read an i32.
425    #[inline]
426    fn read_i32(&mut self) -> Result<i32> {
427        let a: [u8; 4] = self.read_u8_array()?;
428        Ok(self.endian().read_i32(&a))
429    }
430
431    /// Read a u64.
432    #[inline]
433    fn read_u64(&mut self) -> Result<u64> {
434        let a: [u8; 8] = self.read_u8_array()?;
435        Ok(self.endian().read_u64(&a))
436    }
437
438    /// Read an i64.
439    #[inline]
440    fn read_i64(&mut self) -> Result<i64> {
441        let a: [u8; 8] = self.read_u8_array()?;
442        Ok(self.endian().read_i64(&a))
443    }
444
445    /// Read a f32.
446    #[inline]
447    fn read_f32(&mut self) -> Result<f32> {
448        let a: [u8; 4] = self.read_u8_array()?;
449        Ok(self.endian().read_f32(&a))
450    }
451
452    /// Read a f64.
453    #[inline]
454    fn read_f64(&mut self) -> Result<f64> {
455        let a: [u8; 8] = self.read_u8_array()?;
456        Ok(self.endian().read_f64(&a))
457    }
458
459    /// Read an unsigned n-bytes integer u64.
460    ///
461    /// # Panics
462    ///
463    /// Panics when nbytes < 1 or nbytes > 8
464    #[inline]
465    fn read_uint(&mut self, n: usize) -> Result<u64> {
466        let mut buf = [0; 8];
467        self.read_slice(&mut buf[..n])?;
468        Ok(self.endian().read_uint(&buf[..n]))
469    }
470
471    /// Read a null-terminated slice, and return it (excluding the null).
472    fn read_null_terminated_slice(&mut self) -> Result<Self> {
473        let idx = self.find(0)?;
474        let val = self.split(idx)?;
475        self.skip(Self::Offset::from_u8(1))?;
476        Ok(val)
477    }
478
479    /// Skip a LEB128 encoded integer.
480    fn skip_leb128(&mut self) -> Result<()> {
481        leb128::read::skip(self)
482    }
483
484    /// Read an unsigned LEB128 encoded integer.
485    fn read_uleb128(&mut self) -> Result<u64> {
486        leb128::read::unsigned(self)
487    }
488
489    /// Read an unsigned LEB128 encoded u32.
490    fn read_uleb128_u32(&mut self) -> Result<u32> {
491        leb128::read::unsigned(self)?
492            .try_into()
493            .map_err(|_| Error::BadUnsignedLeb128)
494    }
495
496    /// Read an unsigned LEB128 encoded u16.
497    fn read_uleb128_u16(&mut self) -> Result<u16> {
498        leb128::read::u16(self)
499    }
500
501    /// Read a signed LEB128 encoded integer.
502    fn read_sleb128(&mut self) -> Result<i64> {
503        leb128::read::signed(self)
504    }
505
506    /// Read an initial length field.
507    ///
508    /// This field is encoded as either a 32-bit length or
509    /// a 64-bit length, and the returned `Format` indicates which.
510    fn read_initial_length(&mut self) -> Result<(Self::Offset, Format)> {
511        const MAX_DWARF_32_UNIT_LENGTH: u32 = 0xffff_fff0;
512        const DWARF_64_INITIAL_UNIT_LENGTH: u32 = 0xffff_ffff;
513
514        let val = self.read_u32()?;
515        if val < MAX_DWARF_32_UNIT_LENGTH {
516            Ok((Self::Offset::from_u32(val), Format::Dwarf32))
517        } else if val == DWARF_64_INITIAL_UNIT_LENGTH {
518            let val = self.read_u64().and_then(Self::Offset::from_u64)?;
519            Ok((val, Format::Dwarf64))
520        } else {
521            Err(Error::UnknownReservedLength)
522        }
523    }
524
525    /// Read a byte and validate it as an address size.
526    fn read_address_size(&mut self) -> Result<u8> {
527        let size = self.read_u8()?;
528        match size {
529            1 | 2 | 4 | 8 => Ok(size),
530            _ => Err(Error::UnsupportedAddressSize(size)),
531        }
532    }
533
534    /// Read an address-sized integer, and return it as a `u64`.
535    fn read_address(&mut self, address_size: u8) -> Result<u64> {
536        match address_size {
537            1 => self.read_u8().map(u64::from),
538            2 => self.read_u16().map(u64::from),
539            4 => self.read_u32().map(u64::from),
540            8 => self.read_u64(),
541            otherwise => Err(Error::UnsupportedAddressSize(otherwise)),
542        }
543    }
544
545    /// Parse a word-sized integer according to the DWARF format.
546    ///
547    /// These are always used to encode section offsets or lengths,
548    /// and so have a type of `Self::Offset`.
549    fn read_word(&mut self, format: Format) -> Result<Self::Offset> {
550        match format {
551            Format::Dwarf32 => self.read_u32().map(Self::Offset::from_u32),
552            Format::Dwarf64 => self.read_u64().and_then(Self::Offset::from_u64),
553        }
554    }
555
556    /// Parse a word-sized section length according to the DWARF format.
557    #[inline]
558    fn read_length(&mut self, format: Format) -> Result<Self::Offset> {
559        self.read_word(format)
560    }
561
562    /// Parse a word-sized section offset according to the DWARF format.
563    #[inline]
564    fn read_offset(&mut self, format: Format) -> Result<Self::Offset> {
565        self.read_word(format)
566    }
567
568    /// Parse a section offset of the given size.
569    ///
570    /// This is used for `DW_FORM_ref_addr` values in DWARF version 2.
571    fn read_sized_offset(&mut self, size: u8) -> Result<Self::Offset> {
572        match size {
573            1 => self.read_u8().map(u64::from),
574            2 => self.read_u16().map(u64::from),
575            4 => self.read_u32().map(u64::from),
576            8 => self.read_u64(),
577            otherwise => Err(Error::UnsupportedOffsetSize(otherwise)),
578        }
579        .and_then(Self::Offset::from_u64)
580    }
581}
582
#[cfg(test)]
mod test {
    use super::*;

    /// For each supported address size, the minimum tombstone is -2
    /// truncated to that many bytes.
    #[test]
    fn test_min_tombstone() {
        let expectations: [(u8, u64); 4] = [
            (1, 0xfe),
            (2, 0xfffe),
            (4, 0xffff_fffe),
            (8, 0xffff_ffff_ffff_fffe),
        ];
        for &(size, tombstone) in expectations.iter() {
            assert_eq!(u64::min_tombstone(size), tombstone);
        }
    }
}