jiff/tz/
tzif.rs

1/*!
2This module provides support for TZif binary files from the [Time Zone
3Database].
4
5These binary files are the ones commonly found in Unix distributions in the
6`/usr/share/zoneinfo` directory.
7
8[Time Zone Database]: https://www.iana.org/time-zones
9*/
10
11use core::{fmt::Debug, ops::Range};
12
13#[cfg(feature = "alloc")]
14use alloc::{string::String, vec::Vec};
15
16use crate::{
17    civil::DateTime,
18    error::Error,
19    shared::{self, util::array_str::Abbreviation},
20    timestamp::Timestamp,
21    tz::{
22        posix::PosixTimeZone, timezone::TimeZoneAbbreviation, AmbiguousOffset,
23        Dst, Offset, TimeZoneOffsetInfo, TimeZoneTransition,
24    },
25};
26
/// The owned variant of `Tzif`.
///
/// The name is a `String` and every table (local time types, transition
/// timestamps, civil start/end datetimes and transition infos) is a `Vec`,
/// which is why this alias only exists when the `alloc` feature is enabled.
#[cfg(feature = "alloc")]
pub(crate) type TzifOwned = Tzif<
    String,
    Abbreviation,
    Vec<shared::TzifLocalTimeType>,
    Vec<i64>,
    Vec<shared::TzifDateTime>,
    Vec<shared::TzifDateTime>,
    Vec<shared::TzifTransitionInfo>,
>;
38
/// The static variant of `Tzif`.
///
/// Every component is a `&'static` borrow (string slices and slices of the
/// `shared` table types), so this alias is not gated on the `alloc` feature.
pub(crate) type TzifStatic = Tzif<
    &'static str,
    &'static str,
    &'static [shared::TzifLocalTimeType],
    &'static [i64],
    &'static [shared::TzifDateTime],
    &'static [shared::TzifDateTime],
    &'static [shared::TzifTransitionInfo],
>;
49
/// A time zone based on IANA TZif formatted data.
///
/// TZif is a binary format described by RFC 8536. Its typical structure is to
/// define a single time zone per file in the `/usr/share/zoneinfo` directory
/// on Unix systems. The name of a time zone is its file path with the
/// `/usr/share/zoneinfo/` prefix stripped from it.
///
/// This type doesn't provide any facilities for dealing with files on disk
/// or the `/usr/share/zoneinfo` directory. This type is just for parsing the
/// contents of TZif formatted data in memory, and turning it into a data type
/// that can be used as a time zone.
///
/// The type parameters select the storage used for each component: `STR` for
/// the time zone name, `ABBREV` for abbreviation data, `TYPES` for the local
/// time types, `TIMESTAMPS` for the transition timestamps, `STARTS`/`ENDS`
/// for the civil datetimes of each transition and `INFOS` for the
/// per-transition metadata. See `TzifOwned` and `TzifStatic` for the two
/// instantiations used in practice.
#[derive(Debug)]
// not part of Jiff's public API
#[doc(hidden)]
// This ensures the alignment of this type is always *at least* 8 bytes. This
// is required for the pointer tagging inside of `TimeZone` to be sound. At
// time of writing (2024-02-24), this explicit `repr` isn't required on 64-bit
// systems since the type definition is such that it will have an alignment of
// at least 8 bytes anyway. But this *is* required for 32-bit systems, where
// the type definition at present only has an alignment of 4 bytes.
#[repr(align(8))]
pub struct Tzif<STR, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS> {
    /// The TZif data in its `shared` representation.
    ///
    /// The name, checksum, designations, local time types and transition
    /// tables used by the lookups in this module are all read from here.
    inner: shared::Tzif<STR, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS>,
    /// The POSIX time zone for this TZif data, if present.
    ///
    /// Note that this is also present on `shared::Tzif`, but uses the
    /// `shared::PosixTimeZone` type, which isn't quite what we want here.
    ///
    /// For now we just duplicate it, which is slightly unfortunate. But this
    /// is small and not a huge deal. Ideally we can clean this up later.
    posix_tz: Option<PosixTimeZone<ABBREV>>,
}
82
83impl TzifStatic {
84    /// Converts from the shared-but-internal API for use in proc macros.
85    ///
86    /// This specifically works in a `const` context. And it requires that
87    /// caller to pass in the parsed `Tzif` in its fixed form along with the
88    /// variable length local time types and transitions. (Technically, the
89    /// TZ identifier and the designations are also variable length despite
90    /// being parsed of `TzifFixed`, but in practice they can be handled just
91    /// fine via `&'static str`.)
92    ///
93    /// Notice that the `types` and `transitions` are *not* from the `shared`
94    /// API, but rather, from the types defined in this module. They have to
95    /// be this way because there's a conversion step that occurs. In practice,
96    /// this sort of thing is embedded as a literal in source code via a proc
97    /// macro. Like this:
98    ///
99    /// ```text
100    /// static TZIF: Tzif<&str, &str, &[LocalTimeType], &[Transition]> =
101    ///     Tzif::from_shared_const(
102    ///         shared::TzifFixed {
103    ///             name: Some("America/New_York"),
104    ///             version: b'3',
105    ///             checksum: 0xDEADBEEF,
106    ///             designations: "ESTEDT",
107    ///             posix_tz: None,
108    ///         },
109    ///         &[
110    ///             shared::TzifLocalTimeType {
111    ///                 offset: -5 * 60 * 60,
112    ///                 is_dst: false,
113    ///                 designation: 0..3,
114    ///                 indicator: shared::TzifIndicator::LocalWall,
115    ///             }.to_jiff(),
116    ///         ],
117    ///         &[
118    ///             shared::TzifTransition {
119    ///                 timestamp: 123456789,
120    ///                 type_index: 0,
121    ///             }.to_jiff(-5, -5),
122    ///         ],
123    ///     );
124    /// ```
125    ///
126    /// Or something like that anyway. The point is, our `static` slices are
127    /// variable length and they need to be the right types. At least, I
128    /// couldn't see a simpler way to arrange this.
129    pub(crate) const fn from_shared_const(
130        sh: shared::TzifStatic,
131    ) -> TzifStatic {
132        let posix_tz = match sh.fixed.posix_tz {
133            None => None,
134            Some(posix_tz) => Some(PosixTimeZone::from_shared_const(posix_tz)),
135        };
136        Tzif { inner: sh, posix_tz }
137    }
138}
139
#[cfg(feature = "alloc")]
impl TzifOwned {
    /// Parses `bytes` as the contents of a TZif formatted file.
    ///
    /// The `name` is attached to the returned `Tzif` value, but it has no
    /// other significance.
    ///
    /// # Errors
    ///
    /// An error is returned when the data is not recognized as valid TZif.
    ///
    /// # Untrusted input
    ///
    /// In general, callers may assume that it is safe to pass arbitrary or
    /// even untrusted data to this function and count on it not panicking
    /// or using resources that aren't limited to a small constant factor of
    /// the size of the data itself. That is, callers can reliably limit the
    /// resources used by limiting the size of the data given to this parse
    /// function.
    pub(crate) fn parse(
        name: Option<String>,
        bytes: &[u8],
    ) -> Result<Self, Error> {
        shared::TzifOwned::parse(name, bytes)
            .map(TzifOwned::from_shared_owned)
            .map_err(Error::tzif)
    }

    /// Converts from the shared-but-internal API for use in proc macros.
    ///
    /// Unlike its `const` counterpart on `TzifStatic`, this is not `const`
    /// since it accepts owned values on the heap for the variable length
    /// data inside `Tzif`.
    pub(crate) fn from_shared_owned(sh: shared::TzifOwned) -> TzifOwned {
        // The shared POSIX time zone is copied out of `sh` and converted;
        // `sh` itself is then stored whole.
        let posix_tz =
            sh.fixed.posix_tz.map(PosixTimeZone::from_shared_owned);
        Tzif { inner: sh, posix_tz }
    }
}
176
177impl<
178        STR: AsRef<str>,
179        ABBREV: AsRef<str> + Debug,
180        TYPES: AsRef<[shared::TzifLocalTimeType]>,
181        TIMESTAMPS: AsRef<[i64]>,
182        STARTS: AsRef<[shared::TzifDateTime]>,
183        ENDS: AsRef<[shared::TzifDateTime]>,
184        INFOS: AsRef<[shared::TzifTransitionInfo]>,
185    > Tzif<STR, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS>
186{
187    /// Returns the name given to this TZif data in its constructor.
188    pub(crate) fn name(&self) -> Option<&str> {
189        self.inner.fixed.name.as_ref().map(|n| n.as_ref())
190    }
191
192    /// Returns the appropriate time zone offset to use for the given
193    /// timestamp.
194    pub(crate) fn to_offset(&self, timestamp: Timestamp) -> Offset {
195        match self.to_local_time_type(timestamp) {
196            Ok(typ) => Offset::from_seconds_unchecked(typ.offset),
197            Err(tz) => tz.to_offset(timestamp),
198        }
199    }
200
201    /// Returns the appropriate time zone offset to use for the given
202    /// timestamp.
203    ///
204    /// This also includes whether the offset returned should be considered to
205    /// be DST or not, along with the time zone abbreviation (e.g., EST for
206    /// standard time in New York, and EDT for DST in New York).
207    pub(crate) fn to_offset_info(
208        &self,
209        timestamp: Timestamp,
210    ) -> TimeZoneOffsetInfo<'_> {
211        let typ = match self.to_local_time_type(timestamp) {
212            Ok(typ) => typ,
213            Err(tz) => return tz.to_offset_info(timestamp),
214        };
215        let abbreviation =
216            TimeZoneAbbreviation::Borrowed(self.designation(typ));
217        TimeZoneOffsetInfo {
218            offset: Offset::from_seconds_unchecked(typ.offset),
219            dst: Dst::from(typ.is_dst),
220            abbreviation,
221        }
222    }
223
224    /// Returns the local time type for the timestamp given.
225    ///
226    /// If one could not be found, then this implies that the caller should
227    /// use the POSIX time zone returned in the error variant.
228    fn to_local_time_type(
229        &self,
230        timestamp: Timestamp,
231    ) -> Result<&shared::TzifLocalTimeType, &PosixTimeZone<ABBREV>> {
232        let timestamp = timestamp.as_second();
233        // This is guaranteed because we always push at least one transition.
234        // This isn't guaranteed by TZif since it might have 0 transitions,
235        // but we always add a "dummy" first transition with our minimum
236        // `Timestamp` value. TZif doesn't do this because there is no
237        // universal minimum timestamp. (`i64::MIN` is a candidate, but that's
238        // likely to cause overflow in readers that don't do error checking.)
239        //
240        // The result of the dummy transition is that the code below is simpler
241        // with fewer special cases.
242        let timestamps = self.timestamps();
243        assert!(!timestamps.is_empty(), "transitions is non-empty");
244        let index = if timestamp > *timestamps.last().unwrap() {
245            timestamps.len() - 1
246        } else {
247            let search = self.timestamps().binary_search(&timestamp);
248            match search {
249                // Since the first transition is always Timestamp::MIN, it's
250                // impossible for any timestamp to sort before it.
251                Err(0) => {
252                    unreachable!("impossible to come before Timestamp::MIN")
253                }
254                Ok(i) => i,
255                // i points to the position immediately after the matching
256                // timestamp. And since we know that i>0 because of the i==0
257                // check above, we can safely subtract 1.
258                Err(i) => i.checked_sub(1).expect("i is non-zero"),
259            }
260        };
261        // Our index is always in bounds. The only way it couldn't be is if
262        // binary search returns an Err(len) for a time greater than the
263        // maximum transition. But we account for that above by converting
264        // Err(len) to Err(len-1).
265        debug_assert!(index < timestamps.len());
266        // RFC 8536 says: "Local time for timestamps on or after the last
267        // transition is specified by the TZ string in the footer (Section 3.3)
268        // if present and nonempty; otherwise, it is unspecified."
269        //
270        // Subtracting 1 is OK because we know self.transitions is not empty.
271        let index = if index < timestamps.len() - 1 {
272            // This is the typical case in "fat" TZif files: we found a
273            // matching transition.
274            index
275        } else {
276            match self.posix_tz() {
277                // This is the typical case in "slim" TZif files, where the
278                // last transition is, as I understand it, the transition at
279                // which a consistent rule started that a POSIX TZ string can
280                // fully describe. For example, (as of 2024-03-27) the last
281                // transition in the "fat" America/New_York TZif file is
282                // in 2037, where as in the "slim" version it is 2007.
283                //
284                // This is likely why some things break with the "slim"
285                // version: they don't support POSIX TZ strings (or don't
286                // support them correctly).
287                Some(tz) => return Err(tz),
288                // This case is technically unspecified, but I think the
289                // typical thing to do is to just use the last transition.
290                // I'm not 100% sure on this one.
291                None => index,
292            }
293        };
294        Ok(self.local_time_type(index))
295    }
296
297    /// Returns a possibly ambiguous timestamp for the given civil datetime.
298    ///
299    /// The given datetime should correspond to the "wall" clock time of what
300    /// humans use to tell time for this time zone.
301    ///
302    /// Note that "ambiguous timestamp" is represented by the possible
303    /// selection of offsets that could be applied to the given datetime. In
304    /// general, it is only ambiguous around transitions to-and-from DST. The
305    /// ambiguity can arise as a "fold" (when a particular wall clock time is
306    /// repeated) or as a "gap" (when a particular wall clock time is skipped
307    /// entirely).
308    pub(crate) fn to_ambiguous_kind(&self, dt: DateTime) -> AmbiguousOffset {
309        // This implementation very nearly mirrors `to_local_time_type`
310        // above in the beginning: we do a binary search to find transition
311        // applicable for the given datetime. Except, we do it on wall clock
312        // times instead of timestamps. And in particular, each transition
313        // begins with a possibly ambiguous range of wall clock times
314        // corresponding to either a "gap" or "fold" in time.
315        let dtt = shared::TzifDateTime::new(
316            dt.year(),
317            dt.month(),
318            dt.day(),
319            dt.hour(),
320            dt.minute(),
321            dt.second(),
322        );
323        let (starts, ends) = (self.civil_starts(), self.civil_ends());
324        assert!(!starts.is_empty(), "transitions is non-empty");
325        let this_index = match starts.binary_search(&dtt) {
326            Err(0) => unreachable!("impossible to come before DateTime::MIN"),
327            Ok(i) => i,
328            Err(i) => i.checked_sub(1).expect("i is non-zero"),
329        };
330        debug_assert!(this_index < starts.len());
331
332        let this_offset = self.local_time_type(this_index).offset;
333        // This is a little tricky, but we need to check for ambiguous civil
334        // datetimes before possibly using the POSIX TZ string. Namely, a
335        // datetime could be ambiguous with respect to the last transition,
336        // and we should handle that according to the gap/fold determined for
337        // that transition. We cover this case in tests in tz/mod.rs for the
338        // Pacific/Honolulu time zone, whose last transition begins with a gap.
339        match self.transition_kind(this_index) {
340            shared::TzifTransitionKind::Gap if dtt < ends[this_index] => {
341                // A gap/fold can only appear when there exists a previous
342                // transition.
343                let prev_index = this_index.checked_sub(1).unwrap();
344                let prev_offset = self.local_time_type(prev_index).offset;
345                return AmbiguousOffset::Gap {
346                    before: Offset::from_seconds_unchecked(prev_offset),
347                    after: Offset::from_seconds_unchecked(this_offset),
348                };
349            }
350            shared::TzifTransitionKind::Fold if dtt < ends[this_index] => {
351                // A gap/fold can only appear when there exists a previous
352                // transition.
353                let prev_index = this_index.checked_sub(1).unwrap();
354                let prev_offset = self.local_time_type(prev_index).offset;
355                return AmbiguousOffset::Fold {
356                    before: Offset::from_seconds_unchecked(prev_offset),
357                    after: Offset::from_seconds_unchecked(this_offset),
358                };
359            }
360            _ => {}
361        }
362        // The datetime given is not ambiguous with respect to any of the
363        // transitions in the TZif data. But, if we matched at or after the
364        // last transition, then we need to use the POSIX TZ string (which
365        // could still return an ambiguous offset).
366        if this_index == starts.len() - 1 {
367            if let Some(tz) = self.posix_tz() {
368                return tz.to_ambiguous_kind(dt);
369            }
370            // This case is unspecified according to RFC 8536. It means that
371            // the given datetime exceeds all transitions *and* there is no
372            // POSIX TZ string. So this can happen in V1 files for example.
373            // But those should hopefully be essentially non-existent nowadays
374            // (2024-03). In any case, we just fall through to using the last
375            // transition, which does seem likely to be wrong ~half the time
376            // in time zones with DST. But there really isn't much else we can
377            // do I think.
378        }
379        AmbiguousOffset::Unambiguous {
380            offset: Offset::from_seconds_unchecked(this_offset),
381        }
382    }
383
384    /// Returns the timestamp of the most recent time zone transition prior
385    /// to the timestamp given. If one doesn't exist, `None` is returned.
386    pub(crate) fn previous_transition<'t>(
387        &'t self,
388        ts: Timestamp,
389    ) -> Option<TimeZoneTransition<'t>> {
390        assert!(!self.timestamps().is_empty(), "transitions is non-empty");
391        let mut timestamp = ts.as_second();
392        if ts.subsec_nanosecond() != 0 {
393            timestamp = timestamp.saturating_add(1);
394        }
395        let search = self.timestamps().binary_search(&timestamp);
396        let index = match search {
397            Ok(i) | Err(i) => i.checked_sub(1)?,
398        };
399        let index = if index == 0 {
400            // The first transition is a dummy that we insert, so if we land on
401            // it here, treat it as if it doesn't exist.
402            return None;
403        } else if index == self.timestamps().len() - 1 {
404            if let Some(ref posix_tz) = self.posix_tz() {
405                // Since the POSIX TZ must be consistent with the last
406                // transition, it must be the case that tzif_last <=
407                // posix_prev_trans in all cases. So the transition according
408                // to the POSIX TZ is always correct here.
409                //
410                // What if this returns `None` though? I'm not sure in which
411                // cases that could matter, and I think it might be a violation
412                // of the TZif format if it does.
413                //
414                // It can return `None`! In the case of a time zone that
415                // has eliminated DST, it might have historical time zone
416                // transitions but a POSIX time zone without DST. (For example,
417                // `America/Sao_Paulo`.) And thus, this would return `None`.
418                // So if it does, we pretend as if the POSIX time zone doesn't
419                // exist.
420                if let Some(trans) = posix_tz.previous_transition(ts) {
421                    return Some(trans);
422                }
423            }
424            index
425        } else {
426            index
427        };
428        let timestamp = self.timestamps()[index];
429        let typ = self.local_time_type(index);
430        Some(TimeZoneTransition {
431            timestamp: Timestamp::constant(timestamp, 0),
432            offset: Offset::from_seconds_unchecked(typ.offset),
433            abbrev: self.designation(typ),
434            dst: Dst::from(typ.is_dst),
435        })
436    }
437
438    /// Returns the timestamp of the soonest time zone transition after the
439    /// timestamp given. If one doesn't exist, `None` is returned.
440    pub(crate) fn next_transition<'t>(
441        &'t self,
442        ts: Timestamp,
443    ) -> Option<TimeZoneTransition<'t>> {
444        assert!(!self.timestamps().is_empty(), "transitions is non-empty");
445        let timestamp = ts.as_second();
446        let search = self.timestamps().binary_search(&timestamp);
447        let index = match search {
448            Ok(i) => i.checked_add(1)?,
449            Err(i) => i,
450        };
451        let index = if index == 0 {
452            // The first transition is a dummy that we insert, so if we land on
453            // it here, treat it as if it doesn't exist.
454            return None;
455        } else if index >= self.timestamps().len() {
456            if let Some(posix_tz) = self.posix_tz() {
457                // Since the POSIX TZ must be consistent with the last
458                // transition, it must be the case that next.timestamp <=
459                // posix_next_tans in all cases. So the transition according to
460                // the POSIX TZ is always correct here.
461                //
462                // What if this returns `None` though? I'm not sure in which
463                // cases that could matter, and I think it might be a violation
464                // of the TZif format if it does.
465                //
466                // In the "previous" case above, this could return `None` even
467                // when there are historical time zone transitions in the case
468                // of a time zone eliminating DST (e.g., `America/Sao_Paulo`).
469                // But unlike the previous case, if we get `None` here, then
470                // that is the real answer because there are no other known
471                // future time zone transitions.
472                //
473                // 2025-05-05: OK, this could return `None` and this is fine.
474                // It happens for time zones that had DST but then stopped
475                // it at some point in the past. The POSIX time zone has no
476                // DST and thus returns `None`. That's fine. But there was a
477                // problem: we were using the POSIX time zone even when there
478                // was a historical time zone transition after the timestamp
479                // given. That was fixed by changing the condition when we get
480                // here: it can only happen when the timestamp given comes at
481                // or after all historical time zone transitions.
482                return posix_tz.next_transition(ts);
483            }
484            self.timestamps().len() - 1
485        } else {
486            index
487        };
488        let timestamp = self.timestamps()[index];
489        let typ = self.local_time_type(index);
490        Some(TimeZoneTransition {
491            timestamp: Timestamp::constant(timestamp, 0),
492            offset: Offset::from_seconds_unchecked(typ.offset),
493            abbrev: self.designation(typ),
494            dst: Dst::from(typ.is_dst),
495        })
496    }
497
498    fn designation(&self, typ: &shared::TzifLocalTimeType) -> &str {
499        // OK because we verify that the designation range on every local
500        // time type is a valid range into `self.designations`.
501        &self.designations()[typ.designation()]
502    }
503
504    fn local_time_type(
505        &self,
506        transition_index: usize,
507    ) -> &shared::TzifLocalTimeType {
508        // OK because we require that `type_index` always points to a valid
509        // local time type.
510        &self.types()[usize::from(self.infos()[transition_index].type_index)]
511    }
512
513    fn transition_kind(
514        &self,
515        transition_index: usize,
516    ) -> shared::TzifTransitionKind {
517        self.infos()[transition_index].kind
518    }
519
    /// Returns a borrow of the POSIX time zone attached to this TZif data,
    /// or `None` if there isn't one.
    fn posix_tz(&self) -> Option<&PosixTimeZone<ABBREV>> {
        self.posix_tz.as_ref()
    }
523
    /// Returns the string holding all designations. Individual local time
    /// types refer to their designation via a range into this string.
    fn designations(&self) -> &str {
        self.inner.fixed.designations.as_ref()
    }
527
    /// Returns the local time types, which transitions refer to by index.
    fn types(&self) -> &[shared::TzifLocalTimeType] {
        self.inner.types.as_ref()
    }
531
    /// Returns the timestamp of every transition, in seconds.
    ///
    /// The lookups in this module run a binary search over this slice, and
    /// so rely on it being sorted in ascending order.
    fn timestamps(&self) -> &[i64] {
        self.inner.transitions.timestamps.as_ref()
    }
535
    /// Returns the civil (wall clock) datetime at which each transition
    /// starts. This is indexed in the same way as `timestamps`.
    fn civil_starts(&self) -> &[shared::TzifDateTime] {
        self.inner.transitions.civil_starts.as_ref()
    }
539
    /// Returns the civil (wall clock) datetime at which the gap or fold of
    /// each transition ends. This is indexed in the same way as
    /// `timestamps`.
    fn civil_ends(&self) -> &[shared::TzifDateTime] {
        self.inner.transitions.civil_ends.as_ref()
    }
543
    /// Returns the per-transition metadata (the local time type index and
    /// the transition kind). This is indexed in the same way as
    /// `timestamps`.
    fn infos(&self) -> &[shared::TzifTransitionInfo] {
        self.inner.transitions.infos.as_ref()
    }
547}
548
// Equality for `Tzif` is decided by comparing the name and the checksum
// (see the `PartialEq` impl), so it is a full equivalence relation and this
// marker impl needs no methods.
impl<STR: AsRef<str>, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS> Eq
    for Tzif<STR, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS>
{
}
553
554impl<STR: AsRef<str>, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS> PartialEq
555    for Tzif<STR, ABBREV, TYPES, TIMESTAMPS, STARTS, ENDS, INFOS>
556{
557    fn eq(&self, rhs: &Self) -> bool {
558        self.inner.fixed.name.as_ref().map(|n| n.as_ref())
559            == rhs.inner.fixed.name.as_ref().map(|n| n.as_ref())
560            && self.inner.fixed.checksum == rhs.inner.fixed.checksum
561    }
562}
563
564impl shared::TzifLocalTimeType {
565    fn designation(&self) -> Range<usize> {
566        usize::from(self.designation.0)..usize::from(self.designation.1)
567    }
568}
569
570impl core::fmt::Display for shared::TzifIndicator {
571    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
572        match *self {
573            shared::TzifIndicator::LocalWall => f.write_str("local/wall"),
574            shared::TzifIndicator::LocalStandard => f.write_str("local/std"),
575            shared::TzifIndicator::UTStandard => f.write_str("ut/std"),
576        }
577    }
578}
579
/// Does a quick check that returns true if the data might be in TZif format.
///
/// Only the leading magic bytes (`TZif`) are inspected. So a false positive
/// is possible: this may return true even if the given data is not in TZif
/// format. A false negative is not: this never returns false when the given
/// data is TZif.
#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn is_possibly_tzif(data: &[u8]) -> bool {
    matches!(data, [b'T', b'Z', b'i', b'f', ..])
}
590
591#[cfg(all(test, feature = "alloc"))]
592mod tests {
593    use alloc::{string::ToString, vec};
594
595    #[cfg(not(miri))]
596    use crate::tz::testdata::TZIF_TEST_FILES;
597
598    use super::*;
599
600    /// This converts TZif data into a human readable format.
601    ///
602    /// This is useful for debugging (via `./scripts/jiff-debug tzif`), but we
603    /// also use it for snapshot testing to make reading the test output at
604    /// least *somewhat* comprehensible for humans. Otherwise, one needs to
605    /// read and understand Unix timestamps. That ain't going to fly.
606    ///
607    /// For this to work, we make sure everything in a `Tzif` value is
608    /// represented in some way in this output.
609    fn tzif_to_human_readable(tzif: &TzifOwned) -> String {
610        use std::io::Write;
611
612        fn datetime(dt: shared::TzifDateTime) -> DateTime {
613            DateTime::constant(
614                dt.year(),
615                dt.month(),
616                dt.day(),
617                dt.hour(),
618                dt.minute(),
619                dt.second(),
620                0,
621            )
622        }
623
624        let mut out = tabwriter::TabWriter::new(vec![])
625            .alignment(tabwriter::Alignment::Left);
626
627        writeln!(out, "TIME ZONE NAME").unwrap();
628        writeln!(out, "  {}", tzif.name().unwrap_or("UNNAMED")).unwrap();
629
630        writeln!(out, "TIME ZONE VERSION").unwrap();
631        writeln!(
632            out,
633            "  {}",
634            char::try_from(tzif.inner.fixed.version).unwrap()
635        )
636        .unwrap();
637
638        writeln!(out, "LOCAL TIME TYPES").unwrap();
639        for (i, typ) in tzif.inner.types.iter().enumerate() {
640            writeln!(
641                out,
642                "  {i:03}:\toffset={off}\t\
643                   designation={desig}\t{dst}\tindicator={ind}",
644                off = Offset::from_seconds_unchecked(typ.offset),
645                desig = tzif.designation(&typ),
646                dst = if typ.is_dst { "dst" } else { "" },
647                ind = typ.indicator,
648            )
649            .unwrap();
650        }
651        if !tzif.timestamps().is_empty() {
652            writeln!(out, "TRANSITIONS").unwrap();
653            for i in 0..tzif.timestamps().len() {
654                let timestamp = Timestamp::constant(tzif.timestamps()[i], 0);
655                let dt = Offset::UTC.to_datetime(timestamp);
656                let typ = tzif.local_time_type(i);
657                let wall =
658                    alloc::format!("{}", datetime(tzif.civil_starts()[i]));
659                let ambiguous = match tzif.transition_kind(i) {
660                    shared::TzifTransitionKind::Unambiguous => {
661                        "unambiguous".to_string()
662                    }
663                    shared::TzifTransitionKind::Gap => {
664                        let end = datetime(tzif.civil_ends()[i]);
665                        alloc::format!(" gap-until({end})")
666                    }
667                    shared::TzifTransitionKind::Fold => {
668                        let end = datetime(tzif.civil_ends()[i]);
669                        alloc::format!("fold-until({end})")
670                    }
671                };
672
673                writeln!(
674                    out,
675                    "  {i:04}:\t{dt:?}Z\tunix={ts}\twall={wall}\t\
676                       {ambiguous}\t\
677                       type={type_index}\t{off}\t\
678                       {desig}\t{dst}",
679                    ts = timestamp.as_second(),
680                    type_index = tzif.infos()[i].type_index,
681                    off = Offset::from_seconds_unchecked(typ.offset),
682                    desig = tzif.designation(typ),
683                    dst = if typ.is_dst { "dst" } else { "" },
684                )
685                .unwrap();
686            }
687        }
688        if let Some(ref posix_tz) = tzif.posix_tz {
689            writeln!(out, "POSIX TIME ZONE STRING").unwrap();
690            writeln!(out, "  {}", posix_tz).unwrap();
691        }
692        String::from_utf8(out.into_inner().unwrap()).unwrap()
693    }
694
695    /// DEBUG COMMAND
696    ///
697    /// Takes environment variable `JIFF_DEBUG_TZIF_PATH` as input, and treats
698    /// the value as a TZif file path. This test will open the file, parse it
699    /// as a TZif and then dump debug data about the file in a human readable
700    /// plain text format.
701    #[cfg(feature = "std")]
702    #[test]
703    fn debug_tzif() -> anyhow::Result<()> {
704        use anyhow::Context;
705
706        let _ = crate::logging::Logger::init();
707
708        const ENV: &str = "JIFF_DEBUG_TZIF_PATH";
709        let Some(val) = std::env::var_os(ENV) else { return Ok(()) };
710        let Ok(val) = val.into_string() else {
711            anyhow::bail!("{ENV} has invalid UTF-8")
712        };
713        let bytes =
714            std::fs::read(&val).with_context(|| alloc::format!("{val:?}"))?;
715        let tzif = Tzif::parse(Some(val.to_string()), &bytes)?;
716        std::eprint!("{}", tzif_to_human_readable(&tzif));
717        Ok(())
718    }
719
    /// Snapshot test over every entry in `TZIF_TEST_FILES`: the result of
    /// `parse()` is rendered in the human readable format and compared
    /// against the snapshot named `{name}_v2+`.
    ///
    /// This is skipped under Miri.
    #[cfg(not(miri))]
    #[test]
    fn tzif_parse_v2plus() {
        for tzif_test in TZIF_TEST_FILES {
            insta::assert_snapshot!(
                alloc::format!("{}_v2+", tzif_test.name),
                tzif_to_human_readable(&tzif_test.parse())
            );
        }
    }
730
    /// Snapshot test over every entry in `TZIF_TEST_FILES`: the result of
    /// `parse_v1()` (presumably only the version 1 portion of the data) is
    /// rendered in the human readable format and compared against the
    /// snapshot named `{name}_v1`.
    ///
    /// This is skipped under Miri.
    #[cfg(not(miri))]
    #[test]
    fn tzif_parse_v1() {
        for tzif_test in TZIF_TEST_FILES {
            insta::assert_snapshot!(
                alloc::format!("{}_v1", tzif_test.name),
                tzif_to_human_readable(&tzif_test.parse_v1())
            );
        }
    }
741
742    /// This tests walks the /usr/share/zoneinfo directory (if it exists) and
743    /// tries to parse every TZif formatted file it can find. We don't really
744    /// do much with it other than to ensure we don't panic or return an error.
745    /// That is, we check that we can parse each file, but not that we do so
746    /// correctly.
747    #[cfg(not(miri))]
748    #[cfg(feature = "tzdb-zoneinfo")]
749    #[cfg(target_os = "linux")]
750    #[test]
751    fn zoneinfo() {
752        const TZDIR: &str = "/usr/share/zoneinfo";
753
754        for result in walkdir::WalkDir::new(TZDIR) {
755            // Just skip if we got an error traversing the directory tree.
756            // These aren't related to our parsing, so it's some other problem
757            // (like the directory not existing).
758            let Ok(dent) = result else { continue };
759            // This test can take some time in debug mode, so skip parsing
760            // some of the less frequently used TZif files.
761            let Some(name) = dent.path().to_str() else { continue };
762            if name.contains("right/") || name.contains("posix/") {
763                continue;
764            }
765            // Again, skip if we can't read. Not my monkeys, not my circus.
766            let Ok(bytes) = std::fs::read(dent.path()) else { continue };
767            if !is_possibly_tzif(&bytes) {
768                continue;
769            }
770            let tzname = dent
771                .path()
772                .strip_prefix(TZDIR)
773                .unwrap_or_else(|_| {
774                    panic!("all paths in TZDIR have {TZDIR:?} prefix")
775                })
776                .to_str()
777                .expect("all paths to be valid UTF-8")
778                .to_string();
779            // OK at this point, we're pretty sure `bytes` should be a TZif
780            // binary file. So try to parse it and fail the test if it fails.
781            if let Err(err) = Tzif::parse(Some(tzname), &bytes) {
782                panic!("failed to parse TZif file {:?}: {err}", dent.path());
783            }
784        }
785    }
786}