From 9717535175676e2b138e0460237c1c6e036cf508 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 2 Jan 2025 09:44:38 -0500 Subject: [PATCH] fmt/temporal: add new `Pieces` type for granular ISO 8601 parsing This commit adds a new type to the `jiff::fmt::temporal` module that exposes the raw components of a parsed Temporal ISO 8601 datetime string. This is meant to address use cases that need something a bit more flexible than what is provided by the higher level parsing routines. Namely, the higher level routines go out of their way to stop you from shooting yourself in the foot. For example, parsing into a `Zoned` requires a time zone annotation. But parsing into a `Pieces` doesn't require any of that. It just has to match the Temporal ISO 8601 grammar. Then you can mix and match the pieces in whatever way you desire. And indeed, you can pretty easily shoot yourself in the foot with this API. I feel okay about this because it's tucked into a corner of Jiff that you specifically have to seek out to use. I've also included examples in the docs of _how_ you can easily shoot yourself in the foot. I've included a couple case studies in the docs reflecting some real world examples I've come across in the issue tracker. 
Ref #112, Ref #181, Closes #188, Ref https://github.com/tc39/proposal-temporal/issues/2930 --- CHANGELOG.md | 21 + src/fmt/offset.rs | 19 + src/fmt/rfc9557.rs | 76 +- src/fmt/temporal/mod.rs | 207 +++++ src/fmt/temporal/parser.rs | 21 +- src/fmt/temporal/pieces.rs | 1725 +++++++++++++++++++++++++++++++++++ src/fmt/temporal/printer.rs | 68 ++ src/lib.rs | 7 +- src/tz/offset.rs | 56 ++ src/util/borrow.rs | 66 ++ src/zoned.rs | 8 +- 11 files changed, 2227 insertions(+), 47 deletions(-) create mode 100644 src/fmt/temporal/pieces.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b72000a..631d3589 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # CHANGELOG +0.1.20 (2025-01-03) +=================== +This release includes a new type, `Pieces`, in the `jiff::fmt::temporal` +sub-module. This exposes the individual components of a parsed Temporal +ISO 8601 datetime string. It allows users of Jiff to circumvent the checks +in the higher level parsing routines that prevent you from shooting yourself +in the foot. + +For example, parsing into a `Zoned` will return an error for raw RFC 3339 +timestamps like `2025-01-03T22:03-05` because there is no time zone annotation. +Without a time zone, Jiff cannot do time zone aware arithmetic and rounding. +Instead, such a datetime can only be parsed into a `Timestamp`. This lower +level `Pieces` API now permits users of Jiff to parse this string into its +component parts and assemble it into a `Zoned` if they so choose. + +Enhancements: + +* [#188](https://github.com/BurntSushi/jiff/issues/188): +Add `fmt::temporal::Pieces` for granular datetime parsing and formatting. + + 0.1.19 (2025-01-02) =================== This releases includes a UTF-8 related bug fix and a few enhancements. diff --git a/src/fmt/offset.rs b/src/fmt/offset.rs index 96da13b0..3b1a13ef 100644 --- a/src/fmt/offset.rs +++ b/src/fmt/offset.rs @@ -106,6 +106,7 @@ from [Temporal's hybrid grammar]. 
use crate::{ error::{err, Error, ErrorContext}, fmt::{ + temporal::{PiecesNumericOffset, PiecesOffset}, util::{parse_temporal_fraction, FractionalFormatter}, Parsed, }, @@ -156,6 +157,24 @@ impl ParsedOffset { } } + /// Convert a parsed offset to a more structured representation. + /// + /// This is like `to_offset`, but preserves `Z` and `-00:00` versus + /// `+00:00`. This does still attempt to create an `Offset`, and that + /// construction can fail. + pub(crate) fn to_pieces_offset(&self) -> Result { + match self.kind { + ParsedOffsetKind::Zulu => Ok(PiecesOffset::Zulu), + ParsedOffsetKind::Numeric(ref numeric) => { + let mut off = PiecesNumericOffset::from(numeric.to_offset()?); + if numeric.sign < 0 { + off = off.with_negative_zero(); + } + Ok(PiecesOffset::from(off)) + } + } + } + /// Whether this parsed offset corresponds to Zulu time or not. /// /// This is useful in error reporting for parsing civil times. Namely, we diff --git a/src/fmt/rfc9557.rs b/src/fmt/rfc9557.rs index e50cf209..817992f1 100644 --- a/src/fmt/rfc9557.rs +++ b/src/fmt/rfc9557.rs @@ -98,9 +98,9 @@ use crate::{ error::{err, Error}, fmt::{ offset::{self, ParsedOffset}, + temporal::{TimeZoneAnnotation, TimeZoneAnnotationKind}, Parsed, }, - tz::{TimeZone, TimeZoneDatabase}, util::{escape, parse}, }; @@ -130,41 +130,16 @@ impl<'i> ParsedAnnotations<'i> { ParsedAnnotations { input: escape::Bytes(&[]), time_zone: None } } - /// If a time zone annotation was parsed, then this returns the annotation - /// converted to a `TimeZone`, along with a flag indicating whether it - /// is "critical" or not. (When it's "critical," there should be more - /// stringent validation.) + /// Turns this parsed time zone into a structured time zone annotation, + /// if an annotation was found. Otherwise, returns `Ok(None)`. /// - /// If the time zone annotation parsed successfully but was either not - /// found in the database given or otherwise invalid, then an error is - /// returned. 
- /// - /// `None` is returned only when there was no time zone annotation. - pub(crate) fn to_time_zone( + /// This can return an error if the parsed offset could not be converted + /// to a `crate::tz::Offset`. + pub(crate) fn to_time_zone_annotation( &self, - db: &TimeZoneDatabase, - ) -> Result, Error> { + ) -> Result>, Error> { let Some(ref parsed) = self.time_zone else { return Ok(None) }; - // NOTE: We don't currently utilize the critical flag here. Temporal - // seems to ignore it. It's not quite clear what else we'd do with it, - // particularly given that we provide a way to do conflict resolution - // between offsets and time zones. - match *parsed { - ParsedTimeZone::Named { critical, name } => { - let tz = match db.get(name) { - Ok(tz) => tz, - Err(err) => return Err(err!("{}", err)), - }; - Ok(Some((tz, critical))) - } - ParsedTimeZone::Offset { critical, ref offset } => { - let offset = match offset.to_offset() { - Ok(offset) => offset, - Err(err) => return Err(err), - }; - Ok(Some((TimeZone::fixed(offset), critical))) - } - } + Ok(Some(parsed.to_time_zone_annotation()?)) } } @@ -187,6 +162,31 @@ enum ParsedTimeZone<'i> { }, } +impl<'i> ParsedTimeZone<'i> { + /// Turns this parsed time zone into a structured time zone annotation. + /// + /// This can return an error if the parsed offset could not be converted + /// to a `crate::tz::Offset`. + /// + /// This also includes a flag of whether the annotation is "critical" or + /// not. + pub(crate) fn to_time_zone_annotation( + &self, + ) -> Result, Error> { + let (kind, critical) = match *self { + ParsedTimeZone::Named { name, critical } => { + let kind = TimeZoneAnnotationKind::from(name); + (kind, critical) + } + ParsedTimeZone::Offset { ref offset, critical } => { + let kind = TimeZoneAnnotationKind::Offset(offset.to_offset()?); + (kind, critical) + } + }; + Ok(TimeZoneAnnotation { kind, critical }) + } +} + /// A parser for RFC 9557 annotations. 
#[derive(Debug)] pub(crate) struct Parser { @@ -603,8 +603,9 @@ mod tests { .parse(input) .unwrap() .value - .to_time_zone(crate::tz::db()) + .to_time_zone_annotation() .unwrap() + .map(|ann| (ann.to_time_zone().unwrap(), ann.is_critical())) }; insta::assert_debug_snapshot!(p(b"[America/New_York]"), @r###" @@ -1024,13 +1025,16 @@ mod tests { #[cfg(feature = "std")] #[test] - fn err_time_zone() { + fn err_time_zone_db_lookup() { let p = |input| { Parser::new() .parse(input) .unwrap() .value - .to_time_zone(crate::tz::db()) + .to_time_zone_annotation() + .unwrap() + .unwrap() + .to_time_zone() .unwrap_err() }; diff --git a/src/fmt/temporal/mod.rs b/src/fmt/temporal/mod.rs index a1bfdd42..446d4c95 100644 --- a/src/fmt/temporal/mod.rs +++ b/src/fmt/temporal/mod.rs @@ -175,7 +175,13 @@ use crate::{ SignedDuration, Timestamp, Zoned, }; +pub use self::pieces::{ + Pieces, PiecesNumericOffset, PiecesOffset, TimeZoneAnnotation, + TimeZoneAnnotationKind, TimeZoneAnnotationName, +}; + mod parser; +mod pieces; mod printer; /// The default date time parser that we use throughout Jiff. @@ -753,6 +759,129 @@ impl DateTimeParser { let time = parsed_time.to_time(); Ok(time) } + + /// Parse a Temporal datetime string into [`Pieces`]. + /// + /// This is a lower level routine meant to give callers raw access to the + /// individual "pieces" of a parsed Temporal ISO 8601 datetime string. + /// Note that this only includes strings that have a date component. + /// + /// The benefit of this routine is that it only checks that the datetime + /// is itself valid. It doesn't do any automatic disambiguation, offset + /// conflict resolution or attempt to prevent you from shooting yourself + /// in the foot. For example, this routine will let you parse a fixed + /// offset datetime into a `Zoned` without a time zone annotation. 
+ /// + /// Note that when using this routine, the + /// [`DateTimeParser::offset_conflict`] and + /// [`DateTimeParser::disambiguation`] configuration knobs are completely + /// ignored. This is because with the lower level `Pieces`, callers must + /// handle offset conflict resolution (if they want it) themselves. See + /// the [`Pieces`] documentation for a case study on how to do this if + /// you need it. + /// + /// # Errors + /// + /// This returns an error if the datetime string given is invalid or if it + /// is valid but doesn't fit in the date range supported by Jiff. + /// + /// # Example + /// + /// This shows how to parse a fixed offset timestamp into a `Zoned`. + /// + /// ``` + /// use jiff::{fmt::temporal::DateTimeParser, tz::TimeZone}; + /// + /// static PARSER: DateTimeParser = DateTimeParser::new(); + /// + /// let timestamp = "2025-01-02T15:13-05"; + /// + /// // Normally this operation will fail. + /// assert_eq!( + /// PARSER.parse_zoned(timestamp).unwrap_err().to_string(), + /// "failed to find time zone in square brackets in \ + /// \"2025-01-02T15:13-05\", which is required for \ + /// parsing a zoned instant", + /// ); + /// + /// // But you can work-around this with `Pieces`, which gives you direct + /// // access to the components parsed from the string. + /// let pieces = PARSER.parse_pieces(timestamp)?; + /// let time = pieces.time().unwrap_or_else(jiff::civil::Time::midnight); + /// let dt = pieces.date().to_datetime(time); + /// let tz = match pieces.to_time_zone()? { + /// Some(tz) => tz, + /// None => { + /// let Some(offset) = pieces.to_numeric_offset() else { + /// let msg = format!( + /// "timestamp `{timestamp}` has no time zone \ + /// or offset, and thus cannot be parsed into \ + /// an instant", + /// ); + /// return Err(msg.into()); + /// }; + /// TimeZone::fixed(offset) + /// } + /// }; + /// // We don't bother with offset conflict resolution. 
And note that + /// // this uses automatic "compatible" disambiguation in the case of + /// // discontinuities. Of course, this is all moot if `TimeZone` is + /// // fixed. The above code handles the case where it isn't! + /// let zdt = tz.to_zoned(dt)?; + /// assert_eq!(zdt.to_string(), "2025-01-02T15:13:00-05:00[-05:00]"); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: work around errors when a `Z` (Zulu) offset is encountered + /// + /// Because parsing a date with a `Z` offset and interpreting it as + /// a civil date or time is usually a bug, it is forbidden: + /// + /// ``` + /// use jiff::{civil::date, fmt::temporal::DateTimeParser}; + /// + /// static PARSER: DateTimeParser = DateTimeParser::new(); + /// + /// assert_eq!( + /// PARSER.parse_date("2024-03-10T00:00:00Z").unwrap_err().to_string(), + /// "cannot parse civil date from string with a Zulu offset, \ + /// parse as a `Timestamp` and convert to a civil date instead", + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// But this sort of error checking doesn't happen when you parse into a + /// [`Pieces`]. You just get what was parsed, which lets you extract a + /// date even if the higher level APIs forbid it: + /// + /// ``` + /// use jiff::{civil, fmt::temporal::DateTimeParser, tz::Offset}; + /// + /// static PARSER: DateTimeParser = DateTimeParser::new(); + /// + /// let pieces = PARSER.parse_pieces("2024-03-10T00:00:00Z")?; + /// assert_eq!(pieces.date(), civil::date(2024, 3, 10)); + /// assert_eq!(pieces.time(), Some(civil::time(0, 0, 0, 0))); + /// assert_eq!(pieces.to_numeric_offset(), Some(Offset::UTC)); + /// assert_eq!(pieces.to_time_zone()?, None); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// This is usually not the right thing to do. It isn't even suggested in + /// the error message above. But if you know it's the right thing, then + /// `Pieces` will let you do it. 
+ pub fn parse_pieces<'i, I: ?Sized + AsRef<[u8]> + 'i>( + &self, + input: &'i I, + ) -> Result, Error> { + let input = input.as_ref(); + let parsed = self.p.parse_temporal_datetime(input)?.into_full()?; + let pieces = parsed.to_pieces()?; + Ok(pieces) + } } /// A printer for Temporal datetimes. @@ -1162,6 +1291,50 @@ impl DateTimePrinter { buf } + /// Format `Pieces` of a Temporal datetime. + /// + /// This is a convenience routine for [`DateTimePrinter::print_pieces`] + /// with a `String`. + /// + /// # Example + /// + /// ``` + /// use jiff::{ + /// fmt::temporal::{DateTimePrinter, Pieces}, + /// tz::offset, + /// Timestamp, + /// }; + /// + /// const PRINTER: DateTimePrinter = DateTimePrinter::new(); + /// + /// let pieces = Pieces::from(Timestamp::UNIX_EPOCH); + /// assert_eq!( + /// PRINTER.pieces_to_string(&pieces), + /// "1970-01-01T00:00:00Z", + /// ); + /// + /// let pieces = Pieces::from((Timestamp::UNIX_EPOCH, offset(0))); + /// assert_eq!( + /// PRINTER.pieces_to_string(&pieces), + /// "1970-01-01T00:00:00+00:00", + /// ); + /// + /// let pieces = Pieces::from((Timestamp::UNIX_EPOCH, offset(-5))); + /// assert_eq!( + /// PRINTER.pieces_to_string(&pieces), + /// "1969-12-31T19:00:00-05:00", + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "alloc")] + pub fn pieces_to_string(&self, pieces: &Pieces) -> alloc::string::String { + let mut buf = alloc::string::String::with_capacity(4); + // OK because writing to `String` never fails. + self.print_pieces(pieces, &mut buf).unwrap(); + buf + } + /// Print a `Zoned` datetime to the given writer. /// /// # Errors @@ -1412,6 +1585,40 @@ impl DateTimePrinter { ) -> Result<(), Error> { self.p.print_time(time, wtr) } + + /// Print the `Pieces` of a Temporal datetime. + /// + /// # Errors + /// + /// This only returns an error when writing to the given [`Write`] + /// implementation would fail. 
Some such implementations, like for `String` + /// and `Vec`, never fail (unless memory allocation fails). In such + /// cases, it would be appropriate to call `unwrap()` on the result. + /// + /// # Example + /// + /// ``` + /// use jiff::{civil::date, fmt::temporal::{DateTimePrinter, Pieces}}; + /// + /// const PRINTER: DateTimePrinter = DateTimePrinter::new(); + /// + /// let pieces = Pieces::from(date(2024, 6, 15)) + /// .with_time_zone_name("US/Eastern"); + /// + /// let mut buf = String::new(); + /// // Printing to a `String` can never fail. + /// PRINTER.print_pieces(&pieces, &mut buf).unwrap(); + /// assert_eq!(buf, "2024-06-15[US/Eastern]"); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn print_pieces( + &self, + pieces: &Pieces, + wtr: W, + ) -> Result<(), Error> { + self.p.print_pieces(pieces, wtr) + } } /// A parser for Temporal durations. diff --git a/src/fmt/temporal/parser.rs b/src/fmt/temporal/parser.rs index c0c521cb..e10cffa5 100644 --- a/src/fmt/temporal/parser.rs +++ b/src/fmt/temporal/parser.rs @@ -4,6 +4,7 @@ use crate::{ fmt::{ offset::{self, ParsedOffset}, rfc9557::{self, ParsedAnnotations}, + temporal::Pieces, util::{ fractional_time_to_duration, fractional_time_to_span, parse_temporal_fraction, @@ -35,6 +36,21 @@ pub(super) struct ParsedDateTime<'i> { } impl<'i> ParsedDateTime<'i> { + #[inline(always)] + pub(super) fn to_pieces(&self) -> Result, Error> { + let mut pieces = Pieces::from(self.date.date); + if let Some(ref time) = self.time { + pieces = pieces.with_time(time.time); + } + if let Some(ref offset) = self.offset { + pieces = pieces.with_offset(offset.to_pieces_offset()?); + } + if let Some(ann) = self.annotations.to_time_zone_annotation()? { + pieces = pieces.with_time_zone_annotation(ann); + } + Ok(pieces) + } + #[inline(always)] pub(super) fn to_zoned( &self, @@ -56,14 +72,15 @@ impl<'i> ParsedDateTime<'i> { let dt = DateTime::from_parts(self.date.date, time); // We always require a time zone when parsing a zoned instant. 
- let (tz, _critical) = - self.annotations.to_time_zone(db)?.ok_or_else(|| { + let tz_annotation = + self.annotations.to_time_zone_annotation()?.ok_or_else(|| { err!( "failed to find time zone in square brackets \ in {:?}, which is required for parsing a zoned instant", self.input, ) })?; + let tz = tz_annotation.to_time_zone_with(db)?; // If there's no offset, then our only choice, regardless of conflict // resolution preference, is to use the time zone. That is, there is no diff --git a/src/fmt/temporal/pieces.rs b/src/fmt/temporal/pieces.rs new file mode 100644 index 00000000..7fcf6682 --- /dev/null +++ b/src/fmt/temporal/pieces.rs @@ -0,0 +1,1725 @@ +use crate::{ + civil::{Date, DateTime, Time}, + error::Error, + tz::{Offset, TimeZone, TimeZoneDatabase}, + util::borrow::StringCow, + Timestamp, Zoned, +}; + +/// A low level representation of a parsed Temporal ISO 8601 datetime string. +/// +/// Most users should not need to use or care about this type. Its purpose is +/// to represent the individual components of a datetime string for more +/// flexible parsing when use cases call for it. +/// +/// One can parse into `Pieces` via [`Pieces::parse`]. Its date, time +/// (optional), offset (optional) and time zone annotation (optional) can be +/// queried independently. Each component corresponds to the following in a +/// datetime string: +/// +/// ```text +/// {date}T{time}{offset}[{time-zone-annotation}] +/// ``` +/// +/// For example: +/// +/// ```text +/// 2025-01-03T19:54-05[America/New_York] +/// ``` +/// +/// A date is the only required component. +/// +/// A `Pieces` can also be constructed from structured values via its `From` +/// trait implementations. The `From` trait has the following implementations +/// available: +/// +/// * `From<Date>` creates a `Pieces` with just a civil [`Date`]. All other +/// components are left empty. +/// * `From<DateTime>` creates a `Pieces` with a civil [`Date`] and [`Time`]. +/// The offset and time zone annotation are left empty. 
+/// * `From<Timestamp>` creates a `Pieces` from a [`Timestamp`] using +/// a Zulu offset. This signifies that the precise instant is known, but the +/// local time's offset from UTC is unknown. The [`Date`] and [`Time`] are +/// determined via `Offset::UTC.to_datetime(timestamp)`. The time zone +/// annotation is left empty. +/// * `From<(Timestamp, Offset)>` creates a `Pieces` from a [`Timestamp`] and +/// an [`Offset`]. The [`Date`] and [`Time`] are determined via +/// `offset.to_datetime(timestamp)`. The time zone annotation is left empty. +/// * `From<&Zoned>` creates a `Pieces` from a [`Zoned`]. This populates all +/// fields of a `Pieces`. +/// +/// A `Pieces` can be converted to a Temporal ISO 8601 string via its `Display` +/// trait implementation. +/// +/// # Example: distinguishing between `Z`, `+00:00` and `-00:00` +/// +/// With `Pieces`, it's possible to parse a datetime string and inspect the +/// "type" of its offset when it is zero. This makes use of the +/// [`PiecesOffset`] and [`PiecesNumericOffset`] auxiliary types. +/// +/// ``` +/// use jiff::{ +/// fmt::temporal::{Pieces, PiecesNumericOffset, PiecesOffset}, +/// tz::Offset, +/// }; +/// +/// let pieces = Pieces::parse("1970-01-01T00:00:00Z")?; +/// let off = pieces.offset().unwrap(); +/// // Parsed as Zulu. +/// assert_eq!(off, PiecesOffset::Zulu); +/// // Gets converted from Zulu to UTC, i.e., just zero. +/// assert_eq!(off.to_numeric_offset(), Offset::UTC); +/// +/// let pieces = Pieces::parse("1970-01-01T00:00:00-00:00")?; +/// let off = pieces.offset().unwrap(); +/// // Parsed as a negative zero. +/// assert_eq!(off, PiecesOffset::from( +/// PiecesNumericOffset::from(Offset::UTC).with_negative_zero(), +/// )); +/// // Gets converted from -00:00 to UTC, i.e., just zero. +/// assert_eq!(off.to_numeric_offset(), Offset::UTC); +/// +/// let pieces = Pieces::parse("1970-01-01T00:00:00+00:00")?; +/// let off = pieces.offset().unwrap(); +/// // Parsed as a positive zero. 
+/// assert_eq!(off, PiecesOffset::from( +/// PiecesNumericOffset::from(Offset::UTC), +/// )); +/// // Gets converted from +00:00 to UTC, i.e., just zero. +/// assert_eq!(off.to_numeric_offset(), Offset::UTC); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// It's rare to need to care about these differences, but the above example +/// demonstrates that `Pieces` doesn't try to do any automatic translation for +/// you. +/// +/// # Example: it is very easy to misuse `Pieces` +/// +/// This example shows how easily you can shoot yourself in the foot with +/// `Pieces`: +/// +/// ``` +/// use jiff::{fmt::temporal::{Pieces, TimeZoneAnnotation}, tz}; +/// +/// let mut pieces = Pieces::parse("2025-01-03T07:55+02[Africa/Cairo]")?; +/// pieces = pieces.with_offset(tz::offset(-10)); +/// // This is nonsense because the offset isn't compatible with the time zone! +/// // Moreover, the actual instant that this timestamp represents has changed. +/// assert_eq!(pieces.to_string(), "2025-01-03T07:55:00-10:00[Africa/Cairo]"); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// In the above example, we take a parsed `Pieces`, change its offset and +/// then format it back into a string. There are no speed bumps or errors. +/// A `Pieces` will just blindly follow your instruction, even if it produces +/// a nonsense result. Nonsense results are still parsable back into `Pieces`: +/// +/// ``` +/// use jiff::{civil, fmt::temporal::Pieces, tz::{TimeZone, offset}}; +/// +/// let pieces = Pieces::parse("2025-01-03T07:55:00-10:00[Africa/Cairo]")?; +/// assert_eq!(pieces.date(), civil::date(2025, 1, 3)); +/// assert_eq!(pieces.time(), Some(civil::time(7, 55, 0, 0))); +/// assert_eq!(pieces.to_numeric_offset(), Some(offset(-10))); +/// assert_eq!(pieces.to_time_zone()?, Some(TimeZone::get("Africa/Cairo")?)); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// This exemplifies that `Pieces` is a mostly "dumb" type that passes +/// through the data it contains, even if it doesn't make sense. 
+/// +/// # Case study: how to parse `2025-01-03T17:28-05` into `Zoned` +/// +/// One thing in particular that `Pieces` enables callers to do is side-step +/// some of the stricter requirements placed on the higher level parsing +/// functions (such as `Zoned`'s `FromStr` trait implementation). For example, +/// parsing a datetime string into a `Zoned` _requires_ that the string contain +/// a time zone annotation. Namely, parsing `2025-01-03T17:28-05` into a +/// `Zoned` will fail: +/// +/// ``` +/// use jiff::Zoned; +/// +/// assert_eq!( +/// "2025-01-03T17:28-05".parse::<Zoned>().unwrap_err().to_string(), +/// "failed to find time zone in square brackets in \ +/// \"2025-01-03T17:28-05\", which is required for \ +/// parsing a zoned instant", +/// ); +/// ``` +/// +/// The above fails because an RFC 3339 timestamp only contains an offset, +/// not a time zone, and thus the resulting `Zoned` could never do time zone +/// aware arithmetic. +/// +/// However, in some cases, you might want to bypass these protections and +/// create a `Zoned` value with a fixed offset time zone anyway. For example, +/// perhaps your use cases don't need time zone aware arithmetic, but want to +/// preserve the offset anyway. 
This can be accomplished with `Pieces`: +/// +/// ``` +/// use jiff::{fmt::temporal::Pieces, tz::TimeZone}; +/// +/// let pieces = Pieces::parse("2025-01-03T17:28-05")?; +/// let time = pieces.time().unwrap_or_else(jiff::civil::Time::midnight); +/// let dt = pieces.date().to_datetime(time); +/// let Some(offset) = pieces.to_numeric_offset() else { +/// let msg = format!( +/// "datetime string has no offset, \ +/// and thus cannot be parsed into an instant", +/// ); +/// return Err(msg.into()); +/// }; +/// let zdt = TimeZone::fixed(offset).to_zoned(dt)?; +/// assert_eq!(zdt.to_string(), "2025-01-03T17:28:00-05:00[-05:00]"); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// One problem with the above code snippet is that it completely ignores if +/// a time zone annotation is present. If it is, it probably makes sense to use +/// it, but "fall back" to a fixed offset time zone if it isn't (which the +/// higher level `Zoned` parsing function won't do for you): +/// +/// ``` +/// use jiff::{fmt::temporal::Pieces, tz::TimeZone}; +/// +/// let timestamp = "2025-01-02T15:13-05"; +/// +/// let pieces = Pieces::parse(timestamp)?; +/// let time = pieces.time().unwrap_or_else(jiff::civil::Time::midnight); +/// let dt = pieces.date().to_datetime(time); +/// let tz = match pieces.to_time_zone()? { +/// Some(tz) => tz, +/// None => { +/// let Some(offset) = pieces.to_numeric_offset() else { +/// let msg = format!( +/// "timestamp `{timestamp}` has no time zone \ +/// or offset, and thus cannot be parsed into \ +/// an instant", +/// ); +/// return Err(msg.into()); +/// }; +/// TimeZone::fixed(offset) +/// } +/// }; +/// // We don't bother with offset conflict resolution. And note that +/// // this uses automatic "compatible" disambiguation in the case of +/// // discontinuities. Of course, this is all moot if `TimeZone` is +/// // fixed. The above code handles the case where it isn't! 
+/// let zdt = tz.to_zoned(dt)?; +/// assert_eq!(zdt.to_string(), "2025-01-02T15:13:00-05:00[-05:00]"); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// This is mostly the same as above, but if an annotation is present, we use +/// a `TimeZone` derived from that over the offset present. +/// +/// However, this still doesn't quite capture what happens when parsing into a +/// `Zoned` value. In particular, parsing into a `Zoned` is _also_ doing offset +/// conflict resolution for you. An offset conflict occurs when there is a +/// mismatch between the offset in an RFC 3339 timestamp and the time zone in +/// an RFC 9557 time zone annotation. +/// +/// For example, `2024-06-14T17:30-05[America/New_York]` has a mismatch +/// since the date is in daylight saving time, but the offset, `-05`, is the +/// offset for standard time in `America/New_York`. If this datetime were +/// fed to the above code, then the `-05` offset would be completely ignored +/// and `America/New_York` would resolve the datetime based on its rules. In +/// this case, you'd get `2024-06-14T17:30-04`, which is a different instant +/// than the original datetime! +/// +/// You can either implement your own conflict resolution or use +/// [`tz::OffsetConflict`](crate::tz::OffsetConflict) to do it for you. +/// +/// ``` +/// use jiff::{fmt::temporal::Pieces, tz::{OffsetConflict, TimeZone}}; +/// +/// let timestamp = "2024-06-14T17:30-05[America/New_York]"; +/// // The default for conflict resolution when parsing into a `Zoned` is +/// // actually `Reject`, but we use `AlwaysOffset` here to show a different +/// // strategy. You'll want to pick the conflict resolution that suits your +/// // needs. The `Reject` strategy is what you should pick if you aren't +/// // sure. 
+/// let conflict_resolution = OffsetConflict::AlwaysOffset; +/// +/// let pieces = Pieces::parse(timestamp)?; +/// let time = pieces.time().unwrap_or_else(jiff::civil::Time::midnight); +/// let dt = pieces.date().to_datetime(time); +/// let ambiguous_zdt = match pieces.to_time_zone()? { +/// Some(tz) => { +/// match pieces.to_numeric_offset() { +/// None => tz.into_ambiguous_zoned(dt), +/// Some(offset) => { +/// conflict_resolution.resolve(dt, offset, tz)? +/// } +/// } +/// } +/// None => { +/// let Some(offset) = pieces.to_numeric_offset() else { +/// let msg = format!( +/// "timestamp `{timestamp}` has no time zone \ +/// or offset, and thus cannot be parsed into \ +/// an instant", +/// ); +/// return Err(msg.into()); +/// }; +/// // Won't even be ambiguous, but gets us the same +/// // type as the branch above. +/// TimeZone::fixed(offset).into_ambiguous_zoned(dt) +/// } +/// }; +/// // We do compatible disambiguation here like we do in the previous +/// // examples, but you could choose any strategy. As with offset conflict +/// // resolution, if you aren't sure what to pick, a safe choice here would +/// // be `ambiguous_zdt.unambiguous()`, which will return an error if the +/// // datetime is ambiguous in any way. Then, if you ever hit an error, you +/// // can examine the case to see if it should be handled in a different way. +/// let zdt = ambiguous_zdt.compatible()?; +/// // Notice that we now have a different civil time and offset, but the +/// // instant it corresponds to is the same as the one we started with. +/// assert_eq!(zdt.to_string(), "2024-06-14T18:30:00-04:00[America/New_York]"); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// The above has effectively completely rebuilt the higher level `Zoned` +/// parsing routine, but with a fallback to a fixed time zone when a time zone +/// annotation is not present. 
+/// +/// # Case study: inferring the time zone of RFC 3339 timestamps +/// +/// As [one real world use case details][infer-time-zone], it might be +/// desirable to try and infer the time zone of RFC 3339 timestamps with +/// varying offsets. This might be applicable when: +/// +/// * You have out-of-band information, possibly contextual, that indicates +/// the timestamps have to come from a fixed set of time zones. +/// * The time zones have different standard offsets. +/// * You have a specific desire or need to use a [`Zoned`] value for its +/// ergonomics and time zone aware handling. After all, in this case, you +/// believe the timestamps to actually be generated from a specific time zone, +/// but the interchange format doesn't support carrying that information. Or +/// the source data simply omits it. +/// +/// In other words, you might be trying to make the best of a bad situation. +/// +/// A `Pieces` can help you accomplish this because it gives you access to each +/// component of a parsed datetime, and thus lets you implement arbitrary logic +/// for how to translate that into a `Zoned`. In this case, there is +/// contextual information that Jiff can't possibly know about. +/// +/// The general approach we take here is to make use of +/// [`tz::OffsetConflict`](crate::tz::OffsetConflict) to query whether a +/// timestamp has a fixed offset compatible with a particular time zone. And if +/// so, we can _probably_ assume it comes from that time zone. One hitch is +/// that it's possible for the timestamp to be valid for multiple time zones, +/// so we check that as well. +/// +/// In the use case linked above, we have fixed offset timestamps from +/// `America/Chicago` and `America/New_York`. So let's try implementing the +/// above strategy. Note that we assume our inputs are RFC 3339 fixed offset +/// timestamps and error otherwise. This is just to keep things simple. 
To +/// handle data that is more varied, see the previous case study where we +/// respect a time zone annotation if it's present, and fall back to a fixed +/// offset time zone if it isn't. +/// +/// ``` +/// use jiff::{fmt::temporal::Pieces, tz::{OffsetConflict, TimeZone}, Zoned}; +/// +/// // The time zones we're allowed to choose from. +/// let tzs = &[ +/// TimeZone::get("America/New_York")?, +/// TimeZone::get("America/Chicago")?, +/// ]; +/// +/// // Here's our data that lacks time zones. The task is to assign a time zone +/// // from `tzs` to each below and convert it to a `Zoned`. If we fail on any +/// // one, then we substitute `None`. +/// let data = &[ +/// "2024-01-13T10:33-05", +/// "2024-01-25T12:15-06", +/// "2024-03-10T02:30-05", +/// "2024-06-08T14:01-05", +/// "2024-06-12T11:46-04", +/// "2024-11-03T01:30-05", +/// ]; +/// // Our answers. +/// let mut zdts: Vec> = vec![]; +/// for string in data { +/// // Parse and gather up the data that we can from the input. +/// // In this case, that's a civil datetime and an offset from UTC. +/// let pieces = Pieces::parse(string)?; +/// let time = pieces.time().unwrap_or_else(jiff::civil::Time::midnight); +/// let dt = pieces.date().to_datetime(time); +/// let Some(offset) = pieces.to_numeric_offset() else { +/// // A robust implementation should use a TZ annotation if present. +/// return Err("missing offset".into()); +/// }; +/// // Now collect all time zones that are valid for this timestamp. +/// let mut candidates = vec![]; +/// for tz in tzs { +/// let result = OffsetConflict::Reject.resolve(dt, offset, tz.clone()); +/// // The parsed offset isn't valid for this time zone, so reject it. +/// let Ok(ambiguous_zdt) = result else { continue }; +/// // This can never fail because we used the "reject" conflict +/// // resolution strategy. It will never return an ambiguous +/// // `Zoned` since we always have a valid offset that does +/// // disambiguation for us. 
+/// let zdt = ambiguous_zdt.unambiguous().unwrap(); +/// candidates.push(zdt); +/// } +/// if candidates.len() == 1 { +/// zdts.push(Some(candidates.pop().unwrap())); +/// } else { +/// zdts.push(None); +/// } +/// } +/// assert_eq!(zdts, vec![ +/// Some("2024-01-13T10:33-05[America/New_York]".parse()?), +/// Some("2024-01-25T12:15-06[America/Chicago]".parse()?), +/// // Failed because the clock time falls in a gap in the +/// // transition to daylight saving time, and it could be +/// // valid for either America/New_York or America/Chicago. +/// None, +/// Some("2024-06-08T14:01-05[America/Chicago]".parse()?), +/// Some("2024-06-12T11:46-04[America/New_York]".parse()?), +/// // Failed because the clock time falls in a fold in the +/// // transition out of daylight saving time, and it could be +/// // valid for either America/New_York or America/Chicago. +/// None, +/// ]); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// The one hitch here is that if the time zones are close to each +/// geographically and both have daylight saving time, then there are some +/// RFC 3339 timestamps that are truly ambiguous. For example, +/// `2024-11-03T01:30-05` is perfectly valid for both `America/New_York` and +/// `America/Chicago`. In this case, there is no way to tell which time zone +/// the timestamp belongs to. It might be reasonable to return an error in +/// this case or omit the timestamp. It depends on what you need to do. +/// +/// With more effort, it would also be possible to optimize the above routine +/// by utilizing [`TimeZone::preceding`] and [`TimeZone::following`] to get +/// the exact boundaries of each time zone transition. Then you could use an +/// offset lookup table for each range to determine the appropriate time zone. +/// +/// [infer-time-zone]: https://github.com/BurntSushi/jiff/discussions/181#discussioncomment-11729435 +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Pieces<'n> { + date: Date, + time: Option