Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

preserve structure of timezone designation list #5581

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions utils/tzif/src/data/tzif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,17 @@ pub struct DataBlock {
/// A series of [`UtLocalIndicator`] objects.
pub ut_local_indicators: Vec<UtLocalIndicator>,
}

impl DataBlock {
/// Retrieves the timezone designation at index `idx`.
pub fn time_zone_designation(&self, mut idx: usize) -> Option<&str> {
self.time_zone_designations.iter().find_map(|d| {
if idx <= d.len() {
nordzilla marked this conversation as resolved.
Show resolved Hide resolved
Some(&d[idx..])
} else {
idx -= d.len() + 1;
nordzilla marked this conversation as resolved.
Show resolved Hide resolved
None
}
})
}
}
nordzilla marked this conversation as resolved.
Show resolved Hide resolved
161 changes: 64 additions & 97 deletions utils/tzif/src/parse/tzif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,65 +376,40 @@ where
count_min_max(typecnt, typecnt, local_time_type_record(charcnt))
}

/// Reads the raw time zone designation block: an array of NUL-terminated
/// (0x00) designation strings whose total byte length is given by the
/// header's "charcnt" field.
///
/// The bytes are returned as one [`String`], terminators included; any
/// byte sequence that is not valid UTF-8 is replaced by U+FFFD through
/// [`String::from_utf8_lossy`].
fn raw_time_zone_designations<Input>(charcnt: usize) -> impl Parser<Input, Output = String>
where
    Input: Stream<Token = u8>,
    Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
    // Minimum and maximum repetition counts are both `charcnt`.
    let designation_bytes = count_min_max(charcnt, charcnt, any());
    designation_bytes.map(|raw: Vec<u8>| {
        let decoded = String::from_utf8_lossy(&raw);
        decoded.into_owned()
    })
}

/// A series of bytes constituting an array of
/// NUL-terminated (0x00) time zone designation strings. The total
/// number of bytes is specified by the "charcnt" field in the
/// header.
///
/// Splits each designation into a vector of [`String`] where each string
/// starts at an index defined by a local time type record and ends at a
/// NUL-terminator (0x00)
/// Splits the list of bytes by the NULL-terminator (0x00) character
/// and puts each designation into a [`String`].
///
/// > e.g.
/// > ```text
/// > "LMT\u{0}HMT\u{0}MMT\u{0}IST\u{0}+0630\u{0}"
/// > ```
///
/// Note that two designations MAY overlap if one is a suffix
/// of the other. The character encoding of time zone designation
/// strings is not specified.
/// Note that a local time record index might point in the middle of a
/// designation. In that case the record's designation is the specified
/// suffix. The [DataBlock::time_zone_designation] method can be used to
/// access the correct designation string given an index.
///
/// The character encoding of time zone designation strings is not specified.
/// However, time zone designations SHOULD consist of at least three (3) and no
/// more than six (6) ASCII characters from the set of alphanumerics,
/// '-', and '+'. This is for compatibility with POSIX requirements
/// for time zone abbreviations, so this parser enforces a UTF-8 ASCII encoding,
/// to ensure compatability with Rust strings.
fn time_zone_designations<Input>(
charcnt: usize,
local_time_type_records: Vec<LocalTimeTypeRecord>,
) -> impl Parser<Input, Output = Vec<String>>
fn time_zone_designations<Input>(charcnt: usize) -> impl Parser<Input, Output = Vec<String>>
where
Input: Stream<Token = u8>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
raw_time_zone_designations(charcnt).map(move |raw_time_zone_designations| {
let mut time_zone_designations = Vec::with_capacity(local_time_type_records.len());
for record in &local_time_type_records {
for end_idx in record.idx..charcnt {
if raw_time_zone_designations.as_bytes()[end_idx] == b'\0' {
time_zone_designations.push(
String::from_utf8_lossy(
raw_time_zone_designations[record.idx..end_idx].as_bytes(),
)
.into_owned(),
);
break;
}
}
}
time_zone_designations
count_min_max(charcnt, charcnt, any()).map(|bytes: Vec<u8>| {
bytes
.split_inclusive(|b| *b == 0)
Copy link
Member

@nordzilla nordzilla Sep 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤏 nitpick: style

I would prefer to see *b == b'\0' here to preserve the relationship to the idea that this acts like a C-style null terminator character.

.map(|s| String::from_utf8_lossy(&s[0..s.len() - 1]).into_owned())
.collect()
})
}

Expand Down Expand Up @@ -678,45 +653,17 @@ where
Input: Stream<Token = u8>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
(
historic_transition_times::<V, _>(header.timecnt),
transition_types(header.timecnt, header.typecnt),
local_time_type_records(header.typecnt, header.charcnt),
)
.then(
move |(transition_times, transition_types, local_time_type_records)| {
(
value(transition_times),
value(transition_types),
value(local_time_type_records.clone()),
time_zone_designations(header.charcnt, local_time_type_records),
leap_second_records::<V, _>(header.leapcnt),
standard_wall_indicators(header.isstdcnt),
)
},
)
.then(
move |(
transition_times,
transition_types,
local_time_type_records,
time_zone_designations,
leap_second_records,
standard_wall_indicators,
)| {
combine::struct_parser! {
DataBlock {
transition_times: value(transition_times),
transition_types: value(transition_types),
local_time_type_records: value(local_time_type_records),
time_zone_designations: value(time_zone_designations),
leap_second_records: value(leap_second_records),
standard_wall_indicators: value(standard_wall_indicators),
ut_local_indicators: ut_local_indicators(header.isutcnt),
}
}
},
)
combine::struct_parser! {
DataBlock {
transition_times: historic_transition_times::<V, _>(header.timecnt),
transition_types: transition_types(header.timecnt, header.typecnt),
local_time_type_records: local_time_type_records(header.typecnt, header.charcnt),
time_zone_designations: time_zone_designations(header.charcnt),
leap_second_records: leap_second_records::<V, _>(header.leapcnt),
standard_wall_indicators: standard_wall_indicators(header.isstdcnt),
ut_local_indicators: ut_local_indicators(header.isutcnt),
}
}
}

/// Parses a `TZif` footer.
Expand Down Expand Up @@ -1196,31 +1143,51 @@ mod test {
#[test]
fn parse_time_zone_designations() {
assert_parse_eq!(
time_zone_designations(
14,
vec![
LocalTimeTypeRecord {
utoff: Seconds(35356),
is_dst: false,
idx: 0,
},
LocalTimeTypeRecord {
utoff: Seconds(39600),
is_dst: true,
idx: 4,
},
LocalTimeTypeRecord {
utoff: Seconds(36000),
is_dst: false,
idx: 9,
},
]
),
time_zone_designations(14),
"LMT\0AEDT\0AEST\0",
vec!["LMT".to_owned(), "AEDT".to_owned(), "AEST".to_owned()],
);
}

#[test]
fn time_zone_designation_indexing() {
let block: &[u8] = &[
0x00, 0x00, 0x00, 0x10, 0x01, 0x00, // local time record 0
0x00, 0x00, 0x00, 0x10, 0x01, 0x03, // local time record 1
0x00, 0x00, 0x00, 0x10, 0x01, 0x04, // local time record 2
0x00, 0x00, 0x00, 0x10, 0x01, 0x05, // local time record 3
b'L', b'M', b'T', 0x00, b'A', b'E', b'D', b'T', 0x00, // timezone designations
];
let header = TzifHeader {
version: 0,
isutcnt: 0,
isstdcnt: 0,
leapcnt: 0,
timecnt: 0,
typecnt: 4,
charcnt: 9,
};
let (block, _) = data_block::<1, _>(header).parse(block).unwrap();
assert_eq!(
block.time_zone_designation(block.local_time_type_records[0].idx),
Some("LMT")
);
assert_eq!(
block.time_zone_designation(block.local_time_type_records[1].idx),
Some("")
);
assert_eq!(
block.time_zone_designation(block.local_time_type_records[2].idx),
Some("AEDT")
);
assert_eq!(
block.time_zone_designation(block.local_time_type_records[3].idx),
Some("EDT")
);
assert_eq!(block.time_zone_designation(8), Some(""));
assert_eq!(block.time_zone_designation(9), None);
}

#[test]
fn parse_leap_second_occurrence() {
const FIVE: &[u8] = 5i64.to_be_bytes().as_slice();
Expand Down
Loading