Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add ZeroAsciiIgnoreCaseTrie::get_strict #5585

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions utils/zerotrie/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,15 @@ pub(crate) enum CaseSensitivity {
IgnoreCase,
}

/// How to handle lookup for strings with mixed ASCII case.
///
/// Only used in ignore-case tries: the reader checks this option only after it
/// has established that the trie's case sensitivity is `CaseSensitivity::IgnoreCase`.
#[derive(Copy, Clone)]
pub(crate) enum LookupStrictness {
/// Select strings that differ in case so long as their `to_ascii_lowercase` matches.
///
/// This is the behavior of an ordinary lookup on an ignore-case trie.
Normal,
/// Select strings only if they match exactly, including ASCII case.
///
/// Branch-node searches still order candidates by their lowercase form first and
/// only then compare the raw byte, so a differently-cased byte is not a match.
Strict,
}

impl CaseSensitivity {
#[cfg(feature = "serde")]
const fn to_u8_flag(self) -> u8 {
Expand All @@ -89,6 +98,7 @@ pub(crate) struct ZeroTrieBuilderOptions {
pub ascii_mode: AsciiMode,
pub capacity_mode: CapacityMode,
pub case_sensitivity: CaseSensitivity,
pub lookup_strictness: LookupStrictness,
}

impl ZeroTrieBuilderOptions {
Expand All @@ -113,6 +123,7 @@ impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieSimpleAscii<S> {
ascii_mode: AsciiMode::AsciiOnly,
capacity_mode: CapacityMode::Normal,
case_sensitivity: CaseSensitivity::Sensitive,
lookup_strictness: LookupStrictness::Normal,
};
}

Expand All @@ -129,6 +140,7 @@ impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroAsciiIgnoreCaseTrie<S> {
ascii_mode: AsciiMode::AsciiOnly,
capacity_mode: CapacityMode::Normal,
case_sensitivity: CaseSensitivity::IgnoreCase,
lookup_strictness: LookupStrictness::Normal,
};
}

Expand All @@ -137,13 +149,24 @@ impl<S: ?Sized> crate::ZeroAsciiIgnoreCaseTrie<S> {
pub(crate) const FLAGS: u8 = Self::OPTIONS.to_u8_flags();
}

/// Internal struct to power `get_strict`.
///
/// This is a data-less marker type: it is never instantiated with trie bytes and
/// exists only so that its `ZeroTrieWithOptions::OPTIONS` can be selected through
/// the type parameter of `reader::get_parameterized`.
pub(crate) struct ZeroAsciiIgnoreCaseStrictTrie;

/// Lookup options for strict (exact-case) queries against an ignore-case trie.
///
/// Every option is inherited from `ZeroAsciiIgnoreCaseTrie`; only
/// `lookup_strictness` is overridden, switching it to `Strict`.
impl ZeroTrieWithOptions for ZeroAsciiIgnoreCaseStrictTrie {
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        lookup_strictness: LookupStrictness::Strict,
        // The store type is spelled out explicitly: `OPTIONS` is implemented for
        // every store `S`, so nothing in this expression lets the compiler infer
        // it, and leaving it off is rejected with "type annotations needed".
        // `[u8]` is accepted because the impl covers `S: ?Sized`.
        ..crate::ZeroAsciiIgnoreCaseTrie::<[u8]>::OPTIONS
    };
}

/// Branch nodes could be either binary search or PHF.
///
/// Builder/reader options for `ZeroTriePerfectHash`. The trie is case-sensitive,
/// so `lookup_strictness` is left at `Normal`; that option is only used in
/// ignore-case tries.
impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTriePerfectHash<S> {
const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
phf_mode: PhfMode::UsePhf,
ascii_mode: AsciiMode::BinarySpans,
capacity_mode: CapacityMode::Normal,
case_sensitivity: CaseSensitivity::Sensitive,
// Not consulted for case-sensitive tries; `Normal` is the neutral value.
lookup_strictness: LookupStrictness::Normal,
};
}

Expand All @@ -159,6 +182,7 @@ impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieExtendedCapacity<S> {
ascii_mode: AsciiMode::BinarySpans,
capacity_mode: CapacityMode::Extended,
case_sensitivity: CaseSensitivity::Sensitive,
lookup_strictness: LookupStrictness::Normal,
};
}

Expand Down
23 changes: 20 additions & 3 deletions utils/zerotrie/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ pub(crate) fn get_parameterized<T: ZeroTrieWithOptions + ?Sized>(
if let Some((c, temp)) = ascii.split_first() {
if matches!(byte_type, NodeType::Ascii) {
let is_match = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase)
&& matches!(T::OPTIONS.lookup_strictness, LookupStrictness::Normal)
{
b.to_ascii_lowercase() == c.to_ascii_lowercase()
} else {
Expand Down Expand Up @@ -367,12 +368,28 @@ pub(crate) fn get_parameterized<T: ZeroTrieWithOptions + ?Sized>(
if matches!(T::OPTIONS.phf_mode, PhfMode::BinaryOnly) || x < 16 {
// binary search
(search, trie) = trie.debug_split_at(x);
// TODO(#5584): Consider making all of these have the same order of elements
let bsearch_result =
if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) {
search.binary_search_by_key(&c.to_ascii_lowercase(), |x| {
x.to_ascii_lowercase()
})
if matches!(T::OPTIONS.lookup_strictness, LookupStrictness::Normal) {
// Ordering: (A=a), (B=b), (C=c), ..., (Z=z)
search.binary_search_by_key(&c.to_ascii_lowercase(), |x| {
x.to_ascii_lowercase()
})
} else {
// Ordering: A, a, B, b, C, c, ..., Z, z
let c_lowercase = c.to_ascii_lowercase();
search.binary_search_by(move |p| {
let p_lowercase = p.to_ascii_lowercase();
if c_lowercase == p_lowercase {
p.cmp(c)
} else {
p_lowercase.cmp(&c_lowercase)
}
})
}
} else {
// Ordering: A, B, C, ..., Z, a, b, c, ..., z
search.binary_search(c)
};
i = bsearch_result.ok()?;
Expand Down
39 changes: 39 additions & 0 deletions utils/zerotrie/src/zerotrie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,45 @@ impl_zerotrie_subtype!(
Vec::into_boxed_slice
);

impl<Store> ZeroAsciiIgnoreCaseTrie<Store>
where
Store: AsRef<[u8]> + ?Sized,
{
/// Queries the trie for a string, requiring that it matches case.
///
/// # Examples
///
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
///
/// let mut map = LiteMap::new_vec();
/// map.insert(&b"foo"[..], 1);
/// map.insert(b"Bar", 2);
/// map.insert(b"Bingo", 3);
///
/// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?;
///
/// assert_eq!(trie.get(b"foo"), Some(1));
/// assert_eq!(trie.get(b"bar"), Some(2));
/// assert_eq!(trie.get(b"BaR"), Some(2));
/// assert_eq!(trie.get_strict(b"bar"), None);
/// assert_eq!(trie.get_strict(b"BaR"), None);
/// assert_eq!(trie.get_strict(b"Bar"), Some(2));
///
/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
/// ```
pub fn get_strict<K>(&self, key: K) -> Option<usize>
where
K: AsRef<[u8]>,
{
reader::get_parameterized::<crate::options::ZeroAsciiIgnoreCaseStrictTrie>(
self.store.as_ref(),
key.as_ref(),
)
}
}

macro_rules! impl_dispatch {
($self:ident, $inner_fn:ident()) => {
match $self.0 {
Expand Down
Loading