Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve normalizer performance by adjusting the trie value format #5813

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions components/collator/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ use crate::provider::CollationTailoringV1Marker;
use crate::{AlternateHandling, CollatorOptions, MaxVariable, ResolvedCollatorOptions, Strength};
use core::cmp::Ordering;
use core::convert::TryFrom;
use icu_normalizer::provider::CanonicalDecompositionDataV1Marker;
use icu_normalizer::provider::CanonicalDecompositionDataV2Marker;
use icu_normalizer::provider::CanonicalDecompositionTablesV1Marker;
use icu_normalizer::provider::DecompositionDataV1;
use icu_normalizer::provider::DecompositionDataV2;
use icu_normalizer::provider::DecompositionTablesV1;
use icu_normalizer::Decomposition;
use icu_provider::prelude::*;
Expand Down Expand Up @@ -203,7 +203,7 @@ pub struct Collator {
diacritics: DataPayload<CollationDiacriticsV1Marker>,
options: CollatorOptionsBitField,
reordering: Option<DataPayload<CollationReorderingV1Marker>>,
decompositions: DataPayload<CanonicalDecompositionDataV1Marker>,
decompositions: DataPayload<CanonicalDecompositionDataV2Marker>,
tables: DataPayload<CanonicalDecompositionTablesV1Marker>,
lithuanian_dot_above: bool,
}
Expand Down Expand Up @@ -259,7 +259,7 @@ impl Collator {
+ DataProvider<CollationJamoV1Marker>
+ DataProvider<CollationMetadataV1Marker>
+ DataProvider<CollationReorderingV1Marker>
+ DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ ?Sized,
{
Expand All @@ -279,7 +279,7 @@ impl Collator {
fn try_new_unstable_internal<D>(
provider: &D,
root: DataPayload<CollationRootV1Marker>,
decompositions: DataPayload<CanonicalDecompositionDataV1Marker>,
decompositions: DataPayload<CanonicalDecompositionDataV2Marker>,
tables: DataPayload<CanonicalDecompositionTablesV1Marker>,
jamo: DataPayload<CollationJamoV1Marker>,
special_primaries: impl FnOnce() -> Result<
Expand Down Expand Up @@ -347,7 +347,7 @@ pub struct CollatorBorrowed<'a> {
diacritics: &'a CollationDiacriticsV1<'a>,
options: CollatorOptionsBitField,
reordering: Option<&'a CollationReorderingV1<'a>>,
decompositions: &'a DecompositionDataV1<'a>,
decompositions: &'a DecompositionDataV2<'a>,
tables: &'a DecompositionTablesV1<'a>,
lithuanian_dot_above: bool,
}
Expand All @@ -360,7 +360,7 @@ impl CollatorBorrowed<'static> {
// copypaste-compatible with `Collator::try_new_unstable_internal`.
let provider = &crate::provider::Baked;
let decompositions =
icu_normalizer::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V1_MARKER;
icu_normalizer::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V2_MARKER;
let tables =
icu_normalizer::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_TABLES_V1_MARKER;
let root = crate::provider::Baked::SINGLETON_COLLATION_ROOT_V1_MARKER;
Expand Down
165 changes: 63 additions & 102 deletions components/collator/src/elements.rs

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions components/collator/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ const _: () = {

icu_normalizer_data::impl_canonical_compositions_v1_marker!(TestingProvider);
icu_normalizer_data::impl_non_recursive_decomposition_supplement_v1_marker!(TestingProvider);
icu_normalizer_data::impl_canonical_decomposition_data_v1_marker!(TestingProvider);
icu_normalizer_data::impl_canonical_decomposition_data_v2_marker!(TestingProvider);
icu_normalizer_data::impl_canonical_decomposition_tables_v1_marker!(TestingProvider);
icu_normalizer_data::impl_compatibility_decomposition_supplement_v1_marker!(TestingProvider);
icu_normalizer_data::impl_compatibility_decomposition_data_v2_marker!(TestingProvider);
icu_normalizer_data::impl_compatibility_decomposition_tables_v1_marker!(TestingProvider);
icu_normalizer_data::impl_uts46_decomposition_supplement_v1_marker!(TestingProvider);
icu_normalizer_data::impl_uts46_decomposition_data_v2_marker!(TestingProvider);
};

type StackString = arraystring::ArrayString<arraystring::typenum::U32>;
Expand Down
8 changes: 3 additions & 5 deletions components/experimental/src/transliterate/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl Direction {
/// {$AE} [:Lowercase:] → Ae;
/// {$OE} [:Lowercase:] → Oe;
/// {$UE} [:Lowercase:] → Ue;
///
///
/// $AE → AE;
/// $OE → OE;
/// $UE → UE;
Expand Down Expand Up @@ -232,8 +232,7 @@ impl RuleCollection {
+ DataProvider<ScriptWithExtensionsPropertyV1Marker>
+ DataProvider<XidStartV1Marker>,
NP: ?Sized
+ DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>,
Expand Down Expand Up @@ -414,8 +413,7 @@ macro_rules! redirect {
}

redirect!(
CanonicalDecompositionDataV1Marker,
CompatibilityDecompositionSupplementV1Marker,
CompatibilityDecompositionDataV2Marker,
CanonicalDecompositionTablesV1Marker,
CompatibilityDecompositionTablesV1Marker,
CanonicalCompositionsV1Marker
Expand Down
30 changes: 14 additions & 16 deletions components/experimental/src/transliterate/transliterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct ComposingTransliterator(ComposingNormalizer);
impl ComposingTransliterator {
fn try_nfc<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<CanonicalDecompositionDataV1Marker>
P: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
+ ?Sized,
Expand All @@ -63,8 +63,7 @@ impl ComposingTransliterator {

fn try_nfkc<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
P: DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand All @@ -90,7 +89,7 @@ struct DecomposingTransliterator(DecomposingNormalizer);
impl DecomposingTransliterator {
fn try_nfd<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<CanonicalDecompositionDataV1Marker>
P: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ ?Sized,
{
Expand All @@ -101,8 +100,7 @@ impl DecomposingTransliterator {

fn try_nfkd<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
P: DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ ?Sized,
Expand Down Expand Up @@ -279,8 +277,8 @@ impl Transliterator {
) -> Result<Self, DataError>
where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
PN: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand Down Expand Up @@ -381,8 +379,8 @@ impl Transliterator {
) -> Result<Transliterator, DataError>
where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
PN: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand All @@ -405,8 +403,8 @@ impl Transliterator {
) -> Result<Transliterator, DataError>
where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
PN: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand Down Expand Up @@ -441,8 +439,8 @@ impl Transliterator {
) -> Result<DataPayload<TransliteratorRulesV1Marker>, DataError>
where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
PN: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand Down Expand Up @@ -490,8 +488,8 @@ impl Transliterator {
normalizer_provider: &P,
) -> Option<Result<InternalTransliterator, DataError>>
where
P: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
P: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand Down
Loading
Loading