From 7bead9d18f1082dfd8fe1bde294e179967c9d2bc Mon Sep 17 00:00:00 2001 From: Scott Zhong Date: Thu, 11 Jul 2024 15:19:16 -0400 Subject: [PATCH 1/9] ICU-22833 Move kCurrencyEntries entries into source file to utilize escapesrc --- icu4c/source/common/static_unicode_sets.cpp | 8 ++++++++ icu4c/source/common/static_unicode_sets.h | 10 ++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/icu4c/source/common/static_unicode_sets.cpp b/icu4c/source/common/static_unicode_sets.cpp index 0db5ea000d4a..d6fe687b7e6c 100644 --- a/icu4c/source/common/static_unicode_sets.cpp +++ b/icu4c/source/common/static_unicode_sets.cpp @@ -227,6 +227,14 @@ Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) { return get(key1)->contains(str) ? key1 : chooseFrom(str, key2); } +CurrencySymbols unisets::kCurrencyEntries[5] = { + {DOLLAR_SIGN, u'$'}, + {POUND_SIGN, u'£'}, + {RUPEE_SIGN, u'₹'}, + {YEN_SIGN, u'¥'}, + {WON_SIGN, u'₩'}, +}; + //Key unisets::chooseCurrency(UnicodeString str) { // if (get(DOLLAR_SIGN)->contains(str)) { // return DOLLAR_SIGN; diff --git a/icu4c/source/common/static_unicode_sets.h b/icu4c/source/common/static_unicode_sets.h index 5d90ce5908de..6e8edd4123da 100644 --- a/icu4c/source/common/static_unicode_sets.h +++ b/icu4c/source/common/static_unicode_sets.h @@ -122,16 +122,10 @@ U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2); // Unused in C++: // Key chooseCurrency(UnicodeString str); // Used instead: -static const struct { +extern struct CurrencySymbols { Key key; UChar32 exemplar; -} kCurrencyEntries[] = { - {DOLLAR_SIGN, u'$'}, - {POUND_SIGN, u'£'}, - {RUPEE_SIGN, u'₹'}, - {YEN_SIGN, u'¥'}, - {WON_SIGN, u'₩'}, -}; +} kCurrencyEntries[5]; } // namespace unisets U_NAMESPACE_END From 8fffd95a27d8e6a15019af52162d1532bf41e7af Mon Sep 17 00:00:00 2001 From: Scott Zhong Date: Thu, 11 Jul 2024 15:21:19 -0400 Subject: [PATCH 2/9] ICU-22833 Move uniset_getUnicode32Instance out of anonymous namespace Anonymous namespace causes 
internal linkage, so functions with external linkage should not be within it. --- icu4c/source/common/uniset_props.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/icu4c/source/common/uniset_props.cpp b/icu4c/source/common/uniset_props.cpp index b3dbdf93c885..81aba349ef64 100644 --- a/icu4c/source/common/uniset_props.cpp +++ b/icu4c/source/common/uniset_props.cpp @@ -91,7 +91,7 @@ void U_CALLCONV createUni32Set(UErrorCode &errorCode) { } ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); } - +} U_CFUNC UnicodeSet * uniset_getUnicode32Instance(UErrorCode &errorCode) { @@ -99,6 +99,8 @@ uniset_getUnicode32Instance(UErrorCode &errorCode) { return uni32Singleton; } +namespace { + // helper functions for matching of pattern syntax pieces ------------------ *** // these functions are parallel to the PERL_OPEN etc. strings above From 6dda44ff90af140ce368785f3828799057072759 Mon Sep 17 00:00:00 2001 From: Scott Zhong Date: Thu, 11 Jul 2024 15:23:09 -0400 Subject: [PATCH 3/9] ICU-22833 Work around Oracle Studio compiler with constexpr --- icu4c/source/i18n/formatted_string_builder.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/icu4c/source/i18n/formatted_string_builder.h b/icu4c/source/i18n/formatted_string_builder.h index 32e0900ae23a..262346ff2778 100644 --- a/icu4c/source/i18n/formatted_string_builder.h +++ b/icu4c/source/i18n/formatted_string_builder.h @@ -225,8 +225,7 @@ static_assert( constexpr FormattedStringBuilder::Field::Field(uint8_t category, uint8_t field) : bits(( - U_ASSERT(category <= 0xf), - U_ASSERT(field <= 0xf), + U_ASSERT(category <= 0xf && field <= 0xf), static_cast<uint8_t>((category << 4) | field) )) {} From 08ed95600294223e3d300e6071238fad1e78d301 Mon Sep 17 00:00:00 2001 From: Scott Zhong Date: Thu, 11 Jul 2024 16:20:25 -0400 Subject: [PATCH 4/9] ICU-22833 Correct C linkage on function for Oracle Studio compiler --- icu4c/source/tools/genrb/reslist.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/icu4c/source/tools/genrb/reslist.cpp b/icu4c/source/tools/genrb/reslist.cpp index e1c2d25061cd..19cfcdcb7122 100644 --- a/icu4c/source/tools/genrb/reslist.cpp +++ b/icu4c/source/tools/genrb/reslist.cpp @@ -1346,11 +1346,11 @@ compareKeyOldpos(const void * /*context*/, const void *l, const void *r) { return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos); } -void SResource::collectKeys(std::function<void(int32_t)> collector) const { +U_CFUNC void SResource::collectKeys(std::function<void(int32_t)> collector) const { collector(fKey); } -void ContainerResource::collectKeys(std::function<void(int32_t)> collector) const { +U_CFUNC void ContainerResource::collectKeys(std::function<void(int32_t)> collector) const { collector(fKey); for (SResource* curr = fFirst; curr != nullptr; curr = curr->fNext) { curr->collectKeys(collector); From 98c921df5183e54b16f146d61a79b111332c3c42 Mon Sep 17 00:00:00 2001 From: Scott Zhong Date: Thu, 11 Jul 2024 16:28:30 -0400 Subject: [PATCH 5/9] ICU-22833 Cast to resolve ambiguity when calling assertEquals --- icu4c/source/test/intltest/convtest.cpp | 4 ++-- icu4c/source/test/intltest/strtest.cpp | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/icu4c/source/test/intltest/convtest.cpp b/icu4c/source/test/intltest/convtest.cpp index 9c2fef3b8a37..26d05c0ffe8f 100644 --- a/icu4c/source/test/intltest/convtest.cpp +++ b/icu4c/source/test/intltest/convtest.cpp @@ -744,7 +744,7 @@ ConversionTest::TestUTF8ToUTF8Overflow() { assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset()); length = (int32_t)(target - result); assertEquals("number of bytes written", 2, length); - assertEquals("next byte not clobbered", 5, result[2]); + assertEquals("next byte not clobbered", (char)5, result[2]); // Convert the rest and flush. 
ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), @@ -777,7 +777,7 @@ ConversionTest::TestUTF8ToUTF8Overflow() { assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset()); length = (int32_t)(target - result); assertEquals("text2 number of bytes written", 3, length); - assertEquals("text2 next byte not clobbered", 5, result[3]); + assertEquals("text2 next byte not clobbered", (char)5, result[3]); // Convert the rest and flush. ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), diff --git a/icu4c/source/test/intltest/strtest.cpp b/icu4c/source/test/intltest/strtest.cpp index dff7bc409742..9aa9218328e3 100644 --- a/icu4c/source/test/intltest/strtest.cpp +++ b/icu4c/source/test/intltest/strtest.cpp @@ -538,9 +538,9 @@ StringTest::TestStringPieceU8() { StringPiece abc3(u8"abcdef", 3); assertEquals("abc3.length", 3, abc3.length()); - assertEquals("abc3[0]", 0x61, abc3.data()[0]); - assertEquals("abc3[1]", 0x62, abc3.data()[1]); - assertEquals("abc3[2]", 0x63, abc3.data()[2]); + assertEquals("abc3[0]", (char)0x61, abc3.data()[0]); + assertEquals("abc3[1]", (char)0x62, abc3.data()[1]); + assertEquals("abc3[2]", (char)0x63, abc3.data()[2]); StringPiece uvw("q"); uvw.set(u8"uvw"); @@ -550,9 +550,9 @@ StringTest::TestStringPieceU8() { StringPiece xyz("r"); xyz.set(u8"xyzXYZ", 3); assertEquals("xyz.length", 3, xyz.length()); - assertEquals("xyz[0]", 0x78, xyz.data()[0]); - assertEquals("xyz[1]", 0x79, xyz.data()[1]); - assertEquals("xyz[2]", 0x7a, xyz.data()[2]); + assertEquals("xyz[0]", (char)0x78, xyz.data()[0]); + assertEquals("xyz[1]", (char)0x79, xyz.data()[1]); + assertEquals("xyz[2]", (char)0x7a, xyz.data()[2]); StringPiece null(nullptr); assertTrue("null is empty", null.empty()); From d4133cd425c1c67abfd72781ece4c9abaf11f5e5 Mon Sep 17 00:00:00 2001 From: Scott Zhong Date: Thu, 11 Jul 2024 20:03:59 -0400 Subject: [PATCH 6/9] ICU-22833 Resolve extern function overload for Oracle Studio compiler --- icu4c/source/test/intltest/itutil.cpp | 4 +++- 1 file 
changed, 3 insertions(+), 1 deletion(-) diff --git a/icu4c/source/test/intltest/itutil.cpp b/icu4c/source/test/intltest/itutil.cpp index a89c83f0243b..5f11ca9f3c85 100644 --- a/icu4c/source/test/intltest/itutil.cpp +++ b/icu4c/source/test/intltest/itutil.cpp @@ -808,12 +808,14 @@ void LocalPointerTest::TestLocalXyzPointerNull() { } +extern "C" typedef void(*fun_ptr)(UConstrainedFieldPosition*); + void LocalPointerTest::TestLocalXyzStdUniquePtr() { IcuTestErrorCode status(*this, "TestLocalXyzStdUniquePtr"); #if !UCONFIG_NO_FORMATTING auto* ptr = ucfpos_open(status); // Implicit conversion operator - std::unique_ptr<UConstrainedFieldPosition, void(*)(UConstrainedFieldPosition*)> a = + std::unique_ptr<UConstrainedFieldPosition, fun_ptr> a = LocalUConstrainedFieldPositionPointer(ptr); // Explicit move constructor LocalUConstrainedFieldPositionPointer a2(std::move(a)); From 0c757b0c109ef3cb1f6ba7b2db9f2f0b431d28d8 Mon Sep 17 00:00:00 2001 From: Scott Zhong Date: Thu, 11 Jul 2024 20:05:03 -0400 Subject: [PATCH 7/9] ICU-22833 Remove unnecessary macro in test that causes Oracle Studio error --- icu4c/source/test/intltest/numbertest_api.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index 4146247ff299..af3d52060a28 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -3763,10 +3763,6 @@ void NumberFormatterApiTest::roundingOther() { DBL_MIN, u"2.225074E-308"); -#ifndef DBL_TRUE_MIN -#define DBL_TRUE_MIN 4.9E-324 -#endif - // Note: this behavior is intentionally different from Java; see // https://github.com/google/double-conversion/issues/126 assertFormatSingle( @@ -3775,7 +3771,7 @@ void NumberFormatterApiTest::roundingOther() { u"E0", NumberFormatter::with().notation(Notation::scientific()), Locale::getEnglish(), - DBL_TRUE_MIN, + 4.9E-324, u"5E-324"); } From 316f10a5df0aa0e6fb348006a3df7fd2ae64a161 Mon Sep 17 00:00:00 2001 From: Scott Zhong Date: Fri, 12 Jul 2024 12:04:32 -0400 Subject: 
[PATCH 8/9] ICU-22833 Use lowercase u for UTF-16 literals --- icu4c/source/test/intltest/rbbitst.cpp | 12 ++++++------ icu4c/source/test/intltest/uobjtest.cpp | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index 2afe4b3912df..b07553eb27a3 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -3379,20 +3379,20 @@ int32_t RBBILineMonkey::next(int32_t startPos) { } if (fAP->contains(prevChar) && - (fAK->contains(thisChar) || thisChar == U'◌' || fAS->contains(thisChar))) { + (fAK->contains(thisChar) || thisChar == (UChar32)u'◌' || fAS->contains(thisChar))) { setAppliedRule(pos, "LB 28a.1 AP x (AK | ◌ | AS)"); continue; } - if ((fAK->contains(prevChar) || prevChar == U'◌' || fAS->contains(prevChar)) && + if ((fAK->contains(prevChar) || prevChar == (UChar32)u'◌' || fAS->contains(prevChar)) && (fVF->contains(thisChar) || fVI->contains(thisChar))) { setAppliedRule(pos, "LB 28a.2 (AK | ◌ | AS) x (VF | VI)"); continue; } - if ((fAK->contains(prevCharX2) || prevCharX2 == U'◌' || fAS->contains(prevCharX2)) && + if ((fAK->contains(prevCharX2) || prevCharX2 == (UChar32)u'◌' || fAS->contains(prevCharX2)) && fVI->contains(prevChar) && - (fAK->contains(thisChar) || thisChar == U'◌')) { + (fAK->contains(thisChar) || thisChar == (UChar32)u'◌')) { setAppliedRule(pos, "LB 28a.3 (AK | ◌ | AS) VI x (AK | ◌)"); continue; } @@ -3401,8 +3401,8 @@ int32_t RBBILineMonkey::next(int32_t startPos) { // note: UnicodeString::char32At(length) returns ffff, not distinguishable // from a legit ffff noncharacter. So test length separately. 
UChar32 nextChar = fText->char32At(nextPos); - if ((fAK->contains(prevChar) || prevChar == U'◌' || fAS->contains(prevChar)) && - (fAK->contains(thisChar) || thisChar == U'◌' || fAS->contains(thisChar)) && + if ((fAK->contains(prevChar) || prevChar == (UChar32)u'◌' || fAS->contains(prevChar)) && + (fAK->contains(thisChar) || thisChar == (UChar32)u'◌' || fAS->contains(thisChar)) && fVF->contains(nextChar)) { setAppliedRule(pos, "LB 28a.4 (AK | ◌ | AS) x (AK | ◌ | AS) VF"); continue; diff --git a/icu4c/source/test/intltest/uobjtest.cpp b/icu4c/source/test/intltest/uobjtest.cpp index bb0e69da8e3f..537c36f17666 100644 --- a/icu4c/source/test/intltest/uobjtest.cpp +++ b/icu4c/source/test/intltest/uobjtest.cpp @@ -512,7 +512,7 @@ void UObjectTest::testUMemory() { UnicodeString *p; enum { len=20 }; - p=new(bytes) UnicodeString(len, (UChar32)U'€', len); + p=new(bytes) UnicodeString(len, (UChar32)u'€', len); if((void *)p!=(void *)bytes) { errln("placement new did not place the object at the expected address"); } From 1429db56bf2129744ad6e62f91c263762a0274e5 Mon Sep 17 00:00:00 2001 From: Scott Zhong Date: Fri, 12 Jul 2024 12:05:23 -0400 Subject: [PATCH 9/9] ICU-22833 Convert to hex encoding literals for UTF-32 literals `escapesrc` is unable to convert UTF-32 literal into escape --- icu4c/source/test/intltest/usettest.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/icu4c/source/test/intltest/usettest.cpp b/icu4c/source/test/intltest/usettest.cpp index b7bf21e436cb..440f46865957 100644 --- a/icu4c/source/test/intltest/usettest.cpp +++ b/icu4c/source/test/intltest/usettest.cpp @@ -4177,6 +4177,8 @@ void UnicodeSetTest::TestSkipToStrings() { assertFalse("no next", iter.next()); } +#include + void UnicodeSetTest::TestPatternCodePointComplement() { IcuTestErrorCode errorCode(*this, "TestPatternCodePointComplement"); // ICU-21524 changes pattern ^ and equivalent functions to perform a "code point complement". 
@@ -4195,14 +4197,14 @@ void UnicodeSetTest::TestPatternCodePointComplement() { } assertTrue("[:^Basic_Emoji:] --> lots of elements", notBasic.size() > 1000); assertFalse("[:^Basic_Emoji:] --> no strings", notBasic.hasStrings()); - assertFalse("[:^Basic_Emoji:] --> no bicycle", notBasic.contains(U'🚲')); + assertFalse("[:^Basic_Emoji:] --> no bicycle", notBasic.contains(U'\U0001F6B2')); } { UnicodeSet notBasic(u"[:Basic_Emoji=No:]", errorCode); assertTrue("[:Basic_Emoji=No:] --> lots of elements", notBasic.size() > 1000); assertFalse("[:Basic_Emoji=No:] --> no strings", notBasic.hasStrings()); - assertFalse("[:Basic_Emoji=No:] --> no bicycle", notBasic.contains(U'🚲')); + assertFalse("[:Basic_Emoji=No:] --> no bicycle", notBasic.contains(U'\U0001F6B2')); } { @@ -4213,7 +4215,7 @@ void UnicodeSetTest::TestPatternCodePointComplement() { assertFalse("[].applyIntPropertyValue(Basic_Emoji, 0) --> no strings", notBasic.hasStrings()); assertFalse("[].applyIntPropertyValue(Basic_Emoji, 0) --> no bicycle", - notBasic.contains(U'🚲')); + notBasic.contains(U'\U0001F6B2')); } { @@ -4224,7 +4226,7 @@ void UnicodeSetTest::TestPatternCodePointComplement() { assertFalse("[].applyPropertyAlias(Basic_Emoji, No) --> no strings", notBasic.hasStrings()); assertFalse("[].applyPropertyAlias(Basic_Emoji, No) --> no bicycle", - notBasic.contains(U'🚲')); + notBasic.contains(U'\U0001F6B2')); } // When there are strings, we must not use the complement for a more compact toPattern(). @@ -4253,6 +4255,6 @@ void UnicodeSetTest::TestPatternCodePointComplement() { assertTrue("[:Basic_Emoji:].complement() --> has strings", notBasic.hasStrings()); assertTrue("[:Basic_Emoji:].complement().contains(chipmunk+emoji)", notBasic.contains(u"🐿\uFE0F")); - assertFalse("[:Basic_Emoji:].complement() --> no bicycle", notBasic.contains(U'🚲')); + assertFalse("[:Basic_Emoji:].complement() --> no bicycle", notBasic.contains(U'\U0001F6B2')); } }