Skip to content

Commit

Permalink
Merge remote-tracking branch 'la-vache/main' into arabic-crown-letters
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Jun 7, 2024
2 parents 4d60844 + 5a4870e commit 835cdda
Show file tree
Hide file tree
Showing 24 changed files with 176 additions and 19 deletions.
2 changes: 1 addition & 1 deletion unicodetools/data/ucd/dev/DerivedAge.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedAge-16.0.0.txt

Check warning on line 1 in unicodetools/data/ucd/dev/DerivedAge.txt

View workflow job for this annotation

GitHub Actions / Draft unless approved

Not in the 16.0 pipeline

These characters are neither accepted for Unicode 16.0, nor for any specific version of Unicode, nor are they provisionally assigned. The Age property values for new characters are likely incorrect right now. They will be recomputed after the UTC accepts their encoding and this pull request is updated for the target version.

Check warning on line 1 in unicodetools/data/ucd/dev/DerivedAge.txt

View workflow job for this annotation

GitHub Actions / Draft unless approved

Not in the 16.0 pipeline

These characters are neither accepted for Unicode 16.0, nor for any specific version of Unicode, nor are they provisionally assigned. The Age property values for new characters are likely incorrect right now. They will be recomputed after the UTC accepts their encoding and this pull request is updated for the target version.
# Date: 2024-05-23, 16:13:47 GMT
# Date: 2024-06-07, 15:34:57 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
2 changes: 1 addition & 1 deletion unicodetools/data/ucd/dev/DerivedCoreProperties.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedCoreProperties-16.0.0.txt
# Date: 2024-05-23, 16:14:19 GMT
# Date: 2024-06-07, 15:35:16 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
11 changes: 9 additions & 2 deletions unicodetools/data/ucd/dev/PropertyAliases.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# PropertyAliases-16.0.0.txt
# Date: 2024-04-30, 21:48:30 GMT
# Date: 2024-06-06, 21:52:48 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -99,6 +99,11 @@ cjkIRG_VSource ; kIRG_VSource
cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS
isc ; ISO_Comment
JSN ; Jamo_Short_Name
kEH_Cat ; kEH_Cat
kEH_Desc ; kEH_Desc
kEH_HG ; kEH_HG
kEH_IFAO ; kEH_IFAO
kEH_JSesh ; kEH_JSesh
na ; Name
na1 ; Unicode_1_Name
Name_Alias ; Name_Alias
Expand Down Expand Up @@ -179,6 +184,8 @@ IDSB ; IDS_Binary_Operator
IDST ; IDS_Trinary_Operator
IDSU ; IDS_Unary_Operator
Join_C ; Join_Control
kEH_NoMirror ; kEH_NoMirror
kEH_NoRotate ; kEH_NoRotate
LOE ; Logical_Order_Exception
Lower ; Lowercase
Math ; Math
Expand Down Expand Up @@ -213,6 +220,6 @@ XO_NFKC ; Expands_On_NFKC
XO_NFKD ; Expands_On_NFKD

# ================================================
# Total: 135
# Total: 142

# EOF
32 changes: 31 additions & 1 deletion unicodetools/data/ucd/dev/PropertyValueAliases.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# PropertyValueAliases-16.0.0.txt
# Date: 2024-05-23, 16:43:21 GMT
# Date: 2024-06-07, 15:35:32 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -1684,4 +1684,34 @@ XIDS; Y ; Yes ; T

# @missing: 0000..10FFFF; cjkRSUnicode; <none>

# kEH_Cat (kEH_Cat)

# @missing: 0000..10FFFF; kEH_Cat; <none>

# kEH_Desc (kEH_Desc)

# @missing: 0000..10FFFF; kEH_Desc; <none>

# kEH_HG (kEH_HG)

# @missing: 0000..10FFFF; kEH_HG; <none>

# kEH_IFAO (kEH_IFAO)

# @missing: 0000..10FFFF; kEH_IFAO; <none>

# kEH_JSesh (kEH_JSesh)

# @missing: 0000..10FFFF; kEH_JSesh; <none>

# kEH_NoMirror (kEH_NoMirror)

kEH_NoMirror; N ; No ; F ; False
kEH_NoMirror; Y ; Yes ; T ; True

# kEH_NoRotate (kEH_NoRotate)

kEH_NoRotate; N ; No ; F ; False
kEH_NoRotate; Y ; Yes ; T ; True

# EOF
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# GraphemeBreakProperty-16.0.0.txt
# Date: 2024-05-23, 16:14:28 GMT
# Date: 2024-06-07, 15:35:21 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SentenceBreakProperty-16.0.0.txt
# Date: 2024-05-23, 16:15:02 GMT
# Date: 2024-06-07, 15:35:46 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
2 changes: 1 addition & 1 deletion unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# WordBreakProperty-16.0.0.txt
# Date: 2024-05-23, 16:15:06 GMT
# Date: 2024-06-07, 15:35:48 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
2 changes: 1 addition & 1 deletion unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedBidiClass-16.0.0.txt
# Date: 2024-05-23, 16:14:15 GMT
# Date: 2024-06-07, 15:35:13 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedCombiningClass-16.0.0.txt
# Date: 2024-05-23, 16:14:18 GMT
# Date: 2024-06-07, 15:35:15 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedEastAsianWidth-16.0.0.txt
# Date: 2024-05-23, 16:14:21 GMT
# Date: 2024-06-07, 15:35:17 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedGeneralCategory-16.0.0.txt
# Date: 2024-05-23, 16:14:21 GMT
# Date: 2024-06-07, 15:35:17 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedJoiningGroup-16.0.0.txt
# Date: 2024-05-24, 11:54:53 GMT
# Date: 2024-06-07, 15:35:18 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
2 changes: 1 addition & 1 deletion unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedJoiningType-16.0.0.txt
# Date: 2024-05-24, 11:54:54 GMT
# Date: 2024-06-07, 15:35:18 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
2 changes: 1 addition & 1 deletion unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedLineBreak-16.0.0.txt
# Date: 2024-05-23, 16:14:24 GMT
# Date: 2024-06-07, 15:35:19 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
2 changes: 1 addition & 1 deletion unicodetools/data/ucd/dev/extracted/DerivedName.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedName-16.0.0.txt
# Date: 2024-05-23, 16:14:24 GMT
# Date: 2024-06-07, 15:35:19 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,10 @@ static void parseSourceFile(
} else {
indexUnicodeProperties.getFileNames().add(fullFilename);
UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename));
if (fileName.startsWith("Unihan") || fileName.startsWith("k")) {
if (fileName.startsWith("Unihan")
|| fileName.startsWith("Unikemet")
|| (fileName.endsWith("Sources") && !fileName.startsWith("Emoji"))
|| fileName.startsWith("k")) {
parser.withTabs(true);
}
PropertyParsingInfo propInfo;
Expand Down
14 changes: 12 additions & 2 deletions unicodetools/src/main/java/org/unicode/props/PropertyStatus.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ public enum PropertyScope {
UcdProperty.Emoji_KDDI,
UcdProperty.Emoji_SB);

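// TODO(egg): The EnumSet<UcdProperty> status lists in this class (CONTRIBUTORY_PROPERTY, NORMATIVE_PROPERTY, IMMUTABLE_PROPERTY, etc.) are not up to date!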
private static final EnumSet<UcdProperty> CONTRIBUTORY_PROPERTY =
EnumSet.of(
UcdProperty.Jamo_Short_Name,
Expand Down Expand Up @@ -230,7 +231,10 @@ public enum PropertyScope {
UcdProperty.Named_Sequences_Prov,
UcdProperty.Regional_Indicator,
UcdProperty.Standardized_Variant,
UcdProperty.Vertical_Orientation);
UcdProperty.Vertical_Orientation,
// Unikemet
UcdProperty.kEH_Cat,
UcdProperty.kEH_Desc);

private static final EnumSet<UcdProperty> NORMATIVE_PROPERTY =
EnumSet.of(
Expand Down Expand Up @@ -290,7 +294,13 @@ public enum PropertyScope {
UcdProperty.kIRG_MSource,
UcdProperty.kIRG_TSource,
UcdProperty.kIRG_USource,
UcdProperty.kIRG_VSource);
UcdProperty.kIRG_VSource,
// Unikemet
UcdProperty.kEH_HG,
UcdProperty.kEH_IFAO,
UcdProperty.kEH_JSesh,
UcdProperty.kEH_NoMirror,
UcdProperty.kEH_NoRotate);
private static final EnumSet<UcdProperty> IMMUTABLE_PROPERTY =
EnumSet.of(
UcdProperty.Name,
Expand Down
15 changes: 15 additions & 0 deletions unicodetools/src/main/java/org/unicode/props/UcdProperty.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,14 @@ public enum UcdProperty {
kDaeJaweon(PropertyType.Miscellaneous, "cjkDaeJaweon"),
kDefinition(PropertyType.Miscellaneous, "cjkDefinition"),
kEACC(PropertyType.Miscellaneous, "cjkEACC"),
kEH_Cat(PropertyType.Miscellaneous, "kEH_Cat"),
kEH_Desc(PropertyType.Miscellaneous, "kEH_Desc"),
kEH_FVal(PropertyType.Miscellaneous, "kEH_FVal"),
kEH_Func(PropertyType.Miscellaneous, "kEH_Func"),
kEH_HG(PropertyType.Miscellaneous, "kEH_HG"),
kEH_IFAO(PropertyType.Miscellaneous, "kEH_IFAO"),
kEH_JSesh(PropertyType.Miscellaneous, "kEH_JSesh"),
kEH_UniK(PropertyType.Miscellaneous, "kEH_UniK"),
kFanqie(PropertyType.Miscellaneous, "cjkFanqie"),
kFenn(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkFenn"),
kFennIndex(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkFennIndex"),
Expand Down Expand Up @@ -182,13 +190,15 @@ public enum UcdProperty {
kRSKanWa(PropertyType.Miscellaneous, "cjkRSKanWa"),
kRSKangXi(PropertyType.Miscellaneous, "cjkRSKangXi"),
kRSKorean(PropertyType.Miscellaneous, "cjkRSKorean"),
kRSTUnicode(PropertyType.Miscellaneous, "kRSTUnicode"),
kRSUnicode(
PropertyType.Miscellaneous,
null,
ValueCardinality.Ordered,
"cjkRSUnicode",
"Unicode_Radical_Stroke",
"URS"),
kReading(PropertyType.Miscellaneous, "kReading"),
kSBGY(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkSBGY"),
kSMSZD2003Index(PropertyType.Miscellaneous, "cjkSMSZD2003Index"),
kSMSZD2003Readings(PropertyType.Miscellaneous, "cjkSMSZD2003Readings"),
Expand All @@ -200,9 +210,11 @@ public enum UcdProperty {
ValueCardinality.Unordered,
"cjkSpecializedSemanticVariant"),
kSpoofingVariant(PropertyType.Miscellaneous, "cjkSpoofingVariant"),
kSrc_NushuDuben(PropertyType.Miscellaneous, "kSrc_NushuDuben"),
kStrange(PropertyType.Miscellaneous, "cjkStrange"),
kTGH(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTGH"),
kTGHZ2013(PropertyType.Miscellaneous, "cjkTGHZ2013"),
kTGT_MergedSrc(PropertyType.Miscellaneous, "kTGT_MergedSrc"),
kTaiwanTelegraph(PropertyType.Miscellaneous, "cjkTaiwanTelegraph"),
kTang(PropertyType.Miscellaneous, null, ValueCardinality.Unordered, "cjkTang"),
kTotalStrokes(PropertyType.Miscellaneous, null, ValueCardinality.Ordered, "cjkTotalStrokes"),
Expand Down Expand Up @@ -341,6 +353,9 @@ public enum UcdProperty {
White_Space(PropertyType.Binary, Binary.class, null, "WSpace", "space"),
XID_Continue(PropertyType.Binary, Binary.class, null, "XIDC"),
XID_Start(PropertyType.Binary, Binary.class, null, "XIDS"),
kEH_Core(PropertyType.Binary, Binary.class, null, "kEH_Core"),
kEH_NoMirror(PropertyType.Binary, Binary.class, null, "kEH_NoMirror"),
kEH_NoRotate(PropertyType.Binary, Binary.class, null, "kEH_NoRotate"),

// Unknown
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1443,6 +1443,14 @@ public static Joining_Type_Values forName(String name) {
// kDaeJaweon
// kDefinition
// kEACC
// kEH_Cat
// kEH_Desc
// kEH_Func
// kEH_FVal
// kEH_HG
// kEH_IFAO
// kEH_JSesh
// kEH_UniK
// kFanqie
// kFenn
// kFennIndex
Expand Down Expand Up @@ -1510,11 +1518,13 @@ public static Joining_Type_Values forName(String name) {
// kPhonetic
// kPrimaryNumeric
// kPseudoGB1
// kReading
// kRSAdobe_Japan1_6
// kRSJapanese
// kRSKangXi
// kRSKanWa
// kRSKorean
// kRSTUnicode
// kRSUnicode
// kSBGY
// kSemanticVariant
Expand All @@ -1523,11 +1533,13 @@ public static Joining_Type_Values forName(String name) {
// kSMSZD2003Readings
// kSpecializedSemanticVariant
// kSpoofingVariant
// kSrc_NushuDuben
// kStrange
// kTaiwanTelegraph
// kTang
// kTGH
// kTGHZ2013
// kTGT_MergedSrc
// kTotalStrokes
// kTraditionalVariant
// kUnihanCore2020
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,13 @@ public String _getValue(int codepoint) {
"cjkIRG_VSource",
"cjkIRG_VSource",
"kIRG_VSource");
add(iup.getProperty("kEH_Cat"));
add(iup.getProperty("kEH_Desc"));
add(iup.getProperty("kEH_HG"));
add(iup.getProperty("kEH_IFAO"));
add(iup.getProperty("kEH_JSesh"));
add(iup.getProperty("kEH_NoMirror"));
add(iup.getProperty("kEH_NoRotate"));
add(iup.getProperty("Emoji"));
add(iup.getProperty("Emoji_Presentation"));
add(iup.getProperty("Emoji_Modifier"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ RETS ; RGI_Emoji_Tag_Sequence ; Emoji_Tag_Sequence
REZS ; RGI_Emoji_Zwj_Sequence ; Emoji_Zwj_Sequence
# RE ; RGI_Emoji

kEH_Core ; kEH_Core
kEH_NoMirror ; kEH_NoMirror
kEH_NoRotate ; kEH_NoRotate

# ================================================
# Enumerated Properties
# ================================================
Expand Down Expand Up @@ -162,3 +166,13 @@ cjkVietnameseNumeric ; kVietnameseNumeric
cjkZhuangNumeric ; kZhuangNumeric
# 16.0
cjkFanqie ; kFanqie

kTGT_MergedSrc ; kTGT_MergedSrc
kRSTUnicode ; kRSTUnicode

kSrc_NushuDuben ; kSrc_NushuDuben
kReading ; kReading

kEH_Func ; kEH_Func
kEH_FVal ; kEH_FVal
kEH_UniK ; kEH_UniK
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@
# @missing: 0000..10FFFF; Emoji_Component ; No
# @missing: 0000..10FFFF; Extended_Pictographic ; No

# @missing: 0000..10FFFF; kEH_Core ; No
# @missing: 0000..10FFFF; kEH_NoMirror ; No
# @missing: 0000..10FFFF; kEH_NoRotate ; No

# End of binary properties.

# @missing: 0000..10FFFF; Canonical_Combining_Class; Not_Reordered
Expand Down Expand Up @@ -194,3 +198,13 @@ Do_Not_Emit_Type ; Precomposed_Form ; Precomposed_Form
Do_Not_Emit_Type ; Deprecated ; Deprecated
Do_Not_Emit_Type ; Discouraged ; Discouraged
Do_Not_Emit_Type ; Preferred_Spelling ; Preferred_Spelling

# @missing: 0000..10FFFF; kTGT_MergedSrc ; <none>
# @missing: 0000..10FFFF; kRSTUnicode ; <none>

# @missing: 0000..10FFFF; kSrc_NushuDuben ; <none>
# @missing: 0000..10FFFF; kReading ; <none>

# @missing: 0000..10FFFF; kEH_Func ; <none>
# @missing: 0000..10FFFF; kEH_FVal ; <none>
# @missing: 0000..10FFFF; kEH_UniK ; <none>
Loading

0 comments on commit 835cdda

Please sign in to comment.