diff --git a/unicodetools/data/ucd/dev/ArabicShaping.txt b/unicodetools/data/ucd/dev/ArabicShaping.txt index 3c9e0ca80..d08e74aff 100644 --- a/unicodetools/data/ucd/dev/ArabicShaping.txt +++ b/unicodetools/data/ucd/dev/ArabicShaping.txt @@ -850,6 +850,28 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group 10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL 10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH 10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF +10ED9; CROWN BEH; L; CROWN BEH +10EDA; DOTLESS CROWN BEH WITH 3 DOTS BELOW; L; CROWN BEH +10EDB; DOTLESS CROWN BEH WITH 2 DOTS ABOVE; L; CROWN BEH +10EDC; DOTLESS CROWN BEH WITH 3 DOTS ABOVE; L; CROWN BEH +10EDD; CROWN HAH WITH DOT BELOW; L; CROWN HAH +10EDE; CROWN HAH; L; CROWN HAH +10EDF; CROWN HAH WITH DOT ABOVE; L; CROWN HAH +10EE0; CROWN SEEN; L; CROWN SEEN +10EE1; CROWN SEEN WITH 3 DOTS ABOVE; L; CROWN SEEN +10EE2; CROWN SAD; L; CROWN SAD +10EE3; CROWN SAD WITH DOT ABOVE; L; CROWN SAD +10EE4; CROWN TAH; L; CROWN TAH +10EE5; CROWN TAH WITH DOT ABOVE; L; CROWN TAH +10EE6; CROWN AIN; L; CROWN AIN +10EE7; CROWN AIN WITH DOT ABOVE; L; CROWN AIN +10EE8; CROWN FEH; L; CROWN FEH +10EE9; DOTLESS CROWN FEH WITH TWO DOTS ABOVE; L; CROWN FEH +10EEA; CROWN KAF; L; CROWN KAF +10EEB; CROWN MEEM; L; CROWN MEEM +10EEC; DOTLESS CROWN BEH WITH DOT ABOVE; L; CROWN BEH +10EED; CROWN HEH; L; CROWN HEH +10EEE; DOTLESS CROWN BEH WITH 2 DOTS BELOW; L; CROWN BEH # Sogdian Characters diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b4dcd2e48..c27bbd075 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ -# DerivedAge-16.0.0.txt -# Date: 2024-04-30, 21:48:12 GMT +# DerivedAge-17.0.0.txt +# Date: 2024-10-16, 13:58:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2059,4 +2059,15 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Total code points: 5185 +# ================================================ + +# Age=V17_0 + +# Newly assigned in Unicode 17.0.0 (September, 2025) + +10ED9..10EEE ; 17.0 # [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH +10EF9 ; 17.0 # ARABIC CROWN + +# Total code points: 23 + # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1075638f1..8cb93e6ef 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ -# DerivedCoreProperties-16.0.0.txt -# Date: 2024-05-31, 18:09:32 GMT +# DerivedCoreProperties-17.0.0.txt +# Date: 2024-10-16, 13:58:58 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1053,6 +1053,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; Alphabetic # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1441,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142759 +# Total code points: 142781 # ================================================ @@ -3350,6 +3351,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10D69..10D6D ; Case_Ignorable # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10D6F ; Case_Ignorable # Lm GARAY REDUPLICATION MARK 10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; Case_Ignorable # Mn ARABIC CROWN 10EFC..10EFF ; Case_Ignorable # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -3505,7 +3507,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2749 +# Total code points: 2750 # ================================================ @@ -6729,6 +6731,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; ID_Start # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -6962,7 +6965,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141269 +# Total code points: 141291 # ================================================ @@ -7895,6 +7898,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; ID_Continue # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH +10EF9 ; ID_Continue # Mn ARABIC CROWN 10EFC..10EFF ; ID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -8370,7 +8375,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144541 +# Total code points: 144564 # ================================================ @@ -8915,6 +8920,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10E80..10EA9 ; XID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; XID_Start # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -9148,7 +9154,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141246 +# Total code points: 141268 # ================================================ @@ -10082,6 +10088,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; XID_Continue # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH +10EF9 ; XID_Continue # Mn ARABIC CROWN 10EFC..10EFF ; XID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -10557,7 +10565,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144522 +# Total code points: 144545 # ================================================ @@ -10874,6 +10882,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 10D24..10D27 ; Grapheme_Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Grapheme_Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Grapheme_Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; Grapheme_Extend # Mn ARABIC CROWN 10EFC..10EFF ; Grapheme_Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Grapheme_Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Grapheme_Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -11029,7 +11038,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2193 +# Total code points: 2194 # ================================================ @@ -12330,6 +12339,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 10EAD ; Grapheme_Base # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; Grapheme_Base # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Grapheme_Base # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; Grapheme_Base # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10F00..10F1C ; Grapheme_Base # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; Grapheme_Base # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; Grapheme_Base # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -12812,7 +12822,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152730 +# Total code points: 152752 # ================================================ @@ -13201,6 +13211,7 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA 10D24..10D27 ; InCB; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; InCB; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; InCB; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; InCB; Extend # Mn ARABIC CROWN 10EFC..10EFF ; InCB; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; InCB; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; InCB; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -13357,6 +13368,6 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA E0020..E007F ; InCB; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2192 +# Total code points: 2193 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 99f7a31ea..22c9bbeed 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# Date: 2024-05-23, 16:14:27 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1964,6 +1964,8 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; N # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH +10EF9 ; N # Mn ARABIC CROWN 10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 3ce258217..f26e242ca 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ -# LineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:55 GMT +# LineBreak-17.0.0.txt +# Date: 2024-10-16, 13:59:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2820,6 +2820,8 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EAD ; BA # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; AL # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH +10EF9 ; CM # Mn ARABIC CROWN 10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 3aae8f72e..b3f440701 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2024-04-30, 21:48:23 GMT +# Date: 2024-05-23, 16:14:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -18646,6 +18646,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 10EAB 0315 0300 05AE 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062; # (a◌𐺫◌̕◌̀◌֮b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING HAMZA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 10EAC 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062; # (a◌̕◌̀◌֮◌𐺬b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, YEZIDI COMBINING MADDA MARK, LATIN SMALL LETTER B 0061 10EAC 0315 0300 05AE 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062; # (a◌𐺬◌̕◌̀◌֮b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING MADDA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 10EF9 0062;00E0 05AE 10EF9 0315 0062;0061 05AE 0300 10EF9 0315 0062;00E0 05AE 10EF9 0315 0062;0061 05AE 0300 10EF9 0315 0062; # (a◌̕◌̀◌֮◌𐻹b; à◌֮◌𐻹◌̕b; a◌֮◌̀◌𐻹◌̕b; à◌֮◌𐻹◌̕b; a◌֮◌̀◌𐻹◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC CROWN, LATIN SMALL LETTER B +0061 10EF9 0315 0300 05AE 0062;0061 05AE 10EF9 0300 0315 0062;0061 05AE 10EF9 0300 0315 0062;0061 05AE 10EF9 0300 0315 0062;0061 05AE 10EF9 0300 0315 0062; # (a◌𐻹◌̕◌̀◌֮b; a◌֮◌𐻹◌̀◌̕b; a◌֮◌𐻹◌̀◌̕b; a◌֮◌𐻹◌̀◌̕b; a◌֮◌𐻹◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC CROWN, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFD 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062; # (a◌֚◌̖◌᷺◌𐻽b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD SAKTA, LATIN SMALL LETTER B 0061 10EFD 059A 0316 1DFA 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062; # (a◌𐻽◌֚◌̖◌᷺b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW WORD SAKTA, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFE 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062; # (a◌֚◌̖◌᷺◌𐻾b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD QASR, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index fae2831e7..083997bcc 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ -# PropList-16.0.0.txt -# Date: 2024-05-31, 18:09:48 GMT +# PropList-17.0.0.txt +# Date: 2024-10-16, 13:59:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1922,7 +1922,8 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT 08CD..08CF ; Modifier_Combining_Mark # Mn [3] ARABIC SMALL HIGH ZAH..ARABIC LARGE ROUND DOT BELOW 08D3 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW WAW 08F3 ; Modifier_Combining_Mark # Mn ARABIC SMALL HIGH WAW +10EF9 ; Modifier_Combining_Mark # Mn ARABIC CROWN -# Total code points: 14 +# Total code points: 15 # EOF diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 4358a787e..4901801ce 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-17.0.0.txt -# Date: 2024-09-11, 23:38:17 GMT +# Date: 2024-10-16, 13:59:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1003,6 +1003,16 @@ jg ; Alef ; Alef jg ; Beh ; Beh jg ; Beth ; Beth jg ; Burushaski_Yeh_Barree ; Burushaski_Yeh_Barree +jg ; Crown_Ain ; Crown_Ain +jg ; Crown_Beh ; Crown_Beh +jg ; Crown_Feh ; Crown_Feh +jg ; Crown_Hah ; Crown_Hah +jg ; Crown_Heh ; Crown_Heh +jg ; Crown_Kaf ; Crown_Kaf +jg ; Crown_Meem ; Crown_Meem +jg ; Crown_Sad ; Crown_Sad +jg ; Crown_Seen ; Crown_Seen +jg ; Crown_Tah ; Crown_Tah jg ; Dal ; Dal jg ; Dalath_Rish ; Dalath_Rish jg ; E ; E diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 443a6d2dd..d77994fa1 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2024-04-30, 21:48:40 GMT +# Date: 2024-05-23, 16:15:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -890,6 +890,8 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; Arabic # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH +10EF9 ; Arabic # Mn ARABIC CROWN 10EFC..10EFF ; Arabic # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -926,7 +928,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1373 +# Total code points: 1396 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 64258a373..ee6ac5dbf 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -19541,6 +19541,29 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EC2;ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC3;ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; +10ED9;ARABIC CROWN LETTER BEH;Lo;0;AL;;;;;N;;;;; +10EDA;ARABIC CROWN LETTER PEH;Lo;0;AL;;;;;N;;;;; +10EDB;ARABIC CROWN LETTER TEH;Lo;0;AL;;;;;N;;;;; +10EDC;ARABIC CROWN LETTER THEH;Lo;0;AL;;;;;N;;;;; +10EDD;ARABIC CROWN LETTER JEEM;Lo;0;AL;;;;;N;;;;; +10EDE;ARABIC CROWN LETTER HAH;Lo;0;AL;;;;;N;;;;; +10EDF;ARABIC CROWN LETTER KHAH;Lo;0;AL;;;;;N;;;;; +10EE0;ARABIC CROWN LETTER SEEN;Lo;0;AL;;;;;N;;;;; +10EE1;ARABIC CROWN LETTER SHEEN;Lo;0;AL;;;;;N;;;;; +10EE2;ARABIC CROWN LETTER SAD;Lo;0;AL;;;;;N;;;;; +10EE3;ARABIC CROWN LETTER DAD;Lo;0;AL;;;;;N;;;;; +10EE4;ARABIC CROWN LETTER TAH;Lo;0;AL;;;;;N;;;;; +10EE5;ARABIC CROWN LETTER ZAH;Lo;0;AL;;;;;N;;;;; +10EE6;ARABIC CROWN LETTER AIN;Lo;0;AL;;;;;N;;;;; +10EE7;ARABIC CROWN LETTER GHAIN;Lo;0;AL;;;;;N;;;;; +10EE8;ARABIC CROWN LETTER FEH;Lo;0;AL;;;;;N;;;;; +10EE9;ARABIC CROWN LETTER QAF;Lo;0;AL;;;;;N;;;;; +10EEA;ARABIC CROWN LETTER KAF;Lo;0;AL;;;;;N;;;;; +10EEB;ARABIC CROWN LETTER MEEM;Lo;0;AL;;;;;N;;;;; +10EEC;ARABIC CROWN LETTER NOON;Lo;0;AL;;;;;N;;;;; +10EED;ARABIC CROWN LETTER HEH;Lo;0;AL;;;;;N;;;;; +10EEE;ARABIC CROWN LETTER YEH;Lo;0;AL;;;;;N;;;;; +10EF9;ARABIC CROWN;Mn;230;NSM;;;;;N;;;;; 10EFC;ARABIC COMBINING ALEF OVERLAY;Mn;0;NSM;;;;;N;;;;; 10EFD;ARABIC SMALL LOW WORD SAKTA;Mn;220;NSM;;;;;N;;;;; 10EFE;ARABIC SMALL LOW WORD QASR;Mn;220;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 1ebcd7228..e8a63ad4b 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-04-30, 21:48:42 GMT +# Date: 2024-05-23, 16:15:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1784,6 +1784,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EAD ; R # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; R # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH +10EF9 ; R # Mn ARABIC CROWN 10EFC..10EFF ; R # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index a863397dd..44e04ec04 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ -# GraphemeBreakProperty-16.0.0.txt -# Date: 2024-05-31, 18:09:38 GMT +# GraphemeBreakProperty-17.0.0.txt +# Date: 2024-10-16, 13:59:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -339,6 +339,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; Extend # Mn ARABIC CROWN 10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -495,7 +496,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2198 +# Total code points: 2199 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index ca3689e6b..3851469a4 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ -# SentenceBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:32 GMT +# SentenceBreakProperty-17.0.0.txt +# Date: 2024-10-16, 13:59:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -373,6 +373,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; Extend # Mn ARABIC CROWN 10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -586,7 +587,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2601 +# Total code points: 2602 # ================================================ @@ -2389,6 +2390,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; OLetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; OLetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; OLetter # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2585,7 +2587,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136908 +# Total code points: 136930 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e557c3d0d..9a2f37b5e 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ -# WordBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:36 GMT +# WordBreakProperty-17.0.0.txt +# Date: 2024-10-16, 13:59:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -409,6 +409,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; Extend # Mn ARABIC CROWN 10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -623,7 +624,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2605 +# Total code points: 2606 # ================================================ @@ -1142,6 +1143,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ALetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ALetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; ALetter # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1355,7 +1357,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33791 +# Total code points: 33813 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2aceac0aa..abf97bba2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ -# DerivedBidiClass-16.0.0.txt -# Date: 2024-04-30, 21:48:13 GMT +# DerivedBidiClass-17.0.0.txt +# Date: 2024-10-16, 13:58:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2274,6 +2274,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; NSM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; NSM # Mn ARABIC CROWN 10EFC..10EFF ; NSM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -2408,7 +2409,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2028 +# Total code points: 2029 # ================================================ @@ -2456,6 +2457,7 @@ FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISO FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; AL # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT @@ -2501,8 +2503,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 293 code points not listed here. -# Total code points: 1767 +# The above property value applies to 270 code points not listed here. +# Total code points: 1766 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a5d57af96..05a599a89 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ -# DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-30, 21:48:15 GMT +# DerivedCombiningClass-17.0.0.txt +# Date: 2024-10-16, 13:58:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1475,6 +1475,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10EAD ; 0 # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; 0 # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; 0 # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -2060,8 +2061,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821581 code points not listed here. -# Total code points: 1113178 +# The above property value applies to 821558 code points not listed here. +# Total code points: 1113177 # ================================================ @@ -2778,6 +2779,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 10D24..10D27 ; 230 # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; 230 # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; 230 # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; 230 # Mn ARABIC CROWN 10F48..10F4A ; 230 # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE 10F4C ; 230 # Mn SOGDIAN COMBINING HOOK ABOVE 10F82 ; 230 # Mn OLD UYGHUR COMBINING DOT ABOVE @@ -2803,7 +2805,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 517 +# Total code points: 518 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index cc1d91aaa..e0231758b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ -# DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedEastAsianWidth-17.0.0.txt +# Date: 2024-10-16, 13:59:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1486,6 +1486,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; N # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH +10EF9 ; N # Mn ARABIC CROWN 10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -2103,7 +2105,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761099 code points not listed here. +# The above property value applies to 761076 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 07bf7bca9..64ba28385 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ -# DerivedGeneralCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedGeneralCategory-17.0.0.txt +# Date: 2024-10-16, 13:59:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -439,7 +439,9 @@ FFFE..FFFF ; Cn # [2] .. 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. 10EB2..10EC1 ; Cn # [16] .. -10EC5..10EFB ; Cn # [55] .. +10EC5..10ED8 ; Cn # [20] .. +10EEF..10EF8 ; Cn # [10] .. +10EFA..10EFB ; Cn # [2] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. 10F8A..10FAF ; Cn # [38] .. @@ -747,7 +749,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819533 +# Total code points: 819510 # ================================================ @@ -2525,6 +2527,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; Lo # [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2708,7 +2711,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136477 +# Total code points: 136499 # ================================================ @@ -2938,6 +2941,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Mn # [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; Mn # ARABIC CROWN 10EFC..10EFF ; Mn # [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -3072,7 +3076,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2020 +# Total code points: 2021 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt index 17778a8a0..e757d0017 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt @@ -1,5 +1,5 @@ -# DerivedJoiningGroup-16.0.0.txt -# Date: 2024-07-30, 21:15:55 GMT +# DerivedJoiningGroup-17.0.0.txt +# Date: 2024-10-16, 13:59:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -750,4 +750,66 @@ # Total code points: 1 +# ================================================ + +10ED9..10EDC ; Crown_Beh # Lo [4] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER THEH +10EEC ; Crown_Beh # Lo ARABIC CROWN LETTER NOON +10EEE ; Crown_Beh # Lo ARABIC CROWN LETTER YEH + +# Total code points: 6 + +# ================================================ + +10EDD..10EDF ; Crown_Hah # Lo [3] ARABIC CROWN LETTER JEEM..ARABIC CROWN LETTER KHAH + +# Total code points: 3 + +# ================================================ + +10EE0..10EE1 ; Crown_Seen # Lo [2] ARABIC CROWN LETTER SEEN..ARABIC CROWN LETTER SHEEN + +# Total code points: 2 + +# ================================================ + +10EE2..10EE3 ; Crown_Sad # Lo [2] ARABIC CROWN LETTER SAD..ARABIC CROWN LETTER DAD + +# Total code points: 2 + +# ================================================ + +10EE4..10EE5 ; Crown_Tah # Lo [2] ARABIC CROWN LETTER TAH..ARABIC CROWN LETTER ZAH + +# Total code points: 2 + +# ================================================ + +10EE6..10EE7 ; Crown_Ain # Lo [2] ARABIC CROWN LETTER AIN..ARABIC CROWN LETTER GHAIN + +# Total code points: 2 + +# ================================================ + +10EE8..10EE9 ; Crown_Feh # Lo [2] ARABIC CROWN LETTER FEH..ARABIC CROWN LETTER QAF + +# Total code points: 2 + +# ================================================ + +10EEA ; Crown_Kaf # Lo ARABIC CROWN LETTER KAF + +# Total code points: 1 + +# ================================================ + +10EEB ; Crown_Meem # Lo ARABIC CROWN LETTER MEEM + +# Total code points: 1 + +# ================================================ + +10EED ; Crown_Heh # Lo ARABIC CROWN LETTER HEH + +# Total code points: 1 + # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 3841a92cc..d9aa77ea2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ -# DerivedJoiningType-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedJoiningType-17.0.0.txt +# Date: 2024-10-16, 13:59:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -194,9 +194,10 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 10ACD ; L # Lo MANICHAEAN LETTER HETH 10AD7 ; L # Lo MANICHAEAN LETTER NUN 10D00 ; L # Lo HANIFI ROHINGYA LETTER A +10ED9..10EEE ; L # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10FCB ; L # No CHORASMIAN NUMBER ONE HUNDRED -# Total code points: 5 +# Total code points: 27 # ================================================ @@ -441,6 +442,7 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 10D24..10D27 ; T # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; T # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; T # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; T # Mn ARABIC CROWN 10EFC..10EFF ; T # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; T # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; T # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -581,6 +583,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2185 +# Total code points: 2186 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 31d143e92..5444b5952 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ -# DerivedLineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:50 GMT +# DerivedLineBreak-17.0.0.txt +# Date: 2024-10-16, 13:59:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757653 code points not listed here. -# Total code points: 895121 +# The above property value applies to 757630 code points not listed here. +# Total code points: 895098 # ================================================ @@ -1315,6 +1315,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9..10EEE ; AL # Lo [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1613,7 +1614,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26679 +# Total code points: 26701 # ================================================ @@ -2182,6 +2183,7 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 10D24..10D27 ; CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; CM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF9 ; CM # Mn ARABIC CROWN 10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; CM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; CM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -2387,7 +2389,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2470 +# Total code points: 2471 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 07b0176b5..7748a0478 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ -# DerivedName-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedName-17.0.0.txt +# Date: 2024-10-16, 13:59:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -30206,6 +30206,29 @@ FFFD ; REPLACEMENT CHARACTER 10EC2 ; ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW 10EC3 ; ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW 10EC4 ; ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10ED9 ; ARABIC CROWN LETTER BEH +10EDA ; ARABIC CROWN LETTER PEH +10EDB ; ARABIC CROWN LETTER TEH +10EDC ; ARABIC CROWN LETTER THEH +10EDD ; ARABIC CROWN LETTER JEEM +10EDE ; ARABIC CROWN LETTER HAH +10EDF ; ARABIC CROWN LETTER KHAH +10EE0 ; ARABIC CROWN LETTER SEEN +10EE1 ; ARABIC CROWN LETTER SHEEN +10EE2 ; ARABIC CROWN LETTER SAD +10EE3 ; ARABIC CROWN LETTER DAD +10EE4 ; ARABIC CROWN LETTER TAH +10EE5 ; ARABIC CROWN LETTER ZAH +10EE6 ; ARABIC CROWN LETTER AIN +10EE7 ; ARABIC CROWN LETTER GHAIN +10EE8 ; ARABIC CROWN LETTER FEH +10EE9 ; ARABIC CROWN LETTER QAF +10EEA ; ARABIC CROWN LETTER KAF +10EEB ; ARABIC CROWN LETTER MEEM +10EEC ; ARABIC CROWN LETTER NOON +10EED ; ARABIC CROWN LETTER HEH +10EEE ; ARABIC CROWN LETTER YEH +10EF9 ; ARABIC CROWN 10EFC ; ARABIC COMBINING ALEF OVERLAY 10EFD ; ARABIC SMALL LOW WORD SAKTA 10EFE ; ARABIC SMALL LOW WORD QASR @@ -45367,6 +45390,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154998 +# Total code points: 155021 # EOF diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 55107dc2a..b1146c358 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -1264,6 +1264,16 @@ public enum Joining_Group_Values implements Named { Beh("Beh"), Beth("Beth"), Burushaski_Yeh_Barree("Burushaski_Yeh_Barree"), + Crown_Ain("Crown_Ain"), + Crown_Beh("Crown_Beh"), + Crown_Feh("Crown_Feh"), + Crown_Hah("Crown_Hah"), + Crown_Heh("Crown_Heh"), + Crown_Kaf("Crown_Kaf"), + Crown_Meem("Crown_Meem"), + Crown_Sad("Crown_Sad"), + Crown_Seen("Crown_Seen"), + Crown_Tah("Crown_Tah"), Dal("Dal"), Dalath_Rish("Dalath_Rish"), E("E"), diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java index 9075daea6..bd64e1cda 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java @@ -1227,7 +1227,18 @@ public final class UCD_Names implements UCD_Types { "THIN_YEH", "VERTICAL_TAIL", // Unicode 16 - "KASHMIRI_YEH" + "KASHMIRI_YEH", + // Unicode n > 17 + "CROWN_BEH", + "CROWN_HAH", + "CROWN_SEEN", + "CROWN_SAD", + "CROWN_TAH", + "CROWN_AIN", + "CROWN_FEH", + "CROWN_KAF", + "CROWN_MEEM", + "CROWN_HEH", }; static { diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index b1ffb8261..fc69d6879 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -781,8 +781,19 @@ public interface UCD_Types { VERTICAL_TAIL = 104, // Unicode 16 KASHMIRI_YEH = 105, + // Unicode n > 17 + CROWN_BEH = 105, + CROWN_HAH = 106, + CROWN_SEEN = 107, + CROWN_SAD = 108, + CROWN_TAH = 109, + CROWN_AIN = 110, + CROWN_FEH = 111, + CROWN_KAF = 112, + CROWN_MEEM = 113, + CROWN_HEH = 114, // limit - LIMIT_JOINING_GROUP = KASHMIRI_YEH + 1; + LIMIT_JOINING_GROUP = CROWN_HEH + 1; static final byte NFD = 0, NFC = 1, NFKD = 2, NFKC = 3; public static final int NF_COMPATIBILITY_MASK = 2, NF_COMPOSITION_MASK = 1;