From 4a0e30115157746e45332f2fa9767a62f622e9af Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 5 Jul 2024 20:47:05 +0200 Subject: [PATCH 01/15] UnicodeData.txt lines from the proposal --- unicodetools/data/ucd/dev/UnicodeData.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 64258a373..60b985026 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,19 @@ +10EC9;ARABIC SMALL BASELINE FATHA;Lo;0;AL;;;;;N;;;;; +10ECA;ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH;Lo;0;AL;;;;;N;;;;; +10ECB;ARABIC NORTHEAST POINTING ARROWHEAD ABOVE;Mn;230;NSM;;;;;N;;;;; +10ECC;ARABIC NORTHEAST POINTING ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; +10ECD;ARABIC SOUTHWEST POINTING ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; +10ECE;ARABIC SMALL CIRCLE ABOVE;Mn;230;NSM;;;;;N;;;;; +10ECF;ARABIC LARGE CIRCLE ABOVE;Mn;230;NSM;;;;;N;;;;; +10EF0;ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO;Mn;220;NSM;;;;;N;;;;; +10EF1;ARABIC SQUARE BELOW;Mn;220;NSM;;;;;N;;;;; +10EF2;ARABIC FILLED SQUARE BELOW;Mn;220;NSM;;;;;N;;;;; +10EF3;ARABIC SMALL HIGH NOON WITH FATHA;Mn;230;NSM;;;;;N;;;;; +10EF4;ARABIC SMALL LOW NOON WITH FATHA;Mn;220;NSM;;;;;N;;;;; +10EF5;ARABIC SMALL HIGH NOON WITH DAMMA;Mn;230;NSM;;;;;N;;;;; +10EF6;ARABIC SMALL LOW NOON WITH DAMMA;Mn;220;NSM;;;;;N;;;;; +10EF7;ARABIC SMALL HIGH HEH INITIAL FORM;Mn;230;NSM;;;;;N;;;;; +10EF8;ARABIC SMALL HIGH WORD KABBIR;Mn;230;NSM;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From b0d80c8d6ae2534d98ccd3fc0779b03f920007d8 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 5 Jul 2024 20:49:52 +0200 Subject: [PATCH 02/15] lb=AL for the letter, lb=CM for the marks --- unicodetools/data/ucd/dev/LineBreak.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 9dc61d95d..61d443df3 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2024-05-11, 16:57:19 GMT +# Date: 2024-07-05, 18:48:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2825,6 +2825,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EAD ; BA # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; AL # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10ECB..10ECF ; CM # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; CM # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF From 04bb0e1da8cfede8bf389270ff6ba426f26b584b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 5 Jul 2024 20:51:24 +0200 Subject: [PATCH 03/15] Arabic --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 443a6d2dd..b6c5a64a3 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +10EC9..10EF8 ; Arabic # Scripts-16.0.0.txt # Date: 2024-04-30, 21:48:40 GMT # © 2024 Unicode®, Inc. From f00f5e247c3ceeed3e2d0de7e5839a1ad23015ad Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 5 Jul 2024 20:53:21 +0200 Subject: [PATCH 04/15] PropList.txt lines from the proposal --- unicodetools/data/ucd/dev/PropList.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 395aa448d..e857282ef 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,6 @@ +10EF4 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW NOON WITH FATHA +10EF6 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW NOON WITH DAMMA + # PropList-16.0.0.txt # Date: 2024-05-08, 03:40:06 GMT # © 2024 Unicode®, Inc. From 918e5cba39e12104c85371efbeb7f76d770f3d7f Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 5 Jul 2024 20:55:49 +0200 Subject: [PATCH 05/15] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 6 ++-- .../data/ucd/dev/DerivedCoreProperties.txt | 36 +++++++++++++------ unicodetools/data/ucd/dev/EastAsianWidth.txt | 5 ++- .../data/ucd/dev/NormalizationTest.txt | 30 +++++++++++++++- unicodetools/data/ucd/dev/PropList.txt | 9 +++-- unicodetools/data/ucd/dev/Scripts.txt | 9 +++-- unicodetools/data/ucd/dev/UnicodeData.txt | 32 ++++++++--------- .../data/ucd/dev/VerticalOrientation.txt | 5 ++- .../dev/auxiliary/GraphemeBreakProperty.txt | 6 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 9 +++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 9 +++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 11 +++--- .../dev/extracted/DerivedCombiningClass.txt | 20 ++++++++--- .../dev/extracted/DerivedEastAsianWidth.txt | 7 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 15 +++++--- .../ucd/dev/extracted/DerivedJoiningType.txt | 6 ++-- .../ucd/dev/extracted/DerivedLineBreak.txt | 13 ++++--- .../data/ucd/dev/extracted/DerivedName.txt | 20 +++++++++-- 18 files changed, 176 insertions(+), 72 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b4dcd2e48..4cd179e8f 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2024-04-30, 21:48:12 GMT +# Date: 2024-07-05, 18:54:50 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2022,6 +2022,8 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 10D69..10D85 ; 16.0 # [29] GARAY VOWEL SIGN E..GARAY SMALL LETTER OLD NA 10D8E..10D8F ; 16.0 # [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10EC2..10EC4 ; 16.0 # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECF ; 16.0 # [7] ARABIC SMALL BASELINE FATHA..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; 16.0 # [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC ; 16.0 # ARABIC COMBINING ALEF OVERLAY 11380..11389 ; 16.0 # [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL 1138B ; 16.0 # TULU-TIGALARI LETTER EE @@ -2057,6 +2059,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5185 +# Total code points: 5201 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 88df29b3b..a17658f4f 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-05-02, 15:02:37 GMT +# Date: 2024-07-05, 18:55:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1053,6 +1053,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; Alphabetic # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1441,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142759 +# Total code points: 142761 # ================================================ @@ -3350,6 +3351,8 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10D69..10D6D ; Case_Ignorable # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10D6F ; Case_Ignorable # Lm GARAY REDUPLICATION MARK 10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB..10ECF ; Case_Ignorable # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; Case_Ignorable # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; Case_Ignorable # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -3505,7 +3508,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2749 +# Total code points: 2763 # ================================================ @@ -6729,6 +6732,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; ID_Start # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -6962,7 +6966,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141269 +# Total code points: 141271 # ================================================ @@ -7895,6 +7899,9 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; ID_Continue # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10ECB..10ECF ; ID_Continue # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; ID_Continue # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; ID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -8370,7 +8377,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144541 +# Total code points: 144557 # ================================================ @@ -8915,6 +8922,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10E80..10EA9 ; XID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; XID_Start # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -9148,7 +9156,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141246 +# Total code points: 141248 # ================================================ @@ -10082,6 +10090,9 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; XID_Continue # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10ECB..10ECF ; XID_Continue # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; XID_Continue # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; XID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -10557,7 +10568,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144522 +# Total code points: 144538 # ================================================ @@ -10869,6 +10880,8 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 10D24..10D27 ; Grapheme_Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Grapheme_Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Grapheme_Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB..10ECF ; Grapheme_Extend # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; Grapheme_Extend # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; Grapheme_Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Grapheme_Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Grapheme_Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -11024,7 +11037,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2185 +# Total code points: 2199 # ================================================ @@ -12329,6 +12342,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 10EAD ; Grapheme_Base # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; Grapheme_Base # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Grapheme_Base # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; Grapheme_Base # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; Grapheme_Base # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; Grapheme_Base # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; Grapheme_Base # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -12811,7 +12825,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152738 +# Total code points: 152740 # ================================================ @@ -13195,6 +13209,8 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA 10D24..10D27 ; InCB; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; InCB; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; InCB; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB..10ECF ; InCB; Extend # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; InCB; Extend # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; InCB; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; InCB; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; InCB; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -13351,6 +13367,6 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA E0020..E007F ; InCB; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2184 +# Total code points: 2198 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 99f7a31ea..a8b325fce 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# Date: 2024-07-05, 18:55:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1964,6 +1964,9 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; N # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10ECB..10ECF ; N # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; N # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 3aae8f72e..2afe6c5cf 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2024-04-30, 21:48:23 GMT +# Date: 2024-07-05, 18:55:22 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -18646,6 +18646,34 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 10EAB 0315 0300 05AE 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062; # (a◌𐺫◌̕◌̀◌֮b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING HAMZA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 10EAC 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062; # (a◌̕◌̀◌֮◌𐺬b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, YEZIDI COMBINING MADDA MARK, LATIN SMALL LETTER B 0061 10EAC 0315 0300 05AE 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062;0061 05AE 10EAC 0300 0315 0062; # (a◌𐺬◌̕◌̀◌֮b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; a◌֮◌𐺬◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING MADDA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 10ECB 0062;00E0 05AE 10ECB 0315 0062;0061 05AE 0300 10ECB 0315 0062;00E0 05AE 10ECB 0315 0062;0061 05AE 0300 10ECB 0315 0062; # (a◌̕◌̀◌֮◌𐻋b; à◌֮◌𐻋◌̕b; a◌֮◌̀◌𐻋◌̕b; à◌֮◌𐻋◌̕b; a◌֮◌̀◌𐻋◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC NORTHEAST POINTING ARROWHEAD ABOVE, LATIN SMALL LETTER B +0061 10ECB 0315 0300 05AE 0062;0061 05AE 10ECB 0300 0315 0062;0061 05AE 10ECB 0300 0315 0062;0061 05AE 10ECB 0300 0315 0062;0061 05AE 10ECB 0300 0315 0062; # (a◌𐻋◌̕◌̀◌֮b; a◌֮◌𐻋◌̀◌̕b; a◌֮◌𐻋◌̀◌̕b; a◌֮◌𐻋◌̀◌̕b; a◌֮◌𐻋◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC NORTHEAST POINTING ARROWHEAD ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10ECC 0062;0061 1DFA 0316 10ECC 059A 0062;0061 1DFA 0316 10ECC 059A 0062;0061 1DFA 0316 10ECC 059A 0062;0061 1DFA 0316 10ECC 059A 0062; # (a◌֚◌̖◌᷺◌𐻌b; a◌᷺◌̖◌𐻌◌֚b; a◌᷺◌̖◌𐻌◌֚b; a◌᷺◌̖◌𐻌◌֚b; a◌᷺◌̖◌𐻌◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC NORTHEAST POINTING ARROWHEAD BELOW, LATIN SMALL LETTER B +0061 10ECC 059A 0316 1DFA 0062;0061 1DFA 10ECC 0316 059A 0062;0061 1DFA 10ECC 0316 059A 0062;0061 1DFA 10ECC 0316 059A 0062;0061 1DFA 10ECC 0316 059A 0062; # (a◌𐻌◌֚◌̖◌᷺b; a◌᷺◌𐻌◌̖◌֚b; a◌᷺◌𐻌◌̖◌֚b; a◌᷺◌𐻌◌̖◌֚b; a◌᷺◌𐻌◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC NORTHEAST POINTING ARROWHEAD BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10ECD 0062;0061 1DFA 0316 10ECD 059A 0062;0061 1DFA 0316 10ECD 059A 0062;0061 1DFA 0316 10ECD 059A 0062;0061 1DFA 0316 10ECD 059A 0062; # (a◌֚◌̖◌᷺◌𐻍b; a◌᷺◌̖◌𐻍◌֚b; a◌᷺◌̖◌𐻍◌֚b; a◌᷺◌̖◌𐻍◌֚b; a◌᷺◌̖◌𐻍◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SOUTHWEST POINTING ARROWHEAD BELOW, LATIN SMALL LETTER B +0061 10ECD 059A 0316 1DFA 0062;0061 1DFA 10ECD 0316 059A 0062;0061 1DFA 10ECD 0316 059A 0062;0061 1DFA 10ECD 0316 059A 0062;0061 1DFA 10ECD 0316 059A 0062; # (a◌𐻍◌֚◌̖◌᷺b; a◌᷺◌𐻍◌̖◌֚b; a◌᷺◌𐻍◌̖◌֚b; a◌᷺◌𐻍◌̖◌֚b; a◌᷺◌𐻍◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SOUTHWEST POINTING ARROWHEAD BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 0315 0300 05AE 10ECE 0062;00E0 05AE 10ECE 0315 0062;0061 05AE 0300 10ECE 0315 0062;00E0 05AE 10ECE 0315 0062;0061 05AE 0300 10ECE 0315 0062; # (a◌̕◌̀◌֮◌𐻎b; à◌֮◌𐻎◌̕b; a◌֮◌̀◌𐻎◌̕b; à◌֮◌𐻎◌̕b; a◌֮◌̀◌𐻎◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC SMALL CIRCLE ABOVE, LATIN SMALL LETTER B +0061 10ECE 0315 0300 05AE 0062;0061 05AE 10ECE 0300 0315 0062;0061 05AE 10ECE 0300 0315 0062;0061 05AE 10ECE 0300 0315 0062;0061 05AE 10ECE 0300 0315 0062; # (a◌𐻎◌̕◌̀◌֮b; a◌֮◌𐻎◌̀◌̕b; a◌֮◌𐻎◌̀◌̕b; a◌֮◌𐻎◌̀◌̕b; a◌֮◌𐻎◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC SMALL CIRCLE ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 10ECF 0062;00E0 05AE 10ECF 0315 0062;0061 05AE 0300 10ECF 0315 0062;00E0 05AE 10ECF 0315 0062;0061 05AE 0300 10ECF 0315 0062; # (a◌̕◌̀◌֮◌𐻏b; à◌֮◌𐻏◌̕b; a◌֮◌̀◌𐻏◌̕b; à◌֮◌𐻏◌̕b; a◌֮◌̀◌𐻏◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC LARGE CIRCLE ABOVE, LATIN SMALL LETTER B +0061 10ECF 0315 0300 05AE 0062;0061 05AE 10ECF 0300 0315 0062;0061 05AE 10ECF 0300 0315 0062;0061 05AE 10ECF 0300 0315 0062;0061 05AE 10ECF 0300 0315 0062; # (a◌𐻏◌̕◌̀◌֮b; a◌֮◌𐻏◌̀◌̕b; a◌֮◌𐻏◌̀◌̕b; a◌֮◌𐻏◌̀◌̕b; a◌֮◌𐻏◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC LARGE CIRCLE ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EF0 0062;0061 1DFA 0316 10EF0 059A 0062;0061 1DFA 0316 10EF0 059A 0062;0061 1DFA 0316 10EF0 059A 0062;0061 1DFA 0316 10EF0 059A 0062; # (a◌֚◌̖◌᷺◌𐻰b; a◌᷺◌̖◌𐻰◌֚b; a◌᷺◌̖◌𐻰◌֚b; a◌᷺◌̖◌𐻰◌֚b; a◌᷺◌̖◌𐻰◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO, LATIN SMALL LETTER B +0061 10EF0 059A 0316 1DFA 0062;0061 1DFA 10EF0 0316 059A 0062;0061 1DFA 10EF0 0316 059A 0062;0061 1DFA 10EF0 0316 059A 0062;0061 1DFA 10EF0 0316 059A 0062; # (a◌𐻰◌֚◌̖◌᷺b; a◌᷺◌𐻰◌̖◌֚b; a◌᷺◌𐻰◌̖◌֚b; a◌᷺◌𐻰◌̖◌֚b; a◌᷺◌𐻰◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EF1 0062;0061 1DFA 0316 10EF1 059A 0062;0061 1DFA 0316 10EF1 059A 0062;0061 1DFA 0316 10EF1 059A 0062;0061 1DFA 0316 10EF1 059A 0062; # (a◌֚◌̖◌᷺◌𐻱b; a◌᷺◌̖◌𐻱◌֚b; a◌᷺◌̖◌𐻱◌֚b; a◌᷺◌̖◌𐻱◌֚b; a◌᷺◌̖◌𐻱◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SQUARE BELOW, LATIN SMALL LETTER B +0061 10EF1 059A 0316 1DFA 0062;0061 1DFA 10EF1 0316 059A 0062;0061 1DFA 10EF1 0316 059A 0062;0061 1DFA 10EF1 0316 059A 0062;0061 1DFA 10EF1 0316 059A 0062; # (a◌𐻱◌֚◌̖◌᷺b; a◌᷺◌𐻱◌̖◌֚b; a◌᷺◌𐻱◌̖◌֚b; a◌᷺◌𐻱◌̖◌֚b; a◌᷺◌𐻱◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SQUARE BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EF2 0062;0061 1DFA 0316 10EF2 059A 0062;0061 1DFA 0316 10EF2 059A 0062;0061 1DFA 0316 10EF2 059A 0062;0061 1DFA 0316 10EF2 059A 0062; # (a◌֚◌̖◌᷺◌𐻲b; a◌᷺◌̖◌𐻲◌֚b; a◌᷺◌̖◌𐻲◌֚b; a◌᷺◌̖◌𐻲◌֚b; a◌᷺◌̖◌𐻲◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC FILLED SQUARE BELOW, LATIN SMALL LETTER B +0061 10EF2 059A 0316 1DFA 0062;0061 1DFA 10EF2 0316 059A 0062;0061 1DFA 10EF2 0316 059A 0062;0061 1DFA 10EF2 0316 059A 0062;0061 1DFA 10EF2 0316 059A 0062; # (a◌𐻲◌֚◌̖◌᷺b; a◌᷺◌𐻲◌̖◌֚b; a◌᷺◌𐻲◌̖◌֚b; a◌᷺◌𐻲◌̖◌֚b; a◌᷺◌𐻲◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC FILLED SQUARE BELOW, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 0315 0300 05AE 10EF3 0062;00E0 05AE 10EF3 0315 0062;0061 05AE 0300 10EF3 0315 0062;00E0 05AE 10EF3 0315 0062;0061 05AE 0300 10EF3 0315 0062; # (a◌̕◌̀◌֮◌𐻳b; à◌֮◌𐻳◌̕b; a◌֮◌̀◌𐻳◌̕b; à◌֮◌𐻳◌̕b; a◌֮◌̀◌𐻳◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC SMALL HIGH NOON WITH FATHA, LATIN SMALL LETTER B +0061 10EF3 0315 0300 05AE 0062;0061 05AE 10EF3 0300 0315 0062;0061 05AE 10EF3 0300 0315 0062;0061 05AE 10EF3 0300 0315 0062;0061 05AE 10EF3 0300 0315 0062; # (a◌𐻳◌̕◌̀◌֮b; a◌֮◌𐻳◌̀◌̕b; a◌֮◌𐻳◌̀◌̕b; a◌֮◌𐻳◌̀◌̕b; a◌֮◌𐻳◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC SMALL HIGH NOON WITH FATHA, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EF4 0062;0061 1DFA 0316 10EF4 059A 0062;0061 1DFA 0316 10EF4 059A 0062;0061 1DFA 0316 10EF4 059A 0062;0061 1DFA 0316 10EF4 059A 0062; # (a◌֚◌̖◌᷺◌𐻴b; a◌᷺◌̖◌𐻴◌֚b; a◌᷺◌̖◌𐻴◌֚b; a◌᷺◌̖◌𐻴◌֚b; a◌᷺◌̖◌𐻴◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW NOON WITH FATHA, LATIN SMALL LETTER B +0061 10EF4 059A 0316 1DFA 0062;0061 1DFA 10EF4 0316 059A 0062;0061 1DFA 10EF4 0316 059A 0062;0061 1DFA 10EF4 0316 059A 0062;0061 1DFA 10EF4 0316 059A 0062; # (a◌𐻴◌֚◌̖◌᷺b; a◌᷺◌𐻴◌̖◌֚b; a◌᷺◌𐻴◌̖◌֚b; a◌᷺◌𐻴◌̖◌֚b; a◌᷺◌𐻴◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW NOON WITH FATHA, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 0315 0300 05AE 10EF5 0062;00E0 05AE 10EF5 0315 0062;0061 05AE 0300 10EF5 0315 0062;00E0 05AE 10EF5 0315 0062;0061 05AE 0300 10EF5 0315 0062; # (a◌̕◌̀◌֮◌𐻵b; à◌֮◌𐻵◌̕b; a◌֮◌̀◌𐻵◌̕b; à◌֮◌𐻵◌̕b; a◌֮◌̀◌𐻵◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC SMALL HIGH NOON WITH DAMMA, LATIN SMALL LETTER B +0061 10EF5 0315 0300 05AE 0062;0061 05AE 10EF5 0300 0315 0062;0061 05AE 10EF5 0300 0315 0062;0061 05AE 10EF5 0300 0315 0062;0061 05AE 10EF5 0300 0315 0062; # (a◌𐻵◌̕◌̀◌֮b; a◌֮◌𐻵◌̀◌̕b; a◌֮◌𐻵◌̀◌̕b; a◌֮◌𐻵◌̀◌̕b; a◌֮◌𐻵◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC SMALL HIGH NOON WITH DAMMA, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 059A 0316 1DFA 10EF6 0062;0061 1DFA 0316 10EF6 059A 0062;0061 1DFA 0316 10EF6 059A 0062;0061 1DFA 0316 10EF6 059A 0062;0061 1DFA 0316 10EF6 059A 0062; # (a◌֚◌̖◌᷺◌𐻶b; a◌᷺◌̖◌𐻶◌֚b; a◌᷺◌̖◌𐻶◌֚b; a◌᷺◌̖◌𐻶◌֚b; a◌᷺◌̖◌𐻶◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW NOON WITH DAMMA, LATIN SMALL LETTER B +0061 10EF6 059A 0316 1DFA 0062;0061 1DFA 10EF6 0316 059A 0062;0061 1DFA 10EF6 0316 059A 0062;0061 1DFA 10EF6 0316 059A 0062;0061 1DFA 10EF6 0316 059A 0062; # (a◌𐻶◌֚◌̖◌᷺b; a◌᷺◌𐻶◌̖◌֚b; a◌᷺◌𐻶◌̖◌֚b; a◌᷺◌𐻶◌̖◌֚b; a◌᷺◌𐻶◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW NOON WITH DAMMA, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 0315 0300 05AE 10EF7 0062;00E0 05AE 10EF7 0315 0062;0061 05AE 0300 10EF7 0315 0062;00E0 05AE 10EF7 0315 0062;0061 05AE 0300 10EF7 0315 0062; # (a◌̕◌̀◌֮◌𐻷b; à◌֮◌𐻷◌̕b; a◌֮◌̀◌𐻷◌̕b; à◌֮◌𐻷◌̕b; a◌֮◌̀◌𐻷◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC SMALL HIGH HEH INITIAL FORM, LATIN SMALL LETTER B +0061 10EF7 0315 0300 05AE 0062;0061 05AE 10EF7 0300 0315 0062;0061 05AE 10EF7 0300 0315 0062;0061 05AE 10EF7 0300 0315 0062;0061 05AE 10EF7 0300 0315 0062; # (a◌𐻷◌̕◌̀◌֮b; a◌֮◌𐻷◌̀◌̕b; a◌֮◌𐻷◌̀◌̕b; a◌֮◌𐻷◌̀◌̕b; a◌֮◌𐻷◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC SMALL HIGH HEH INITIAL FORM, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 10EF8 0062;00E0 05AE 10EF8 0315 0062;0061 05AE 0300 10EF8 0315 0062;00E0 05AE 10EF8 0315 0062;0061 05AE 0300 10EF8 0315 0062; # (a◌̕◌̀◌֮◌𐻸b; à◌֮◌𐻸◌̕b; a◌֮◌̀◌𐻸◌̕b; à◌֮◌𐻸◌̕b; a◌֮◌̀◌𐻸◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC SMALL HIGH WORD KABBIR, LATIN SMALL LETTER B +0061 10EF8 0315 0300 05AE 0062;0061 05AE 10EF8 0300 0315 0062;0061 05AE 10EF8 0300 0315 0062;0061 05AE 10EF8 0300 0315 0062;0061 05AE 10EF8 0300 0315 0062; # (a◌𐻸◌̕◌̀◌֮b; a◌֮◌𐻸◌̀◌̕b; a◌֮◌𐻸◌̀◌̕b; a◌֮◌𐻸◌̀◌̕b; a◌֮◌𐻸◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC SMALL HIGH WORD KABBIR, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFD 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062;0061 1DFA 0316 10EFD 059A 0062; # (a◌֚◌̖◌᷺◌𐻽b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; a◌᷺◌̖◌𐻽◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD SAKTA, LATIN SMALL LETTER B 0061 10EFD 059A 0316 1DFA 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062;0061 1DFA 10EFD 0316 059A 0062; # (a◌𐻽◌֚◌̖◌᷺b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; a◌᷺◌𐻽◌̖◌֚b; ) LATIN SMALL LETTER A, ARABIC SMALL LOW WORD SAKTA, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 10EFE 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062;0061 1DFA 0316 10EFE 059A 0062; # (a◌֚◌̖◌᷺◌𐻾b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; a◌᷺◌̖◌𐻾◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD QASR, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index e857282ef..fb84e3a50 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,8 +1,5 @@ -10EF4 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW NOON WITH FATHA -10EF6 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW NOON WITH DAMMA - # PropList-16.0.0.txt -# Date: 2024-05-08, 03:40:06 GMT +# Date: 2024-07-05, 18:55:26 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1920,7 +1917,9 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT 08CD..08CF ; Modifier_Combining_Mark # Mn [3] ARABIC SMALL HIGH ZAH..ARABIC LARGE ROUND DOT BELOW 08D3 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW WAW 08F3 ; Modifier_Combining_Mark # Mn ARABIC SMALL HIGH WAW +10EF4 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW NOON WITH FATHA +10EF6 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW NOON WITH DAMMA -# Total code points: 14 +# Total code points: 16 # EOF diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index b6c5a64a3..dd2608059 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,6 +1,5 @@ -10EC9..10EF8 ; Arabic # Scripts-16.0.0.txt -# Date: 2024-04-30, 21:48:40 GMT +# Date: 2024-07-05, 18:55:42 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -891,6 +890,10 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; Arabic # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10ECB..10ECF ; Arabic # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10ED0..10EEF ; Arabic # Cn [32] .. +10EF0..10EF8 ; Arabic # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; Arabic # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -927,7 +930,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1373 +# Total code points: 1421 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 60b985026..dd27794da 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,19 +1,3 @@ -10EC9;ARABIC SMALL BASELINE FATHA;Lo;0;AL;;;;;N;;;;; -10ECA;ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH;Lo;0;AL;;;;;N;;;;; -10ECB;ARABIC NORTHEAST POINTING ARROWHEAD ABOVE;Mn;230;NSM;;;;;N;;;;; -10ECC;ARABIC NORTHEAST POINTING ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; -10ECD;ARABIC SOUTHWEST POINTING ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; -10ECE;ARABIC SMALL CIRCLE ABOVE;Mn;230;NSM;;;;;N;;;;; -10ECF;ARABIC LARGE CIRCLE ABOVE;Mn;230;NSM;;;;;N;;;;; -10EF0;ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO;Mn;220;NSM;;;;;N;;;;; -10EF1;ARABIC SQUARE BELOW;Mn;220;NSM;;;;;N;;;;; -10EF2;ARABIC FILLED SQUARE BELOW;Mn;220;NSM;;;;;N;;;;; -10EF3;ARABIC SMALL HIGH NOON WITH FATHA;Mn;230;NSM;;;;;N;;;;; -10EF4;ARABIC SMALL LOW NOON WITH FATHA;Mn;220;NSM;;;;;N;;;;; -10EF5;ARABIC SMALL HIGH NOON WITH DAMMA;Mn;230;NSM;;;;;N;;;;; -10EF6;ARABIC SMALL LOW NOON WITH DAMMA;Mn;220;NSM;;;;;N;;;;; -10EF7;ARABIC SMALL HIGH HEH INITIAL FORM;Mn;230;NSM;;;;;N;;;;; -10EF8;ARABIC SMALL HIGH WORD KABBIR;Mn;230;NSM;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -19557,6 +19541,22 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EC2;ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC3;ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; +10EC9;ARABIC SMALL BASELINE FATHA;Lo;0;AL;;;;;N;;;;; +10ECA;ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH;Lo;0;AL;;;;;N;;;;; +10ECB;ARABIC NORTHEAST POINTING ARROWHEAD ABOVE;Mn;230;NSM;;;;;N;;;;; +10ECC;ARABIC NORTHEAST POINTING ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; +10ECD;ARABIC SOUTHWEST POINTING ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; +10ECE;ARABIC SMALL CIRCLE ABOVE;Mn;230;NSM;;;;;N;;;;; +10ECF;ARABIC LARGE CIRCLE ABOVE;Mn;230;NSM;;;;;N;;;;; +10EF0;ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO;Mn;220;NSM;;;;;N;;;;; +10EF1;ARABIC SQUARE BELOW;Mn;220;NSM;;;;;N;;;;; +10EF2;ARABIC FILLED SQUARE BELOW;Mn;220;NSM;;;;;N;;;;; +10EF3;ARABIC SMALL HIGH NOON WITH FATHA;Mn;230;NSM;;;;;N;;;;; +10EF4;ARABIC SMALL LOW NOON WITH FATHA;Mn;220;NSM;;;;;N;;;;; +10EF5;ARABIC SMALL HIGH NOON WITH DAMMA;Mn;230;NSM;;;;;N;;;;; +10EF6;ARABIC SMALL LOW NOON WITH DAMMA;Mn;220;NSM;;;;;N;;;;; +10EF7;ARABIC SMALL HIGH HEH INITIAL FORM;Mn;230;NSM;;;;;N;;;;; +10EF8;ARABIC SMALL HIGH WORD KABBIR;Mn;230;NSM;;;;;N;;;;; 10EFC;ARABIC COMBINING ALEF OVERLAY;Mn;0;NSM;;;;;N;;;;; 10EFD;ARABIC SMALL LOW WORD SAKTA;Mn;220;NSM;;;;;N;;;;; 10EFE;ARABIC SMALL LOW WORD QASR;Mn;220;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 1ebcd7228..5de144640 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-04-30, 21:48:42 GMT +# Date: 2024-07-05, 18:55:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1784,6 +1784,9 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EAD ; R # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; R # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10ECB..10ECF ; R # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; R # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; R # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 1d6329408..0cc8aa789 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# Date: 2024-07-05, 18:55:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -334,6 +334,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB..10ECF ; Extend # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; Extend # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -490,7 +492,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2190 +# Total code points: 2204 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 39fdb57c0..8f4f0617b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-05-13, 20:53:44 GMT +# Date: 2024-07-05, 18:55:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -373,6 +373,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB..10ECF ; Extend # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; Extend # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -586,7 +588,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2601 +# Total code points: 2615 # ================================================ @@ -2389,6 +2391,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; OLetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; OLetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; OLetter # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2585,7 +2588,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136908 +# Total code points: 136910 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 925ea3c48..00e2d91e1 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2024-04-30, 21:48:43 GMT +# Date: 2024-07-05, 18:55:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -409,6 +409,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB..10ECF ; Extend # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; Extend # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -623,7 +625,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2605 +# Total code points: 2619 # ================================================ @@ -1142,6 +1144,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ALetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ALetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; ALetter # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1355,7 +1358,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33791 +# Total code points: 33793 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2aceac0aa..b390f03f1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2024-04-30, 21:48:13 GMT +# Date: 2024-07-05, 18:55:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2274,6 +2274,8 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; NSM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB..10ECF ; NSM # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; NSM # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; NSM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -2408,7 +2410,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2028 +# Total code points: 2042 # ================================================ @@ -2456,6 +2458,7 @@ FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISO FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; AL # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT @@ -2501,8 +2504,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 293 code points not listed here. -# Total code points: 1767 +# The above property value applies to 277 code points not listed here. +# Total code points: 1753 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a5d57af96..88f091fff 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-30, 21:48:15 GMT +# Date: 2024-07-05, 18:55:10 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1475,6 +1475,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10EAD ; 0 # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; 0 # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; 0 # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -2060,8 +2061,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821581 code points not listed here. -# Total code points: 1113178 +# The above property value applies to 821565 code points not listed here. +# Total code points: 1113164 # ================================================ @@ -2615,6 +2616,10 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 10A0D ; 220 # Mn KHAROSHTHI SIGN DOUBLE RING BELOW 10A3A ; 220 # Mn KHAROSHTHI SIGN DOT BELOW 10AE6 ; 220 # Mn MANICHAEAN ABBREVIATION MARK BELOW +10ECC..10ECD ; 220 # Mn [2] ARABIC NORTHEAST POINTING ARROWHEAD BELOW..ARABIC SOUTHWEST POINTING ARROWHEAD BELOW +10EF0..10EF2 ; 220 # Mn [3] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC FILLED SQUARE BELOW +10EF4 ; 220 # Mn ARABIC SMALL LOW NOON WITH FATHA +10EF6 ; 220 # Mn ARABIC SMALL LOW NOON WITH DAMMA 10EFD..10EFF ; 220 # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F47 ; 220 # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW 10F4B ; 220 # Mn SOGDIAN COMBINING CURVE BELOW @@ -2627,7 +2632,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 1E5EF ; 220 # Mn OL ONAL SIGN IKIR 1E8D0..1E8D6 ; 220 # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -# Total code points: 182 +# Total code points: 189 # ================================================ @@ -2778,6 +2783,11 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 10D24..10D27 ; 230 # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; 230 # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; 230 # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB ; 230 # Mn ARABIC NORTHEAST POINTING ARROWHEAD ABOVE +10ECE..10ECF ; 230 # Mn [2] ARABIC SMALL CIRCLE ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF3 ; 230 # Mn ARABIC SMALL HIGH NOON WITH FATHA +10EF5 ; 230 # Mn ARABIC SMALL HIGH NOON WITH DAMMA +10EF7..10EF8 ; 230 # Mn [2] ARABIC SMALL HIGH HEH INITIAL FORM..ARABIC SMALL HIGH WORD KABBIR 10F48..10F4A ; 230 # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE 10F4C ; 230 # Mn SOGDIAN COMBINING HOOK ABOVE 10F82 ; 230 # Mn OLD UYGHUR COMBINING DOT ABOVE @@ -2803,7 +2813,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E5EE ; 230 # Mn OL ONAL SIGN MU 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 517 +# Total code points: 524 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index cc1d91aaa..15aeea75e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# Date: 2024-07-05, 18:55:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1486,6 +1486,9 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; N # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10ECB..10ECF ; N # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; N # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF @@ -2103,7 +2106,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761099 code points not listed here. +# The above property value applies to 761083 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 07bf7bca9..76b18bd4f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# Date: 2024-07-05, 18:55:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -439,7 +439,9 @@ FFFE..FFFF ; Cn # [2] .. 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. 10EB2..10EC1 ; Cn # [16] .. -10EC5..10EFB ; Cn # [55] .. +10EC5..10EC8 ; Cn # [4] .. +10ED0..10EEF ; Cn # [32] .. +10EF9..10EFB ; Cn # [3] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. 10F8A..10FAF ; Cn # [38] .. @@ -747,7 +749,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819533 +# Total code points: 819517 # ================================================ @@ -2525,6 +2527,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; Lo # [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2708,7 +2711,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136477 +# Total code points: 136479 # ================================================ @@ -2938,6 +2941,8 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; Mn # [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB..10ECF ; Mn # [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; Mn # [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; Mn # [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -3072,7 +3077,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2020 +# Total code points: 2034 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index 3841a92cc..cef92d591 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ # DerivedJoiningType-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# Date: 2024-07-05, 18:55:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -441,6 +441,8 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 10D24..10D27 ; T # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; T # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; T # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB..10ECF ; T # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; T # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; T # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; T # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; T # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -581,6 +583,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2185 +# Total code points: 2199 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 866536783..415e97496 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2024-05-11, 16:57:14 GMT +# Date: 2024-07-05, 18:55:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757653 code points not listed here. -# Total code points: 895121 +# The above property value applies to 757637 code points not listed here. +# Total code points: 895105 # ================================================ @@ -1317,6 +1317,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9..10ECA ; AL # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1615,7 +1616,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26690 +# Total code points: 26692 # ================================================ @@ -2184,6 +2185,8 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 10D24..10D27 ; CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69..10D6D ; CM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10ECB..10ECF ; CM # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10EF0..10EF8 ; CM # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; CM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; CM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -2389,7 +2392,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2470 +# Total code points: 2484 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 07b0176b5..8313cdcf0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# Date: 2024-07-05, 18:55:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -30206,6 +30206,22 @@ FFFD ; REPLACEMENT CHARACTER 10EC2 ; ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW 10EC3 ; ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW 10EC4 ; ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EC9 ; ARABIC SMALL BASELINE FATHA +10ECA ; ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10ECB ; ARABIC NORTHEAST POINTING ARROWHEAD ABOVE +10ECC ; ARABIC NORTHEAST POINTING ARROWHEAD BELOW +10ECD ; ARABIC SOUTHWEST POINTING ARROWHEAD BELOW +10ECE ; ARABIC SMALL CIRCLE ABOVE +10ECF ; ARABIC LARGE CIRCLE ABOVE +10EF0 ; ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO +10EF1 ; ARABIC SQUARE BELOW +10EF2 ; ARABIC FILLED SQUARE BELOW +10EF3 ; ARABIC SMALL HIGH NOON WITH FATHA +10EF4 ; ARABIC SMALL LOW NOON WITH FATHA +10EF5 ; ARABIC SMALL HIGH NOON WITH DAMMA +10EF6 ; ARABIC SMALL LOW NOON WITH DAMMA +10EF7 ; ARABIC SMALL HIGH HEH INITIAL FORM +10EF8 ; ARABIC SMALL HIGH WORD KABBIR 10EFC ; ARABIC COMBINING ALEF OVERLAY 10EFD ; ARABIC SMALL LOW WORD SAKTA 10EFE ; ARABIC SMALL LOW WORD QASR @@ -45367,6 +45383,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154998 +# Total code points: 155014 # EOF From 9c5d014c20bbf48dddce3e9a423b2a85c4dbbdaf Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 13:53:40 +0200 Subject: [PATCH 06/15] mind the gap --- unicodetools/data/ucd/dev/Scripts.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index dd2608059..681c4e575 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -892,7 +892,6 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC9..10ECA ; Arabic # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10ECB..10ECF ; Arabic # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE -10ED0..10EEF ; Arabic # Cn [32] .. 10EF0..10EF8 ; Arabic # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; Arabic # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL From 3dbb66332d1b152f19c010680192dbe2da2c0a08 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 13:59:08 +0200 Subject: [PATCH 07/15] Regenerate UCD --- unicodetools/data/ucd/dev/Scripts.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 681c4e575..22d6105e4 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2024-07-05, 18:55:42 GMT +# Date: 2024-07-25, 11:56:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -929,7 +929,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1421 +# Total code points: 1389 # ================================================ From 98c248fc21321dc592e9d1ed605f95600d93e031 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 29 Aug 2024 15:32:29 +0200 Subject: [PATCH 08/15] Failing test --- .../unicode/text/UCD/AdditionComparisons.txt | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt index 9db1fdf1d..ff17ca78f 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt @@ -37,6 +37,50 @@ CorrespondTo [\N{OLD HUNGARIAN SMALL LETTER A}] # Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status: +Ignoring Block: + +# We test Joining_Type, but not Other_Joining_Type, which distinguishes characters explicitly listed +# in ArabicShaping.txt from those where Joining_Type is derived. +Ignoring Other_Joining_Type: +Propertywise [ + \N{ARABIC SMALL BASELINE FATHA} + \N{ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH} + ࢇ \N{ARABIC BASELINE ROUND DOT} +] AreAlike +end Ignoring; + +Propertywise [ + \N{ARABIC NORTHEAST POINTING ARROWHEAD ABOVE} + \N{ARABIC SMALL CIRCLE ABOVE} + \N{ARABIC LARGE CIRCLE ABOVE} + ۠ \N{ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO} +] AreAlike + +Propertywise [ + \N{ARABIC SMALL HIGH NOON WITH FATHA} + \N{ARABIC SMALL HIGH NOON WITH DAMMA} + \N{ARABIC SMALL HIGH HEH INITIAL FORM} + \N{ARABIC SMALL HIGH WORD KABBIR} + ࣕ \N{ARABIC SMALL HIGH SAD} # Not like SMALL HIGH NOON, which is MCM. + ࣞ \N{ARABIC SMALL HIGH WORD QIF} +] AreAlike + +Propertywise [ + \N{ARABIC NORTHEAST POINTING ARROWHEAD BELOW} + \N{ARABIC SOUTHWEST POINTING ARROWHEAD BELOW} + \N{ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO} + \N{ARABIC SQUARE BELOW} + \N{ARABIC FILLED SQUARE BELOW} + ࣑ \N{ARABIC LARGE CIRCLE BELOW} +] AreAlike +end Ignoring; + +Propertywise [ + \N{ARABIC SMALL LOW NOON WITH FATHA} + \N{ARABIC SMALL LOW NOON WITH DAMMA} + ۣ \N{ARABIC SMALL LOW SEEN} # Not like SMALL LOW NOON WITH KASRA, which is not MCM. +] AreAlike + end Ignoring; end Ignoring; \ No newline at end of file From de7a27c93fadde86a53f33a6a651dfad8130714d Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 29 Aug 2024 15:42:55 +0200 Subject: [PATCH 09/15] Sprinkle some Alphabetic and Diacritic --- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 5 +++-- unicodetools/data/ucd/dev/PropList.txt | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 92078539e..cc48db767 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-07-25, 12:34:24 GMT +# Date: 2024-08-29, 13:41:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1054,6 +1054,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10EC9..10ECA ; Alphabetic # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10EF3..10EF8 ; Alphabetic # Mn [6] ARABIC SMALL HIGH NOON WITH FATHA..ARABIC SMALL HIGH WORD KABBIR 10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1442,7 +1443,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142761 +# Total code points: 142767 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 00cbc6e2d..bddf5526d 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2024-07-25, 12:34:40 GMT +# Date: 2024-08-29, 13:41:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -702,6 +702,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D69 ; Other_Alphabetic # Mn GARAY VOWEL SIGN E 10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EF3..10EF8 ; Other_Alphabetic # Mn [6] ARABIC SMALL HIGH NOON WITH FATHA..ARABIC SMALL HIGH WORD KABBIR 10EFC ; Other_Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY 11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU 11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA @@ -858,7 +859,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1495 +# Total code points: 1501 # ================================================ @@ -1077,6 +1078,9 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D4E ; Diacritic # Lm GARAY VOWEL LENGTH MARK 10D69..10D6D ; Diacritic # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10ECB..10ECF ; Diacritic # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE +10ED0..10EEF ; Diacritic # Cn [32] .. +10EF0..10EF2 ; Diacritic # Mn [3] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC FILLED SQUARE BELOW 10EFD..10EFF ; Diacritic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -1150,7 +1154,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1178 +# Total code points: 1218 # ================================================ From 6cdaf790cbfbcb5337b3f4492b7613fd2dabcc8c Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 29 Aug 2024 15:47:18 +0200 Subject: [PATCH 10/15] block --- .../resources/org/unicode/text/UCD/AdditionComparisons.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt index ff17ca78f..5045a2a62 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt @@ -39,8 +39,8 @@ Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status: Ignoring Block: -# We test Joining_Type, but not Other_Joining_Type, which distinguishes characters explicitly listed -# in ArabicShaping.txt from those where Joining_Type is derived. +# We test Joining_Type, but not the pseudoproperty Other_Joining_Type, which distinguishes +# characters explicitly listed in ArabicShaping.txt from those where Joining_Type is derived. Ignoring Other_Joining_Type: Propertywise [ \N{ARABIC SMALL BASELINE FATHA} @@ -73,7 +73,6 @@ Propertywise [ \N{ARABIC FILLED SQUARE BELOW} ࣑ \N{ARABIC LARGE CIRCLE BELOW} ] AreAlike -end Ignoring; Propertywise [ \N{ARABIC SMALL LOW NOON WITH FATHA} @@ -83,4 +82,6 @@ Propertywise [ end Ignoring; +end Ignoring; + end Ignoring; \ No newline at end of file From dce0abb8289bb177b073ddec932a6fae816e1b59 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 2 Oct 2024 14:36:08 +0200 Subject: [PATCH 11/15] Mind the gap --- unicodetools/data/ucd/dev/PropList.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index bddf5526d..01e01e532 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1079,7 +1079,6 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 10D4E ; Diacritic # Lm GARAY VOWEL LENGTH MARK 10D69..10D6D ; Diacritic # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10ECB..10ECF ; Diacritic # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE -10ED0..10EEF ; Diacritic # Cn [32] .. 10EF0..10EF2 ; Diacritic # Mn [3] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC FILLED SQUARE BELOW 10EFD..10EFF ; Diacritic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW From c1b215df6d2704d3592648243a581bbc49e9bfb6 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 2 Oct 2024 14:52:57 +0200 Subject: [PATCH 12/15] The first two should be like U+0888 --- .../org/unicode/text/UCD/AdditionComparisons/138.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/138.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/138.txt index 0fd8d9fcf..8e4c7b98b 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/138.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/138.txt @@ -14,9 +14,9 @@ Ignoring Block: # characters explicitly listed in ArabicShaping.txt from those where Joining_Type is derived. Ignoring Other_Joining_Type: Propertywise [ - \N{ARABIC SMALL BASELINE FATHA} - \N{ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH} - ࢇ \N{ARABIC BASELINE ROUND DOT} + \N{ARABIC SMALL BASELINE FATHA} + \N{ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH} + \x{0888} ࢈ \N{ARABIC RAISED ROUND DOT} ] AreAlike end Ignoring; From f5cc0579b134223bbf43d2f44a17d5a3e74c4c2b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 2 Oct 2024 14:55:16 +0200 Subject: [PATCH 13/15] Regenerate UCD --- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 01e01e532..ffac1aa65 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ -# PropList-16.0.0.txt -# Date: 2024-08-29, 13:41:32 GMT +# PropList-17.0.0.txt +# Date: 2024-10-02, 12:54:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1153,7 +1153,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1218 +# Total code points: 1186 # ================================================ From df535b80c5b890cf085c07d5e66a720bebf8344b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 2 Oct 2024 15:04:54 +0200 Subject: [PATCH 14/15] gc=Sk --- unicodetools/data/ucd/dev/UnicodeData.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index dd27794da..dd874fec6 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -19541,8 +19541,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EC2;ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC3;ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; 10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; -10EC9;ARABIC SMALL BASELINE FATHA;Lo;0;AL;;;;;N;;;;; -10ECA;ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH;Lo;0;AL;;;;;N;;;;; +10EC9;ARABIC SMALL BASELINE FATHA;Sk;0;AL;;;;;N;;;;; +10ECA;ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH;Sk;0;AL;;;;;N;;;;; 10ECB;ARABIC NORTHEAST POINTING ARROWHEAD ABOVE;Mn;230;NSM;;;;;N;;;;; 10ECC;ARABIC NORTHEAST POINTING ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; 10ECD;ARABIC SOUTHWEST POINTING ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; From 1e8f3ce6c4b10aee5c137c010d741a4acdf7afd6 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 2 Oct 2024 15:09:53 +0200 Subject: [PATCH 15/15] Regenerate UCD --- .../data/ucd/dev/DerivedCoreProperties.txt | 22 ++++++++----------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 6 ++--- unicodetools/data/ucd/dev/LineBreak.txt | 4 ++-- unicodetools/data/ucd/dev/Scripts.txt | 6 ++--- .../data/ucd/dev/VerticalOrientation.txt | 6 ++--- .../dev/auxiliary/SentenceBreakProperty.txt | 5 ++--- .../ucd/dev/auxiliary/WordBreakProperty.txt | 5 ++--- .../ucd/dev/extracted/DerivedBidiClass.txt | 4 ++-- .../dev/extracted/DerivedCombiningClass.txt | 4 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 4 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 8 +++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 4 ++-- 12 files changed, 36 insertions(+), 42 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 9e7d7669a..d65a75f84 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-10-02, 12:22:50 GMT +# Date: 2024-10-02, 13:07:02 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1053,7 +1053,6 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; Alphabetic # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10EF3..10EF8 ; Alphabetic # Mn [6] ARABIC SMALL HIGH NOON WITH FATHA..ARABIC SMALL HIGH WORD KABBIR 10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL @@ -1443,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142767 +# Total code points: 142765 # ================================================ @@ -3352,6 +3351,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10D69..10D6D ; Case_Ignorable # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10D6F ; Case_Ignorable # Lm GARAY REDUPLICATION MARK 10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EC9..10ECA ; Case_Ignorable # Sk [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10ECB..10ECF ; Case_Ignorable # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE 10EF0..10EF8 ; Case_Ignorable # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; Case_Ignorable # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA @@ -3509,7 +3509,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2763 +# Total code points: 2765 # ================================================ @@ -6733,7 +6733,6 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; ID_Start # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -6967,7 +6966,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141271 +# Total code points: 141269 # ================================================ @@ -7900,7 +7899,6 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; ID_Continue # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10ECB..10ECF ; ID_Continue # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE 10EF0..10EF8 ; ID_Continue # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; ID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA @@ -8378,7 +8376,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144557 +# Total code points: 144555 # ================================================ @@ -8923,7 +8921,6 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10E80..10EA9 ; XID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; XID_Start # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -9157,7 +9154,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141248 +# Total code points: 141246 # ================================================ @@ -10091,7 +10088,6 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; XID_Continue # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10ECB..10ECF ; XID_Continue # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE 10EF0..10EF8 ; XID_Continue # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; XID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA @@ -10569,7 +10565,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144538 +# Total code points: 144536 # ================================================ @@ -12344,7 +12340,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 10EAD ; Grapheme_Base # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; Grapheme_Base # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Grapheme_Base # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; Grapheme_Base # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10EC9..10ECA ; Grapheme_Base # Sk [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; Grapheme_Base # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; Grapheme_Base # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; Grapheme_Base # Lo OLD SOGDIAN LIGATURE AYIN-DALETH diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index a8b325fce..189d525a9 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ -# EastAsianWidth-16.0.0.txt -# Date: 2024-07-05, 18:55:16 GMT +# EastAsianWidth-17.0.0.txt +# Date: 2024-10-02, 13:07:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1964,7 +1964,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; N # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10EC9..10ECA ; N # Sk [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10ECB..10ECF ; N # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE 10EF0..10EF8 ; N # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 77a344a22..f94331f7a 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-10-02, 12:22:59 GMT +# Date: 2024-10-02, 13:07:08 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2820,7 +2820,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10EAD ; BA # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; AL # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10EC9..10ECA ; AL # Sk [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10ECB..10ECF ; CM # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE 10EF0..10EF8 ; CM # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 22d6105e4..baebabeb6 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ -# Scripts-16.0.0.txt -# Date: 2024-07-25, 11:56:11 GMT +# Scripts-17.0.0.txt +# Date: 2024-10-02, 13:07:28 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -890,7 +890,7 @@ FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; Arabic # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10EC9..10ECA ; Arabic # Sk [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10ECB..10ECF ; Arabic # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE 10EF0..10EF8 ; Arabic # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; Arabic # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 5de144640..ee5d80075 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ -# VerticalOrientation-16.0.0.txt -# Date: 2024-07-05, 18:55:45 GMT +# VerticalOrientation-17.0.0.txt +# Date: 2024-10-02, 13:07:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1784,7 +1784,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10EAD ; R # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; R # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10EC9..10ECA ; R # Sk [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10ECB..10ECF ; R # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE 10EF0..10EF8 ; R # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; R # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 99f2df732..8b27270ec 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-10-02, 12:23:27 GMT +# Date: 2024-10-02, 13:07:29 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2391,7 +2391,6 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; OLetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; OLetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; OLetter # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2588,7 +2587,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136910 +# Total code points: 136908 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 76d3dbd6b..98a61c8b5 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2024-10-02, 12:23:30 GMT +# Date: 2024-10-02, 13:07:31 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1144,7 +1144,6 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10E80..10EA9 ; ALetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; ALetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; ALetter # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1358,7 +1357,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33793 +# Total code points: 33791 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 331e22c6a..61c685cf2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2024-10-02, 12:22:47 GMT +# Date: 2024-10-02, 13:07:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2458,7 +2458,7 @@ FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISO FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; AL # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10EC9..10ECA ; AL # Sk [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 3bcd92fce..35e6fdb69 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2024-10-02, 12:22:49 GMT +# Date: 2024-10-02, 13:07:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1475,7 +1475,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10EAD ; 0 # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; 0 # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10EC9..10ECA ; 0 # Sk [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; 0 # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 77a57f391..b09d6a8d3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2024-10-02, 12:22:52 GMT +# Date: 2024-10-02, 13:07:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1486,7 +1486,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; N # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10EC9..10ECA ; N # Sk [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10ECB..10ECF ; N # Mn [5] ARABIC NORTHEAST POINTING ARROWHEAD ABOVE..ARABIC LARGE CIRCLE ABOVE 10EF0..10EF8 ; N # Mn [9] ARABIC SMALL LOW UPRIGHT RECTANGULAR ZERO..ARABIC SMALL HIGH WORD KABBIR 10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 08d79ac9a..3cfe69f87 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2024-10-02, 12:22:53 GMT +# Date: 2024-10-02, 13:07:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2527,7 +2527,6 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; Lo # [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2711,7 +2710,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136479 +# Total code points: 136477 # ================================================ @@ -4092,9 +4091,10 @@ FBB2..FBC2 ; Sk # [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE FF3E ; Sk # FULLWIDTH CIRCUMFLEX ACCENT FF40 ; Sk # FULLWIDTH GRAVE ACCENT FFE3 ; Sk # FULLWIDTH MACRON +10EC9..10ECA ; Sk # [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 1F3FB..1F3FF ; Sk # [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 -# Total code points: 125 +# Total code points: 127 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 1bb7bb1e5..c62ff5fe2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2024-10-02, 12:22:54 GMT +# Date: 2024-10-02, 13:07:05 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1315,7 +1315,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE 10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -10EC9..10ECA ; AL # Lo [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH +10EC9..10ECA ; AL # Sk [2] ARABIC SMALL BASELINE FATHA..ARABIC SMALL BASELINE DOTLESS HEAD OF KHAH 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH