Skip to content

Commit

Permalink
Merge remote-tracking branch 'la-vache/main' into pipeline-gap-174-C20
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Nov 13, 2024
2 parents a19b6aa + a3e3740 commit 1393ed7
Show file tree
Hide file tree
Showing 32 changed files with 526 additions and 370 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/cache_retain.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ jobs:
retain-maven-cache:
name: Run all tests with Maven
runs-on: ubuntu-latest
# Only run this on the upstream repo. Otherwise, running in a personal fork will cause
# Github to disable the personal fork copy of the workflow
# (Github complains about running a scheduled workflow on a repo with > 60 days of inactivity)
if: github.ref == 'refs/heads/main' && github.repository == 'unicode-org/unicodetools'
steps:
- name: Checkout and setup
uses: actions/checkout@v2
Expand Down
1 change: 1 addition & 0 deletions unicodetools/data/ucd/dev/ArabicShaping.txt
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@
088C; TAH WITH 3 DOTS BELOW; D; TAH
088D; KEHEH WITH VERTICAL 2 DOTS BELOW; D; GAF
088E; VERTICAL TAIL; R; VERTICAL TAIL
088F; DOTLESS NOON WITH SEPARATE RING ABOVE; D; NOON
0890; ARABIC POUND MARK ABOVE; U; No_Joining_Group
0891; ARABIC PIASTRE MARK ABOVE; U; No_Joining_Group

Expand Down
5 changes: 3 additions & 2 deletions unicodetools/data/ucd/dev/DerivedAge.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedAge-17.0.0.txt
# Date: 2024-10-16, 14:19:34 GMT
# Date: 2024-11-13, 16:03:37 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -2065,8 +2065,9 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L

# Newly assigned in Unicode 17.0.0 (September, 2025)

088F ; 17.0 # ARABIC LETTER NOON WITH RING ABOVE
09FF ; 17.0 # BENGALI LETTER SANSKRIT BA

# Total code points: 1
# Total code points: 2

# EOF
26 changes: 13 additions & 13 deletions unicodetools/data/ucd/dev/DerivedCoreProperties.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedCoreProperties-17.0.0.txt
# Date: 2024-10-16, 14:20:01 GMT
# Date: 2024-11-13, 16:03:55 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -344,7 +344,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A
0840..0858 ; Alphabetic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0860..086A ; Alphabetic # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; Alphabetic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0889..088E ; Alphabetic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0889..088F ; Alphabetic # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE
0897 ; Alphabetic # Mn ARABIC PEPET
08A0..08C8 ; Alphabetic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; Alphabetic # Lm ARABIC SMALL FARSI YEH
Expand Down Expand Up @@ -1442,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# Total code points: 142760
# Total code points: 142761

# ================================================

Expand Down Expand Up @@ -6260,7 +6260,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER
0840..0858 ; ID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0860..086A ; ID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; ID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0889..088E ; ID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0889..088F ; ID_Start # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE
08A0..08C8 ; ID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; ID_Start # Lm ARABIC SMALL FARSI YEH
0904..0939 ; ID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
Expand Down Expand Up @@ -6964,7 +6964,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# Total code points: 141270
# Total code points: 141271

# ================================================

Expand Down Expand Up @@ -7070,7 +7070,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
0859..085B ; ID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
0860..086A ; ID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; ID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0889..088E ; ID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0889..088F ; ID_Continue # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE
0897..089F ; ID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; ID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; ID_Continue # Lm ARABIC SMALL FARSI YEH
Expand Down Expand Up @@ -8373,7 +8373,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

# Total code points: 144542
# Total code points: 144543

# ================================================

Expand Down Expand Up @@ -8444,7 +8444,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR
0840..0858 ; XID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0860..086A ; XID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; XID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0889..088E ; XID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0889..088F ; XID_Start # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE
08A0..08C8 ; XID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; XID_Start # Lm ARABIC SMALL FARSI YEH
0904..0939 ; XID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
Expand Down Expand Up @@ -9152,7 +9152,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# Total code points: 141247
# Total code points: 141248

# ================================================

Expand Down Expand Up @@ -9254,7 +9254,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
0859..085B ; XID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
0860..086A ; XID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; XID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0889..088E ; XID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0889..088F ; XID_Continue # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE
0897..089F ; XID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; XID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; XID_Continue # Lm ARABIC SMALL FARSI YEH
Expand Down Expand Up @@ -10562,7 +10562,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

# Total code points: 144523
# Total code points: 144524

# ================================================

Expand Down Expand Up @@ -11195,7 +11195,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
0860..086A ; Grapheme_Base # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; Grapheme_Base # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0888 ; Grapheme_Base # Sk ARABIC RAISED ROUND DOT
0889..088E ; Grapheme_Base # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0889..088F ; Grapheme_Base # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE
08A0..08C8 ; Grapheme_Base # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; Grapheme_Base # Lm ARABIC SMALL FARSI YEH
0903 ; Grapheme_Base # Mc DEVANAGARI SIGN VISARGA
Expand Down Expand Up @@ -12818,7 +12818,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# Total code points: 152731
# Total code points: 152732

# ================================================

Expand Down
6 changes: 3 additions & 3 deletions unicodetools/data/ucd/dev/EastAsianWidth.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# EastAsianWidth-16.0.0.txt
# Date: 2024-06-06, 09:53:44 GMT
# EastAsianWidth-17.0.0.txt
# Date: 2024-11-13, 16:04:00 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -332,7 +332,7 @@
0860..086A ; N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0888 ; N # Sk ARABIC RAISED ROUND DOT
0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0889..088F ; N # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE
0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
0897..089F ; N # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
Expand Down
4 changes: 2 additions & 2 deletions unicodetools/data/ucd/dev/LineBreak.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# LineBreak-17.0.0.txt
# Date: 2024-10-16, 14:20:09 GMT
# Date: 2024-11-13, 16:04:00 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -278,7 +278,7 @@
0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0888 ; AL # Sk ARABIC RAISED ROUND DOT
0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0889..088F ; AL # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE
0890..0891 ; NU # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
0897..089F ; CM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
Expand Down
8 changes: 4 additions & 4 deletions unicodetools/data/ucd/dev/Scripts.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Scripts-16.0.0.txt
# Date: 2024-06-06, 09:54:09 GMT
# Scripts-17.0.0.txt
# Date: 2024-11-13, 16:04:20 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -869,7 +869,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
0870..0887 ; Arabic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0888 ; Arabic # Sk ARABIC RAISED ROUND DOT
0889..088E ; Arabic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0889..088F ; Arabic # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE
0890..0891 ; Arabic # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
0897..089F ; Arabic # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; Arabic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
Expand Down Expand Up @@ -926,7 +926,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL

# Total code points: 1373
# Total code points: 1374

# ================================================

Expand Down
1 change: 1 addition & 0 deletions unicodetools/data/ucd/dev/UnicodeData.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2121,6 +2121,7 @@
088C;ARABIC LETTER TAH WITH THREE DOTS BELOW;Lo;0;AL;;;;;N;;;;;
088D;ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;;
088E;ARABIC VERTICAL TAIL;Lo;0;AL;;;;;N;;;;;
088F;ARABIC LETTER NOON WITH RING ABOVE;Lo;0;AL;;;;;N;;;;;
0890;ARABIC POUND MARK ABOVE;Cf;0;AN;;;;;N;;;;;
0891;ARABIC PIASTRE MARK ABOVE;Cf;0;AN;;;;;N;;;;;
0897;ARABIC PEPET;Mn;230;NSM;;;;;N;;;;;
Expand Down
6 changes: 3 additions & 3 deletions unicodetools/data/ucd/dev/VerticalOrientation.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# VerticalOrientation-16.0.0.txt
# Date: 2024-06-06, 09:54:11 GMT
# VerticalOrientation-17.0.0.txt
# Date: 2024-11-13, 16:04:22 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -266,7 +266,7 @@
0860..086A ; R # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; R # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0888 ; R # Sk ARABIC RAISED ROUND DOT
0889..088E ; R # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0889..088F ; R # Lo [7] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER NOON WITH RING ABOVE
0890..0891 ; R # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
0897..089F ; R # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; R # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
Expand Down
Loading

0 comments on commit 1393ed7

Please sign in to comment.