Skip to content

Commit

Permalink
Update Unicode data to Unicode 15.1.
Browse files Browse the repository at this point in the history
  • Loading branch information
zufuliu committed Oct 14, 2023
1 parent 2c61b23 commit 3a4635e
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 84 deletions.
83 changes: 43 additions & 40 deletions scintilla/lexlib/CharacterCategory.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace {

#define CharacterCategoryUseRangeList 0
//++Autogenerated -- start of section automatically generated
// Created with Python 3.12.0, Unicode 15.0.0
// Created with Python 3.13.0a1, Unicode 15.1.0
#if CharacterCategoryUseRangeList
const int catRanges[] = {
};
Expand All @@ -49,9 +49,9 @@ const uint8_t catTable[] = {
52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 180,
52, 52, 52, 52, 52, 52, 52, 182, 184, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 186,
52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 188, 116, 116, 116, 116, 116, 116,
52, 190, 116, 116, 52, 52, 52, 52, 52, 52, 52, 52, 52, 192, 52, 52, 52, 52, 52, 52,
52, 194, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 188, 52, 190, 116, 116, 116, 116,
52, 192, 116, 116, 52, 52, 52, 52, 52, 52, 52, 52, 52, 194, 52, 52, 52, 52, 52, 52,
52, 196, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
Expand Down Expand Up @@ -120,7 +120,7 @@ const uint8_t catTable[] = {
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 196, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 198, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116,
Expand All @@ -133,13 +133,13 @@ const uint8_t catTable[] = {
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
70, 70, 70, 70, 70, 70, 70, 198, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
70, 70, 70, 70, 70, 70, 70, 200, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 198,
70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 200,
// catTable index 3
0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 2, 4, 5, 6, 2, 7, 7, 7, 7,
7, 2, 8, 9, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 13, 14,
Expand Down Expand Up @@ -271,10 +271,10 @@ const uint8_t catTable[] = {
2, 108, 121, 2, 121, 4, 4, 4, 4, 2, 2, 88, 2, 2, 2, 2, 2, 120, 2, 2,
108, 151, 2, 2, 2, 2, 2, 2, 67, 2, 152, 148, 148, 148, 153, 44, 67, 67, 67, 67,
67, 55, 67, 67, 67, 67, 44, 44, 44, 44, 44, 44, 67, 67, 67, 44, 44, 44, 44, 44,
67, 67, 67, 67, 67, 67, 44, 44, 1, 2, 154, 155, 4, 4, 4, 4, 4, 67, 4, 4,
4, 4, 156, 157, 158, 105, 105, 105, 105, 43, 43, 86, 159, 40, 40, 67, 105, 160, 63, 67,
36, 36, 36, 61, 57, 161, 162, 69, 36, 36, 36, 36, 36, 63, 40, 69, 44, 44, 62, 36,
36, 36, 36, 36, 67, 27, 27, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 92,
1, 2, 154, 155, 4, 4, 4, 4, 4, 67, 4, 4, 4, 4, 156, 157, 158, 105, 105, 105,
105, 43, 43, 86, 159, 40, 40, 67, 105, 160, 63, 67, 36, 36, 36, 61, 57, 161, 162, 69,
36, 36, 36, 36, 36, 63, 40, 69, 44, 44, 62, 36, 36, 36, 36, 36, 67, 27, 27, 67,
67, 67, 67, 67, 67, 67, 44, 44, 44, 44, 44, 55, 67, 67, 67, 67, 67, 67, 67, 92,
27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, 27, 163, 27, 27, 27,
27, 27, 27, 27, 36, 36, 83, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 164, 2,
7, 7, 7, 7, 7, 36, 44, 44, 32, 32, 32, 32, 32, 32, 32, 70, 51, 165, 43, 43,
Expand Down Expand Up @@ -408,13 +408,14 @@ const uint8_t catTable[] = {
62, 61, 61, 62, 62, 36, 36, 36, 36, 61, 36, 36, 62, 62, 44, 44, 44, 61, 44, 62,
62, 62, 62, 36, 62, 61, 61, 62, 62, 62, 62, 62, 62, 61, 61, 62, 36, 61, 36, 36,
36, 61, 36, 36, 62, 36, 61, 61, 36, 36, 36, 36, 36, 62, 36, 36, 62, 36, 62, 36,
36, 62, 36, 36, 8, 44, 44, 44, 44, 44, 44, 44, 55, 67, 67, 67, 67, 67, 67, 67,
27, 27, 27, 27, 27, 27, 91, 67, 67, 67, 67, 67, 67, 67, 67, 44, 44, 44, 44, 67,
67, 67, 67, 67, 67, 92, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 92, 44, 44, 44,
67, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 25, 41, 41, 67, 67, 67, 67,
44, 44, 67, 67, 67, 67, 67, 92, 44, 55, 67, 67, 67, 67, 67, 67, 44, 44, 44, 44,
67, 67, 67, 67, 67, 67, 67, 55, 67, 67, 67, 44, 44, 44, 44, 67, 67, 92, 67, 67,
67, 67, 67, 67, 79, 44, 44, 44, 44, 44, 44, 44, 171, 171, 171, 171, 171, 171, 171, 44,
36, 62, 36, 36, 8, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 44, 44,
55, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, 27, 27, 27, 91, 67, 67, 67, 67, 67,
67, 67, 67, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 92, 44, 44, 44, 44, 44, 44,
67, 67, 67, 67, 92, 44, 44, 44, 67, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67,
67, 25, 41, 41, 67, 67, 67, 67, 44, 44, 67, 67, 67, 67, 67, 92, 44, 55, 67, 67,
67, 67, 67, 67, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 67, 55, 67, 67, 67, 44,
44, 44, 44, 67, 67, 92, 67, 67, 67, 67, 67, 67, 79, 44, 44, 44, 44, 44, 44, 44,
171, 171, 171, 171, 171, 171, 171, 44,
// catTable values
25, 25, 22, 17, 17, 17, 19, 17, 13, 14, 17, 18, 17, 12, 8, 8, 18, 18, 18, 17,
17, 0, 0, 0, 0, 13, 17, 14, 20, 11, 20, 1, 1, 1, 1, 13, 18, 14, 18, 25,
Expand Down Expand Up @@ -477,8 +478,8 @@ const uint16_t catTableIndex[] = {
3344, 3344, 3344, 3344, 144, 144, 144, 176, 176, 176, 4912, 4928, 208, 208, 208, 208, 208, 208, 4944, 4960,
176, 176, 4976, 768, 768, 768, 4992, 5008, 768, 5024, 5040, 5040, 5040, 5040, 512, 512, 5056, 5072, 5088, 5104,
5120, 5136, 2240, 2240, 3344, 5152, 3344, 3344, 3344, 3344, 3344, 5168, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344,
3344, 3344, 3344, 3344, 3344, 5184, 2240, 5200, 5216, 5232, 5248, 5264, 2176, 768, 768, 768, 768, 5280, 2848, 768,
768, 768, 768, 5296, 5312, 768, 768, 2176, 768, 768, 768, 768, 3200, 5328, 768, 768, 3344, 3344, 5168, 768,
3344, 3344, 3344, 3344, 3344, 5184, 2240, 3344, 5200, 5216, 5232, 5248, 2176, 768, 768, 768, 768, 5264, 2848, 768,
768, 768, 768, 5280, 5296, 768, 768, 2176, 768, 768, 768, 768, 3200, 5312, 768, 768, 3344, 3344, 5328, 768,
3344, 5344, 5360, 3344, 5376, 5392, 3344, 3344, 5360, 3344, 3344, 5392, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344,
3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768,
Expand Down Expand Up @@ -575,30 +576,32 @@ const uint16_t catTableIndex[] = {
2240, 2240, 2240, 7984, 4336, 4336, 10480, 10496, 2240, 2240, 2240, 2240, 7984, 4336, 10512, 10528, 2240, 2240, 2240, 2240,
2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 10544, 768, 10560, 10576, 10592, 10608, 10624, 10640, 10656, 3296, 10672, 3296,
2240, 2240, 2240, 10688, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240,
3344, 3344, 5200, 3344, 3344, 3344, 3344, 3344, 3344, 5168, 5344, 10704, 10704, 10704, 3344, 5184, 10720, 3344, 3344, 3344,
3344, 3344, 3344, 3344, 3344, 3344, 10736, 2240, 2240, 2240, 10752, 3344, 10768, 3344, 3344, 5200, 10784, 10800, 5184, 2240,
3344, 3344, 10704, 3344, 3344, 3344, 3344, 3344, 3344, 5168, 5344, 10720, 10720, 10720, 3344, 5184, 10736, 3344, 3344, 3344,
3344, 3344, 3344, 3344, 3344, 3344, 10752, 2240, 2240, 2240, 10768, 3344, 10784, 3344, 3344, 10704, 10800, 10816, 5184, 2240,
2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344,
3344, 3344, 3344, 10816, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 10832, 6816, 6816,
3344, 3344, 3344, 3344, 3344, 3344, 3344, 10848, 3344, 3344, 3344, 3344, 3344, 2816, 5200, 6832, 5200, 3344, 3344, 3344,
10864, 2816, 3344, 3344, 10864, 3344, 10736, 10800, 2240, 2240, 2240, 2240, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344,
3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 5168, 10736, 6816, 10784, 3344, 3344, 10880,
10896, 5200, 10784, 10784, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 10912, 3344, 3344, 4608, 2240, 2240, 3072,
3344, 3344, 3344, 10832, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 10848, 6816, 6816,
3344, 3344, 3344, 3344, 3344, 3344, 3344, 10864, 3344, 3344, 3344, 3344, 3344, 2816, 10704, 6832, 10704, 3344, 3344, 3344,
10880, 2816, 3344, 3344, 10880, 3344, 10752, 10816, 2240, 2240, 2240, 2240, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344,
3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 5168, 10752, 6816, 10800, 3344, 3344, 10896,
10912, 10704, 10800, 10800, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 10928, 3344, 3344, 4608, 2240, 2240, 3072,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 2240, 2240, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 3312, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 3264, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 7648, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 1600, 2240,
768, 3264, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240,
2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 1136, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 1600, 768,
768, 768, 768, 768, 768, 3264, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240,
2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 768, 3264, 2240, 2240, 2240, 2240, 2240, 2240,
2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240,
2240, 2240, 2240, 2240, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 1136, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768,
768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 2240, 2240, 2240, 2240, 2240, 10928, 2240, 9120, 9120,
9120, 9120, 9120, 9120, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 512, 512, 512, 512, 512, 512, 512, 512,
512, 512, 512, 512, 512, 512, 512, 2240, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256,
6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 10944,
768, 768, 768, 2240, 2240, 2240, 2240, 2240, 10944, 2240, 9120, 9120, 9120, 9120, 9120, 9120, 2240, 2240, 2240, 2240,
2240, 2240, 2240, 2240, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 2240,
6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256,
6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 10960,
};

const uint16_t CatTableRLE_BMP[] = {
Expand Down Expand Up @@ -713,10 +716,10 @@ const uint16_t CatTableRLE_BMP[] = {
228, 61, 228, 61, 228, 61, 228, 61, 228, 61, 228, 61, 228, 61, 228, 61, 1029, 81, 47, 48,
47, 48, 113, 47, 48, 49, 47, 48, 305, 44, 81, 44, 49, 47, 48, 81, 47, 48, 45, 46,
45, 46, 45, 46, 45, 46, 177, 35, 337, 76, 145, 44, 49, 45, 433, 85, 113, 45, 46, 45,
46, 45, 46, 45, 46, 44, 1117, 853, 61, 2869, 413, 6869, 861, 405, 157, 54, 113, 53, 35, 36,
41, 45, 46, 45, 46, 45, 46, 45, 46, 45, 46, 85, 45, 46, 45, 46, 45, 46, 45, 46,
44, 45, 78, 53, 297, 133, 70, 44, 163, 85, 105, 35, 36, 49, 85, 61, 2756, 93, 69, 84,
67, 36, 44, 2884, 49, 99, 36, 189, 1380, 61, 3012, 61, 85, 138, 341, 1028, 1173, 413, 516, 1013,
46, 45, 46, 45, 46, 44, 1117, 853, 61, 2869, 413, 6869, 861, 533, 54, 113, 53, 35, 36, 41,
45, 46, 45, 46, 45, 46, 45, 46, 45, 46, 85, 45, 46, 45, 46, 45, 46, 45, 46, 44,
45, 78, 53, 297, 133, 70, 44, 163, 85, 105, 35, 36, 49, 85, 61, 2756, 93, 69, 84, 67,
36, 44, 2884, 49, 99, 36, 189, 1380, 61, 3012, 61, 85, 138, 341, 1028, 1173, 381, 53, 516, 1013,
61, 330, 981, 266, 53, 490, 1045, 330, 1269, 490, 10261, 65508, 65508, 65508, 14436, 2069, 65508, 65508, 65508, 65508,
65508, 65508, 65508, 65508, 65508, 65508, 17380, 35, 36580, 125, 1781, 317, 1284, 195, 81, 8580, 35, 113, 516, 328,
68, 669, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
Expand Down Expand Up @@ -788,7 +791,7 @@ CharacterCategory CategoriseCharacter(int character) noexcept {
character = (catTable[character >> 9] << 8) | (character & 511);
character = catTableIndex[character >> 4] | (character & 15);
character = (catTable[(character >> 1) + 2176] << 1) | (character & 1);
return static_cast<CharacterCategory>(catTable[character + 7656]);
return static_cast<CharacterCategory>(catTable[character + 7664]);
}
//function--Autogenerated -- end of section automatically generated
#endif
Expand Down
2 changes: 1 addition & 1 deletion scintilla/scripts/GenerateCharacterCategory.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def isPrivateChar(c):
(0x2B820, 0x2CEAF), # U+2B820..U+2CEAF CJK Unified Ideographs Extension E
(0x2CEB0, 0x2EBEF), # U+2CEB0..U+2EBEF CJK Unified Ideographs Extension F
(0x2F800, 0x2FA1F), # U+2F800..U+2FA1F CJK Compatibility Ideographs Supplement
(0x2EBF0, 0x2EE5D), # U+2EBF0..U+2EE5D CJK Unified Ideographs Extension I
(0x2EBF0, 0x2EE5F), # U+2EBF0..U+2EE5F CJK Unified Ideographs Extension I
(0x30000, 0x3134F), # U+30000..U+3134F CJK Unified Ideographs Extension G
(0x31350, 0x323AF), # U+31350..U+323AF CJK Unified Ideographs Extension H

Expand Down
40 changes: 20 additions & 20 deletions scintilla/scripts/GenerateGraphemeBreak.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,26 +43,26 @@ def buildGraphemeClusterBoundary():
table = graphemeClusterBoundary

# https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html
notBreak = [
['Other', 'Extend', 'SpacingMark', 'ZWJ'],
['CR', 'LF'],
['Extend', 'Extend', 'SpacingMark', 'ZWJ'],
['RI', 'Extend', 'RI', 'SpacingMark', 'ZWJ'],
['Prepend', 'Other', 'Extend', 'RI', 'Prepend', 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'ExtPict', 'ZWJ'],
['SpacingMark', 'Extend', 'SpacingMark', 'ZWJ'],
['L', 'Extend', 'SpacingMark', 'L', 'V', 'LV', 'LVT', 'ZWJ'],
['V', 'Extend', 'SpacingMark', 'V', 'T', 'ZWJ'],
['T', 'Extend', 'SpacingMark', 'T', 'ZWJ'],
['LV', 'Extend', 'SpacingMark', 'V', 'T', 'ZWJ'],
['LVT', 'Extend', 'SpacingMark', 'T', 'ZWJ'],
['ExtPict', 'Extend', 'SpacingMark', 'ZWJ'],
#['ZWJ', 'Extend', 'SpacingMark', 'ZWJ'],
['ZWJ', 'Extend', 'SpacingMark', 'ExtPict', 'ZWJ'],
]

for row in notBreak:
first = GraphemeBreakPropertyMap[row[0]]
for item in row[1:]:
notBreak = {
'Other': ['Extend', 'SpacingMark', 'ZWJ'],
'CR': ['LF'],
'Extend': ['Extend', 'SpacingMark', 'ZWJ'],
'RI': ['Extend', 'RI', 'SpacingMark', 'ZWJ'],
'Prepend': ['Other', 'Extend', 'RI', 'Prepend', 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'ExtPict', 'ZWJ'],
'SpacingMark': ['Extend', 'SpacingMark', 'ZWJ'],
'L': ['Extend', 'SpacingMark', 'L', 'V', 'LV', 'LVT', 'ZWJ'],
'V': ['Extend', 'SpacingMark', 'V', 'T', 'ZWJ'],
'T': ['Extend', 'SpacingMark', 'T', 'ZWJ'],
'LV': ['Extend', 'SpacingMark', 'V', 'T', 'ZWJ'],
'LVT': ['Extend', 'SpacingMark', 'T', 'ZWJ'],
'ExtPict': ['Extend', 'SpacingMark', 'ZWJ'],
#'ZWJ': ['Extend', 'SpacingMark', 'ZWJ'],
'ZWJ': ['Extend', 'SpacingMark', 'ExtPict', 'ZWJ'],
}

for key, row in notBreak.items():
first = GraphemeBreakPropertyMap[key]
for item in row:
second = GraphemeBreakPropertyMap[item]
table[first] &= ~(1 << second)

Expand Down
7 changes: 6 additions & 1 deletion scintilla/scripts/GenerateLineBreak.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ class LineBreak(IntFlag):
# https://www.unicode.org/reports/tr14/#Properties
LineBreakPropertyMap = {
LineBreak.NonBreak: [
'AK', # Aksara (XB/XA)
'AL', # Ordinary Alphabetic and Symbol Characters (XP)
'B2', # Break Opportunity Before and After (B/A/XP)
'AS', # Aksara Start (XB/XA)
'EM', # Emoji Modifier (A), Do not break between an emoji base and an emoji modifier.
'GL', # Non-breaking (“Glue”) (XB/XA) (Non-tailorable)
'HL', # Hebrew Letter (XB)
Expand All @@ -34,11 +35,13 @@ class LineBreak(IntFlag):
'RI', # Regional Indicator (B/A/XP)
'SA', # Complex-Context Dependent (South East Asian) (P)
'SG', # Surrogate (XP) (Non-tailorable)
'VI', # Virama (XB/XA)
'WJ', # Word Joiner (XB/XA) (Non-tailorable)
'XX', # Unknown (XP)
'ZWJ', # Zero Width Joiner (XA/XB) (Non-tailorable)
],
LineBreak.BreakBefore: [
'AP', # Aksara Pre-Base (B/XA)
'BB', # Break Before (B)
'EB', # Emoji Base (B/A), may followed by emoji modifier

Expand All @@ -63,9 +66,11 @@ class LineBreak(IntFlag):
'IS', # Infix Numeric Separator (XB)
'NS', # Nonstarters (XB)
'PO', # Postfix Numeric (XB)
'VF', # Virama Final (XB/A)
],
LineBreak.BreakAny: [
'AI', # Ambiguous (Alphabetic or Ideograph)
'B2', # Break Opportunity Before and After (B/A/XP)
'CB', # Contingent Break Opportunity (B/A)
'CJ', # Conditional Japanese Starter, treat as ID: CSS normal breaking
'H2', # Hangul LV Syllable (B/A)
Expand Down
2 changes: 1 addition & 1 deletion scintilla/scripts/UnicodeData.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def readUnicodePropertyFile(path, propertyIndex=1):
else:
propertyList[propertyName] = [code]

print(path, version, 'property:', ', '.join(propertyList.keys()))
print(path, version, 'property:', ', '.join(sorted(propertyList.keys())))
return version, propertyList

def updateUnicodePropertyTable(table, propertyMap, propertyList):
Expand Down
Loading

0 comments on commit 3a4635e

Please sign in to comment.