From 3a4635e189a1e4e14388fbc74ce9ff3184c5fbec Mon Sep 17 00:00:00 2001 From: zufuliu Date: Sat, 14 Oct 2023 08:18:58 +0800 Subject: [PATCH] Update Unicode data to Unicode 15.1. --- scintilla/lexlib/CharacterCategory.cxx | 83 ++++++++++--------- .../scripts/GenerateCharacterCategory.py | 2 +- scintilla/scripts/GenerateGraphemeBreak.py | 40 ++++----- scintilla/scripts/GenerateLineBreak.py | 7 +- scintilla/scripts/UnicodeData.py | 2 +- scintilla/src/CharClassify.cxx | 32 +++---- scintilla/src/CharClassify.h | 2 +- scintilla/src/PositionCache.cxx | 2 +- scintilla/src/RESearch.h | 2 +- src/EditEncoding.c | 4 +- 10 files changed, 92 insertions(+), 84 deletions(-) diff --git a/scintilla/lexlib/CharacterCategory.cxx b/scintilla/lexlib/CharacterCategory.cxx index c3b2c106db..0b1d8ce2fe 100644 --- a/scintilla/lexlib/CharacterCategory.cxx +++ b/scintilla/lexlib/CharacterCategory.cxx @@ -22,7 +22,7 @@ namespace { #define CharacterCategoryUseRangeList 0 //++Autogenerated -- start of section automatically generated -// Created with Python 3.12.0, Unicode 15.0.0 +// Created with Python 3.13.0a1, Unicode 15.1.0 #if CharacterCategoryUseRangeList const int catRanges[] = { }; @@ -49,9 +49,9 @@ const uint8_t catTable[] = { 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 180, 52, 52, 52, 52, 52, 52, 52, 182, 184, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 186, -52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 188, 116, 116, 116, 116, 116, 116, -52, 190, 116, 116, 52, 52, 52, 52, 52, 52, 52, 52, 52, 192, 52, 52, 52, 52, 52, 52, -52, 194, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, +52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 188, 52, 190, 116, 116, 116, 116, +52, 192, 116, 116, 52, 52, 52, 52, 52, 52, 52, 52, 52, 194, 52, 52, 52, 52, 52, 52, +52, 196, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, @@ -120,7 +120,7 @@ const uint8_t catTable[] = { 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, -116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 196, 116, 116, 116, 116, 116, 116, 116, +116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 198, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, 116, @@ -133,13 +133,13 @@ const uint8_t catTable[] = { 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, -70, 70, 70, 70, 70, 70, 70, 198, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, +70, 70, 70, 70, 70, 70, 70, 200, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, -70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 198, +70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 200, // catTable index 3 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 2, 4, 5, 6, 2, 7, 7, 7, 7, 7, 2, 8, 9, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 13, 14, @@ -271,10 +271,10 @@ const uint8_t catTable[] = { 2, 108, 121, 2, 121, 4, 4, 4, 4, 2, 2, 88, 2, 2, 2, 2, 2, 120, 2, 2, 108, 151, 2, 2, 2, 2, 2, 2, 67, 2, 152, 148, 148, 148, 153, 44, 67, 67, 67, 67, 67, 55, 67, 67, 67, 67, 44, 44, 44, 44, 44, 44, 67, 67, 67, 44, 44, 44, 44, 44, -67, 67, 67, 67, 67, 67, 44, 44, 1, 2, 154, 155, 4, 4, 4, 4, 4, 67, 4, 4, -4, 4, 156, 157, 158, 105, 105, 105, 105, 43, 43, 86, 159, 40, 40, 67, 105, 160, 63, 67, -36, 36, 36, 61, 57, 161, 162, 69, 36, 36, 36, 36, 36, 63, 40, 69, 44, 44, 62, 36, -36, 36, 36, 36, 67, 27, 27, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 92, +1, 2, 154, 155, 4, 4, 4, 4, 4, 67, 4, 4, 4, 4, 156, 157, 158, 105, 105, 105, +105, 43, 43, 86, 159, 40, 40, 67, 105, 160, 63, 67, 36, 36, 36, 61, 57, 161, 162, 69, +36, 36, 36, 36, 36, 63, 40, 69, 44, 44, 62, 36, 36, 36, 36, 36, 67, 27, 27, 67, +67, 67, 67, 67, 67, 67, 44, 44, 44, 44, 44, 55, 67, 67, 67, 67, 67, 67, 67, 92, 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, 27, 163, 27, 27, 27, 27, 27, 27, 27, 36, 36, 83, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 164, 2, 7, 7, 7, 7, 7, 36, 44, 44, 32, 32, 32, 32, 32, 32, 32, 70, 51, 165, 43, 43, @@ -408,13 +408,14 @@ const uint8_t catTable[] = { 62, 61, 61, 62, 62, 36, 36, 36, 36, 61, 36, 36, 62, 62, 44, 44, 44, 61, 44, 62, 62, 62, 62, 36, 62, 61, 61, 62, 62, 62, 62, 62, 62, 61, 61, 62, 36, 61, 36, 36, 36, 61, 36, 36, 62, 36, 61, 61, 36, 36, 36, 36, 36, 62, 36, 36, 62, 36, 62, 36, -36, 62, 36, 36, 8, 44, 44, 44, 44, 44, 44, 44, 55, 67, 67, 67, 67, 67, 67, 67, -27, 27, 27, 27, 27, 27, 91, 67, 67, 67, 67, 67, 67, 67, 67, 44, 44, 44, 44, 67, -67, 67, 67, 67, 67, 92, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 92, 44, 44, 44, -67, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 25, 41, 41, 67, 67, 67, 67, -44, 44, 67, 67, 67, 67, 67, 92, 44, 55, 67, 67, 67, 67, 67, 67, 44, 44, 44, 44, -67, 67, 67, 67, 67, 67, 67, 55, 67, 67, 67, 44, 44, 44, 44, 67, 67, 92, 67, 67, -67, 67, 67, 67, 79, 44, 44, 44, 44, 44, 44, 44, 171, 171, 171, 171, 171, 171, 171, 44, +36, 62, 36, 36, 8, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 44, 44, +55, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, 27, 27, 27, 91, 67, 67, 67, 67, 67, +67, 67, 67, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 92, 44, 44, 44, 44, 44, 44, +67, 67, 67, 67, 92, 44, 44, 44, 67, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, +67, 25, 41, 41, 67, 67, 67, 67, 44, 44, 67, 67, 67, 67, 67, 92, 44, 55, 67, 67, +67, 67, 67, 67, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 67, 55, 67, 67, 67, 44, +44, 44, 44, 67, 67, 92, 67, 67, 67, 67, 67, 67, 79, 44, 44, 44, 44, 44, 44, 44, +171, 171, 171, 171, 171, 171, 171, 44, // catTable values 25, 25, 22, 17, 17, 17, 19, 17, 13, 14, 17, 18, 17, 12, 8, 8, 18, 18, 18, 17, 17, 0, 0, 0, 0, 13, 17, 14, 20, 11, 20, 1, 1, 1, 1, 13, 18, 14, 18, 25, @@ -477,8 +478,8 @@ const uint16_t catTableIndex[] = { 3344, 3344, 3344, 3344, 144, 144, 144, 176, 176, 176, 4912, 4928, 208, 208, 208, 208, 208, 208, 4944, 4960, 176, 176, 4976, 768, 768, 768, 4992, 5008, 768, 5024, 5040, 5040, 5040, 5040, 512, 512, 5056, 5072, 5088, 5104, 5120, 5136, 2240, 2240, 3344, 5152, 3344, 3344, 3344, 3344, 3344, 5168, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, -3344, 3344, 3344, 3344, 3344, 5184, 2240, 5200, 5216, 5232, 5248, 5264, 2176, 768, 768, 768, 768, 5280, 2848, 768, -768, 768, 768, 5296, 5312, 768, 768, 2176, 768, 768, 768, 768, 3200, 5328, 768, 768, 3344, 3344, 5168, 768, +3344, 3344, 3344, 3344, 3344, 5184, 2240, 3344, 5200, 5216, 5232, 5248, 2176, 768, 768, 768, 768, 5264, 2848, 768, +768, 768, 768, 5280, 5296, 768, 768, 2176, 768, 768, 768, 768, 3200, 5312, 768, 768, 3344, 3344, 5328, 768, 3344, 5344, 5360, 3344, 5376, 5392, 3344, 3344, 5360, 3344, 3344, 5392, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, @@ -575,14 +576,14 @@ const uint16_t catTableIndex[] = { 2240, 2240, 2240, 7984, 4336, 4336, 10480, 10496, 2240, 2240, 2240, 2240, 7984, 4336, 10512, 10528, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 10544, 768, 10560, 10576, 10592, 10608, 10624, 10640, 10656, 3296, 10672, 3296, 2240, 2240, 2240, 10688, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, -3344, 3344, 5200, 3344, 3344, 3344, 3344, 3344, 3344, 5168, 5344, 10704, 10704, 10704, 3344, 5184, 10720, 3344, 3344, 3344, -3344, 3344, 3344, 3344, 3344, 3344, 10736, 2240, 2240, 2240, 10752, 3344, 10768, 3344, 3344, 5200, 10784, 10800, 5184, 2240, +3344, 3344, 10704, 3344, 3344, 3344, 3344, 3344, 3344, 5168, 5344, 10720, 10720, 10720, 3344, 5184, 10736, 3344, 3344, 3344, +3344, 3344, 3344, 3344, 3344, 3344, 10752, 2240, 2240, 2240, 10768, 3344, 10784, 3344, 3344, 10704, 10800, 10816, 5184, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, -3344, 3344, 3344, 10816, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 10832, 6816, 6816, -3344, 3344, 3344, 3344, 3344, 3344, 3344, 10848, 3344, 3344, 3344, 3344, 3344, 2816, 5200, 6832, 5200, 3344, 3344, 3344, -10864, 2816, 3344, 3344, 10864, 3344, 10736, 10800, 2240, 2240, 2240, 2240, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, -3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 5168, 10736, 6816, 10784, 3344, 3344, 10880, -10896, 5200, 10784, 10784, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 10912, 3344, 3344, 4608, 2240, 2240, 3072, +3344, 3344, 3344, 10832, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 10848, 6816, 6816, +3344, 3344, 3344, 3344, 3344, 3344, 3344, 10864, 3344, 3344, 3344, 3344, 3344, 2816, 10704, 6832, 10704, 3344, 3344, 3344, +10880, 2816, 3344, 3344, 10880, 3344, 10752, 10816, 2240, 2240, 2240, 2240, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, +3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 5168, 10752, 6816, 10800, 3344, 3344, 10896, +10912, 10704, 10800, 10800, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 3344, 10928, 3344, 3344, 4608, 2240, 2240, 3072, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 2240, 2240, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 3312, 768, 768, 768, 768, 768, 768, 768, 768, @@ -590,15 +591,17 @@ const uint16_t catTableIndex[] = { 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 7648, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, -768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 1600, 2240, -768, 3264, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, -2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 768, 768, 768, 768, 768, 768, 768, 768, -768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 1136, 768, 768, 768, 768, 768, 768, 768, +768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 1600, 768, +768, 768, 768, 768, 768, 3264, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, +2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 768, 3264, 2240, 2240, 2240, 2240, 2240, 2240, +2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, +2240, 2240, 2240, 2240, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, +768, 768, 768, 768, 1136, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, -768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 2240, 2240, 2240, 2240, 2240, 10928, 2240, 9120, 9120, -9120, 9120, 9120, 9120, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 2240, 512, 512, 512, 512, 512, 512, 512, 512, -512, 512, 512, 512, 512, 512, 512, 2240, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, -6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 10944, +768, 768, 768, 2240, 2240, 2240, 2240, 2240, 10944, 2240, 9120, 9120, 9120, 9120, 9120, 9120, 2240, 2240, 2240, 2240, +2240, 2240, 2240, 2240, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 2240, +6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, +6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 6256, 10960, }; const uint16_t CatTableRLE_BMP[] = { @@ -713,10 +716,10 @@ const uint16_t CatTableRLE_BMP[] = { 228, 61, 228, 61, 228, 61, 228, 61, 228, 61, 228, 61, 228, 61, 228, 61, 1029, 81, 47, 48, 47, 48, 113, 47, 48, 49, 47, 48, 305, 44, 81, 44, 49, 47, 48, 81, 47, 48, 45, 46, 45, 46, 45, 46, 45, 46, 177, 35, 337, 76, 145, 44, 49, 45, 433, 85, 113, 45, 46, 45, -46, 45, 46, 45, 46, 44, 1117, 853, 61, 2869, 413, 6869, 861, 405, 157, 54, 113, 53, 35, 36, -41, 45, 46, 45, 46, 45, 46, 45, 46, 45, 46, 85, 45, 46, 45, 46, 45, 46, 45, 46, -44, 45, 78, 53, 297, 133, 70, 44, 163, 85, 105, 35, 36, 49, 85, 61, 2756, 93, 69, 84, -67, 36, 44, 2884, 49, 99, 36, 189, 1380, 61, 3012, 61, 85, 138, 341, 1028, 1173, 413, 516, 1013, +46, 45, 46, 45, 46, 44, 1117, 853, 61, 2869, 413, 6869, 861, 533, 54, 113, 53, 35, 36, 41, +45, 46, 45, 46, 45, 46, 45, 46, 45, 46, 85, 45, 46, 45, 46, 45, 46, 45, 46, 44, +45, 78, 53, 297, 133, 70, 44, 163, 85, 105, 35, 36, 49, 85, 61, 2756, 93, 69, 84, 67, +36, 44, 2884, 49, 99, 36, 189, 1380, 61, 3012, 61, 85, 138, 341, 1028, 1173, 381, 53, 516, 1013, 61, 330, 981, 266, 53, 490, 1045, 330, 1269, 490, 10261, 65508, 65508, 65508, 14436, 2069, 65508, 65508, 65508, 65508, 65508, 65508, 65508, 65508, 65508, 65508, 17380, 35, 36580, 125, 1781, 317, 1284, 195, 81, 8580, 35, 113, 516, 328, 68, 669, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, @@ -788,7 +791,7 @@ CharacterCategory CategoriseCharacter(int character) noexcept { character = (catTable[character >> 9] << 8) | (character & 511); character = catTableIndex[character >> 4] | (character & 15); character = (catTable[(character >> 1) + 2176] << 1) | (character & 1); - return static_cast(catTable[character + 7656]); + return static_cast(catTable[character + 7664]); } //function--Autogenerated -- end of section automatically generated #endif diff --git a/scintilla/scripts/GenerateCharacterCategory.py b/scintilla/scripts/GenerateCharacterCategory.py index 1e2513bf0c..805aa53f37 100644 --- a/scintilla/scripts/GenerateCharacterCategory.py +++ b/scintilla/scripts/GenerateCharacterCategory.py @@ -102,7 +102,7 @@ def isPrivateChar(c): (0x2B820, 0x2CEAF), # U+2B820..U+2CEAF CJK Unified Ideographs Extension E (0x2CEB0, 0x2EBEF), # U+2CEB0..U+2EBEF CJK Unified Ideographs Extension F (0x2F800, 0x2FA1F), # U+2F800..U+2FA1F CJK Compatibility Ideographs Supplement - (0x2EBF0, 0x2EE5D), # U+2EBF0..U+2EE5D CJK Unified Ideographs Extension I + (0x2EBF0, 0x2EE5F), # U+2EBF0..U+2EE5F CJK Unified Ideographs Extension I (0x30000, 0x3134F), # U+30000..U+3134F CJK Unified Ideographs Extension G (0x31350, 0x323AF), # U+31350..U+323AF CJK Unified Ideographs Extension H diff --git a/scintilla/scripts/GenerateGraphemeBreak.py b/scintilla/scripts/GenerateGraphemeBreak.py index a7b4d0823e..c609edd5fc 100644 --- a/scintilla/scripts/GenerateGraphemeBreak.py +++ b/scintilla/scripts/GenerateGraphemeBreak.py @@ -43,26 +43,26 @@ def buildGraphemeClusterBoundary(): table = graphemeClusterBoundary # https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html - notBreak = [ - ['Other', 'Extend', 'SpacingMark', 'ZWJ'], - ['CR', 'LF'], - ['Extend', 'Extend', 'SpacingMark', 'ZWJ'], - ['RI', 'Extend', 'RI', 'SpacingMark', 'ZWJ'], - ['Prepend', 'Other', 'Extend', 'RI', 'Prepend', 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'ExtPict', 'ZWJ'], - ['SpacingMark', 'Extend', 'SpacingMark', 'ZWJ'], - ['L', 'Extend', 'SpacingMark', 'L', 'V', 'LV', 'LVT', 'ZWJ'], - ['V', 'Extend', 'SpacingMark', 'V', 'T', 'ZWJ'], - ['T', 'Extend', 'SpacingMark', 'T', 'ZWJ'], - ['LV', 'Extend', 'SpacingMark', 'V', 'T', 'ZWJ'], - ['LVT', 'Extend', 'SpacingMark', 'T', 'ZWJ'], - ['ExtPict', 'Extend', 'SpacingMark', 'ZWJ'], - #['ZWJ', 'Extend', 'SpacingMark', 'ZWJ'], - ['ZWJ', 'Extend', 'SpacingMark', 'ExtPict', 'ZWJ'], - ] - - for row in notBreak: - first = GraphemeBreakPropertyMap[row[0]] - for item in row[1:]: + notBreak = { + 'Other': ['Extend', 'SpacingMark', 'ZWJ'], + 'CR': ['LF'], + 'Extend': ['Extend', 'SpacingMark', 'ZWJ'], + 'RI': ['Extend', 'RI', 'SpacingMark', 'ZWJ'], + 'Prepend': ['Other', 'Extend', 'RI', 'Prepend', 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'ExtPict', 'ZWJ'], + 'SpacingMark': ['Extend', 'SpacingMark', 'ZWJ'], + 'L': ['Extend', 'SpacingMark', 'L', 'V', 'LV', 'LVT', 'ZWJ'], + 'V': ['Extend', 'SpacingMark', 'V', 'T', 'ZWJ'], + 'T': ['Extend', 'SpacingMark', 'T', 'ZWJ'], + 'LV': ['Extend', 'SpacingMark', 'V', 'T', 'ZWJ'], + 'LVT': ['Extend', 'SpacingMark', 'T', 'ZWJ'], + 'ExtPict': ['Extend', 'SpacingMark', 'ZWJ'], + #'ZWJ': ['Extend', 'SpacingMark', 'ZWJ'], + 'ZWJ': ['Extend', 'SpacingMark', 'ExtPict', 'ZWJ'], + } + + for key, row in notBreak.items(): + first = GraphemeBreakPropertyMap[key] + for item in row: second = GraphemeBreakPropertyMap[item] table[first] &= ~(1 << second) diff --git a/scintilla/scripts/GenerateLineBreak.py b/scintilla/scripts/GenerateLineBreak.py index 50b051b58c..0f052333d0 100644 --- a/scintilla/scripts/GenerateLineBreak.py +++ b/scintilla/scripts/GenerateLineBreak.py @@ -22,8 +22,9 @@ class LineBreak(IntFlag): # https://www.unicode.org/reports/tr14/#Properties LineBreakPropertyMap = { LineBreak.NonBreak: [ + 'AK', # Aksara (XB/XA) 'AL', # Ordinary Alphabetic and Symbol Characters (XP) - 'B2', # Break Opportunity Before and After (B/A/XP) + 'AS', # Aksara Start (XB/XA) 'EM', # Emoji Modifier (A), Do not break between an emoji base and an emoji modifier. 'GL', # Non-breaking (“Glue”) (XB/XA) (Non-tailorable) 'HL', # Hebrew Letter (XB) @@ -34,11 +35,13 @@ class LineBreak(IntFlag): 'RI', # Regional Indicator (B/A/XP) 'SA', # Complex-Context Dependent (South East Asian) (P) 'SG', # Surrogate (XP) (Non-tailorable) + 'VI', # Virama (XB/XA) 'WJ', # Word Joiner (XB/XA) (Non-tailorable) 'XX', # Unknown (XP) 'ZWJ', # Zero Width Joiner (XA/XB) (Non-tailorable) ], LineBreak.BreakBefore: [ + 'AP', # Aksara Pre-Base (B/XA) 'BB', # Break Before (B) 'EB', # Emoji Base (B/A), may followed by emoji modifier @@ -63,9 +66,11 @@ class LineBreak(IntFlag): 'IS', # Infix Numeric Separator (XB) 'NS', # Nonstarters (XB) 'PO', # Postfix Numeric (XB) + 'VF', # Virama Final (XB/A) ], LineBreak.BreakAny: [ 'AI', # Ambiguous (Alphabetic or Ideograph) + 'B2', # Break Opportunity Before and After (B/A/XP) 'CB', # Contingent Break Opportunity (B/A) 'CJ', # Conditional Japanese Starter, treat as ID: CSS normal breaking 'H2', # Hangul LV Syllable (B/A) diff --git a/scintilla/scripts/UnicodeData.py b/scintilla/scripts/UnicodeData.py index 158f564066..15dc79aa56 100644 --- a/scintilla/scripts/UnicodeData.py +++ b/scintilla/scripts/UnicodeData.py @@ -52,7 +52,7 @@ def readUnicodePropertyFile(path, propertyIndex=1): else: propertyList[propertyName] = [code] - print(path, version, 'property:', ', '.join(propertyList.keys())) + print(path, version, 'property:', ', '.join(sorted(propertyList.keys()))) return version, propertyList def updateUnicodePropertyTable(table, propertyMap, propertyList): diff --git a/scintilla/src/CharClassify.cxx b/scintilla/src/CharClassify.cxx index 24ef908b7b..1167e1c0ee 100644 --- a/scintilla/src/CharClassify.cxx +++ b/scintilla/src/CharClassify.cxx @@ -173,7 +173,7 @@ void ExpandSkipBlock(const IndexType (&indexList)[IndexSize], const ValueType (& } //++Autogenerated -- start of section automatically generated -// Created with Python 3.12.0, Unicode 15.0.0 +// Created with Python 3.13.0a1, Unicode 15.1.0 const uint16_t CharClassifyRLE_BMP[] = { 264, 122, 83, 58, 211, 50, 211, 34, 272, 74, 11, 18, 8, 34, 19, 10, 11, 26, 19, 10, 27, 10, 187, 10, 251, 10, 3667, 34, 99, 114, 43, 58, 11, 10, 11, 138, 939, 10, 19, 16, @@ -213,18 +213,18 @@ const uint16_t CharClassifyRLE_BMP[] = { 467, 18, 32, 5306, 200, 90, 168, 483, 626, 179, 5042, 243, 7938, 16, 258, 8, 842, 1835, 50, 75, 40, 34, 11, 18, 307, 8, 11, 40, 11, 16, 451, 56, 11, 10, 112, 195, 72, 59, 8, 59, 8, 59, 8, 59, 8, 59, 8, 59, 8, 59, 8, 59, 8, 259, 378, 11, 370, 272, 210, 8, -714, 96, 1714, 208, 98, 40, 34, 28, 202, 124, 10, 44, 18, 44, 26, 8, 692, 16, 20, 18, -28, 10, 724, 10, 36, 40, 348, 8, 756, 8, 18, 36, 82, 260, 290, 96, 132, 250, 8, 84, -242, 68, 10, 124, 258, 84, 314, 124, 2562, 52740, 514, 65532, 65532, 46204, 24, 442, 72, 371, 18, 2155, -26, 227, 160, 411, 10, 83, 10, 923, 50, 64, 186, 75, 18, 827, 18, 515, 40, 19, 8, 11, -8, 43, 192, 435, 34, 11, 24, 51, 34, 48, 419, 34, 64, 563, 64, 18, 83, 48, 195, 26, -11, 10, 395, 18, 291, 88, 10, 236, 24, 523, 106, 8, 91, 32, 18, 251, 8, 443, 72, 115, -16, 83, 16, 34, 187, 26, 587, 192, 27, 18, 131, 18, 43, 80, 51, 16, 51, 16, 51, 72, -59, 8, 59, 8, 347, 10, 115, 18, 32, 987, 10, 19, 16, 83, 48, 65532, 23852, 96, 188, 32, -396, 65528, 2088, 2932, 16, 852, 304, 59, 96, 43, 40, 99, 10, 107, 8, 43, 8, 11, 8, 19, -8, 19, 8, 867, 138, 128, 2907, 146, 515, 16, 435, 56, 10, 256, 99, 34, 131, 82, 48, 131, -282, 8, 154, 8, 34, 32, 43, 8, 1083, 32, 122, 83, 58, 211, 50, 211, 90, 716, 24, 52, -16, 52, 16, 52, 16, 28, 24, 58, 8, 58, 104, 18, 16, +714, 96, 1714, 208, 130, 8, 34, 28, 202, 124, 10, 44, 18, 44, 26, 8, 692, 16, 20, 18, +28, 10, 724, 10, 36, 40, 348, 8, 756, 8, 18, 36, 82, 260, 290, 88, 10, 132, 250, 8, +84, 242, 68, 10, 124, 258, 84, 314, 124, 2562, 52740, 514, 65532, 65532, 46204, 24, 442, 72, 371, 18, +2155, 26, 227, 160, 411, 10, 83, 10, 923, 50, 64, 186, 75, 18, 827, 18, 515, 40, 19, 8, +11, 8, 43, 192, 435, 34, 11, 24, 51, 34, 48, 419, 34, 64, 563, 64, 18, 83, 48, 195, +26, 11, 10, 395, 18, 291, 88, 10, 236, 24, 523, 106, 8, 91, 32, 18, 251, 8, 443, 72, +115, 16, 83, 16, 34, 187, 26, 587, 192, 27, 18, 131, 18, 43, 80, 51, 16, 51, 16, 51, +72, 59, 8, 59, 8, 347, 10, 115, 18, 32, 987, 10, 19, 16, 83, 48, 65532, 23852, 96, 188, +32, 396, 65528, 2088, 2932, 16, 852, 304, 59, 96, 43, 40, 99, 10, 107, 8, 43, 8, 11, 8, +19, 8, 19, 8, 867, 138, 128, 2907, 146, 515, 16, 435, 56, 10, 256, 99, 34, 131, 82, 48, +131, 282, 8, 154, 8, 34, 32, 43, 8, 1083, 32, 122, 83, 58, 211, 50, 211, 90, 716, 24, +52, 16, 52, 16, 52, 16, 28, 24, 58, 8, 58, 104, 18, 16, }; } @@ -302,7 +302,7 @@ const uint8_t CharClassify::CharClassifyTable[] = { 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 185, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 186, -8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 108, 108, 108, 108, +108, 108, 108, 108, 108, 108, 108, 108, 108, 187, 8, 8, 8, 8, 8, 8, 108, 108, 108, 108, 108, 108, 108, 108, 187, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 188, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, @@ -385,7 +385,7 @@ const uint8_t CharClassify::CharClassifyTable[] = { 9, 47, 9, 12, 9, 13, 9, 9, 9, 9, 9, 127, 47, 124, 9, 96, 9, 13, 9, 13, 9, 9, 128, 9, 9, 9, 9, 9, 9, 76, 5, 5, 5, 5, 0, 21, 87, 87, 87, 87, 5, 5, 5, 5, 87, 87, 87, 87, 87, 87, 87, 86, 87, 87, 87, 88, 87, 87, 87, 87, -87, 87, 87, 87, 86, 5, 87, 87, 87, 87, 87, 87, 89, 5, 5, 5, 87, 87, 87, 88, +87, 87, 87, 87, 86, 5, 87, 87, 87, 87, 87, 87, 89, 5, 87, 87, 87, 87, 87, 88, 5, 5, 5, 5, 87, 90, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 5, 5, // CharClassifyTable values 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, @@ -683,7 +683,7 @@ inline void SetRange(bool (&bs)[256], int low, int high) noexcept { } //dbcs++Autogenerated -- start of section automatically generated -// Created with Python 3.12.0, Unicode 15.0.0 +// Created with Python 3.13.0a1, Unicode 15.1.0 const uint16_t CharClassifyRLE_CP932[] = { 264, 122, 83, 58, 211, 50, 211, 34, 264, 12, 42, 468, 232, 28, 65528, 65528, 65528, 65528, 552, 138, 36, 10, 44, 282, 8, 394, 60, 450, 11, 98, 536, 122, 83, 58, 211, 10, 19, 10, 11, 8, diff --git a/scintilla/src/CharClassify.h b/scintilla/src/CharClassify.h index cd672039fa..66deccef9c 100644 --- a/scintilla/src/CharClassify.h +++ b/scintilla/src/CharClassify.h @@ -95,7 +95,7 @@ class CharClassify { static void InitUnicodeData() noexcept; //++Autogenerated -- start of section automatically generated -// Created with Python 3.12.0, Unicode 15.0.0 +// Created with Python 3.13.0a1, Unicode 15.1.0 static CharacterClass ClassifyCharacter(uint32_t ch) noexcept { if (ch < sizeof(classifyMap)) { return static_cast(classifyMap[ch]); diff --git a/scintilla/src/PositionCache.cxx b/scintilla/src/PositionCache.cxx index 4d796952f4..6f9eef03e7 100644 --- a/scintilla/src/PositionCache.cxx +++ b/scintilla/src/PositionCache.cxx @@ -352,7 +352,7 @@ enum class WrapBreak { constexpr uint8_t ASCIIWrapBreakTable[128] = { //++Autogenerated -- start of section automatically generated -// Created with Python 3.12.0, Unicode 15.0.0 +// Created with Python 3.13.0a1, Unicode 15.1.0 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 1, 2, 2, 2, 2, diff --git a/scintilla/src/RESearch.h b/scintilla/src/RESearch.h index b8b1c4ff24..0d9ea37eb6 100644 --- a/scintilla/src/RESearch.h +++ b/scintilla/src/RESearch.h @@ -53,7 +53,7 @@ class RESearch { int sta; int failure; - // cache for previous pattern with same address, length and flags + // cache for previous pattern to avoid recompile Scintilla::FindOption previousFlags; std::string cachedPattern; diff --git a/src/EditEncoding.c b/src/EditEncoding.c index 1c716042e9..e592b737c1 100644 --- a/src/EditEncoding.c +++ b/src/EditEncoding.c @@ -363,7 +363,7 @@ static NP2EncodingGroup sEncodingGroupList[] = { }; //++Autogenerated -- start of section automatically generated -// Created with Python 3.12.0, Unicode 15.0.0 +// Created with Python 3.13.0a1, Unicode 15.1.0 static const uint8_t ANSICharClassifyTable[] = { // Central European (Windows-1250) 0x22, 0xAA, 0xB8, 0xFF, 0xA8, 0xAA, 0xB8, 0xFF, 0xEC, 0xAE, 0xBA, 0xE2, 0xEA, 0xAE, 0xBE, 0xFB, @@ -2455,7 +2455,7 @@ int EditDetermineEncoding(LPCWSTR pszFile, char *lpData, DWORD cbData, int *enco } //case++Autogenerated -- start of section automatically generated -// Created with Python 3.12.0, Unicode 15.0.0 +// Created with Python 3.13.0a1, Unicode 15.1.0 #define kUnicodeCaseSensitiveFirst 0x0600U #define kUnicodeCaseSensitiveMax 0x1e943U